1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
4 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
5 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10-SCRATCH %s
; External callees taking a single i1 argument: plain, signext and zeroext.
7 declare hidden amdgpu_gfx void @external_void_func_i1(i1) #0
8 declare hidden amdgpu_gfx void @external_void_func_i1_signext(i1 signext) #0
9 declare hidden amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext) #0
; External callees taking an i8 scalar (plain, signext, zeroext) or an i8
; vector by value. Each vector callee's parameter type matches the element
; count encoded in its name.
11 declare hidden amdgpu_gfx void @external_void_func_i8(i8) #0
12 declare hidden amdgpu_gfx void @external_void_func_i8_signext(i8 signext) #0
13 declare hidden amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext) #0
14 declare hidden amdgpu_gfx void @external_void_func_v2i8(<2 x i8>) #0
15 declare hidden amdgpu_gfx void @external_void_func_v3i8(<3 x i8>) #0
16 declare hidden amdgpu_gfx void @external_void_func_v4i8(<4 x i8>) #0
17 declare hidden amdgpu_gfx void @external_void_func_v5i8(<5 x i8>) #0
18 declare hidden amdgpu_gfx void @external_void_func_v8i8(<8 x i8>) #0
19 declare hidden amdgpu_gfx void @external_void_func_v16i8(<16 x i8>) #0
20 declare hidden amdgpu_gfx void @external_void_func_v32i8(<32 x i8>) #0
; External callees that take an i8 or i8-vector argument and return a value
; of the same type (despite the "void" in their names).
22 declare hidden amdgpu_gfx i8 @external_void_func_i8_ret(i8) #0
23 declare hidden amdgpu_gfx <2 x i8> @external_void_func_v2i8_ret(<2 x i8>) #0
24 declare hidden amdgpu_gfx <3 x i8> @external_void_func_v3i8_ret(<3 x i8>) #0
25 declare hidden amdgpu_gfx <4 x i8> @external_void_func_v4i8_ret(<4 x i8>) #0
26 declare hidden amdgpu_gfx <5 x i8> @external_void_func_v5i8_ret(<5 x i8>) #0
27 declare hidden amdgpu_gfx <8 x i8> @external_void_func_v8i8_ret(<8 x i8>) #0
28 declare hidden amdgpu_gfx <32 x i8> @external_void_func_v32i8_ret(<32 x i8>) #0
; i16 argument: plain, signext and zeroext.
30 declare hidden amdgpu_gfx void @external_void_func_i16(i16) #0
31 declare hidden amdgpu_gfx void @external_void_func_i16_signext(i16 signext) #0
32 declare hidden amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext) #0
; i32 and i64 scalars, and i64 vectors.
34 declare hidden amdgpu_gfx void @external_void_func_i32(i32) #0
35 declare hidden amdgpu_gfx void @external_void_func_i64(i64) #0
36 declare hidden amdgpu_gfx void @external_void_func_v2i64(<2 x i64>) #0
37 declare hidden amdgpu_gfx void @external_void_func_v3i64(<3 x i64>) #0
38 declare hidden amdgpu_gfx void @external_void_func_v4i64(<4 x i64>) #0
; half/float/double scalars, and float/double vectors.
40 declare hidden amdgpu_gfx void @external_void_func_f16(half) #0
41 declare hidden amdgpu_gfx void @external_void_func_f32(float) #0
42 declare hidden amdgpu_gfx void @external_void_func_f64(double) #0
43 declare hidden amdgpu_gfx void @external_void_func_v2f32(<2 x float>) #0
44 declare hidden amdgpu_gfx void @external_void_func_v2f64(<2 x double>) #0
45 declare hidden amdgpu_gfx void @external_void_func_v3f32(<3 x float>) #0
46 declare hidden amdgpu_gfx void @external_void_func_v3f64(<3 x double>) #0
47 declare hidden amdgpu_gfx void @external_void_func_v5f32(<5 x float>) #0
; i16 and half vectors with 2, 3 and 4 elements.
49 declare hidden amdgpu_gfx void @external_void_func_v2i16(<2 x i16>) #0
50 declare hidden amdgpu_gfx void @external_void_func_v2f16(<2 x half>) #0
51 declare hidden amdgpu_gfx void @external_void_func_v3i16(<3 x i16>) #0
52 declare hidden amdgpu_gfx void @external_void_func_v3f16(<3 x half>) #0
53 declare hidden amdgpu_gfx void @external_void_func_v4i16(<4 x i16>) #0
54 declare hidden amdgpu_gfx void @external_void_func_v4f16(<4 x half>) #0
; i32 vectors, including two variants that take an extra trailing i32.
56 declare hidden amdgpu_gfx void @external_void_func_v2i32(<2 x i32>) #0
57 declare hidden amdgpu_gfx void @external_void_func_v3i32(<3 x i32>) #0
58 declare hidden amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
59 declare hidden amdgpu_gfx void @external_void_func_v4i32(<4 x i32>) #0
60 declare hidden amdgpu_gfx void @external_void_func_v5i32(<5 x i32>) #0
61 declare hidden amdgpu_gfx void @external_void_func_v8i32(<8 x i32>) #0
62 declare hidden amdgpu_gfx void @external_void_func_v16i32(<16 x i32>) #0
63 declare hidden amdgpu_gfx void @external_void_func_v32i32(<32 x i32>) #0
64 declare hidden amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
; Integer scalars and i64 vectors with the argument marked inreg.
66 declare hidden amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg) #0
67 declare hidden amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg) #0
68 declare hidden amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg) #0
69 declare hidden amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg) #0
70 declare hidden amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg) #0
71 declare hidden amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg) #0
72 declare hidden amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg) #0
73 declare hidden amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg) #0
; Floating-point scalars and vectors with the argument marked inreg.
75 declare hidden amdgpu_gfx void @external_void_func_f16_inreg(half inreg) #0
76 declare hidden amdgpu_gfx void @external_void_func_f32_inreg(float inreg) #0
77 declare hidden amdgpu_gfx void @external_void_func_f64_inreg(double inreg) #0
78 declare hidden amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg) #0
79 declare hidden amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg) #0
80 declare hidden amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg) #0
81 declare hidden amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg) #0
82 declare hidden amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg) #0
; i16 and half vectors with the argument marked inreg.
84 declare hidden amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg) #0
85 declare hidden amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
86 declare hidden amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg) #0
87 declare hidden amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
88 declare hidden amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg) #0
89 declare hidden amdgpu_gfx void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
; i32 vectors with every argument marked inreg, including the trailing-i32 variants.
91 declare hidden amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
92 declare hidden amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0
93 declare hidden amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg, i32 inreg) #0
94 declare hidden amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
95 declare hidden amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg) #0
96 declare hidden amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
97 declare hidden amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
98 declare hidden amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg) #0
99 declare hidden amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg, i32 inreg) #0
101 ; return value and argument
102 declare hidden amdgpu_gfx i32 @external_i32_func_i32(i32) #0
; Aggregate arguments: a { i8, i32 } struct passed directly, through a byval
; pointer in addrspace(5), and through an sret pointer plus a byval pointer.
105 declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) #0
106 declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
107 declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
; Calls @external_void_func_i1 with the constant `i1 true`.
; On every check prefix the expected code moves 1 into v0 and stores it as a
; single byte at s32 before the s_swappc_b64 call.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
109 define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
110 ; GFX9-LABEL: test_call_external_void_func_i1_imm:
112 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX9-NEXT: s_mov_b32 s34, s33
114 ; GFX9-NEXT: s_mov_b32 s33, s32
115 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
116 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
117 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
118 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
119 ; GFX9-NEXT: s_addk_i32 s32, 0x400
120 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
121 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
122 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
123 ; GFX9-NEXT: s_getpc_b64 s[34:35]
124 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
125 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
126 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
127 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
128 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
129 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
130 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
131 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
132 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
133 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
134 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
135 ; GFX9-NEXT: s_mov_b32 s33, s34
136 ; GFX9-NEXT: s_waitcnt vmcnt(0)
137 ; GFX9-NEXT: s_setpc_b64 s[30:31]
139 ; GFX10-LABEL: test_call_external_void_func_i1_imm:
141 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX10-NEXT: s_mov_b32 s34, s33
143 ; GFX10-NEXT: s_mov_b32 s33, s32
144 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
145 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
146 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
147 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
148 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
149 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
150 ; GFX10-NEXT: s_addk_i32 s32, 0x200
151 ; GFX10-NEXT: s_getpc_b64 s[34:35]
152 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
153 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
154 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
155 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
156 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
157 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
158 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
159 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
160 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
161 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
162 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
163 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
164 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
165 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
166 ; GFX10-NEXT: s_mov_b32 s33, s34
167 ; GFX10-NEXT: s_waitcnt vmcnt(0)
168 ; GFX10-NEXT: s_setpc_b64 s[30:31]
170 ; GFX11-LABEL: test_call_external_void_func_i1_imm:
172 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173 ; GFX11-NEXT: s_mov_b32 s0, s33
174 ; GFX11-NEXT: s_mov_b32 s33, s32
175 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
176 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
177 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
178 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
179 ; GFX11-NEXT: v_mov_b32_e32 v0, 1
180 ; GFX11-NEXT: s_add_i32 s32, s32, 16
181 ; GFX11-NEXT: s_getpc_b64 s[0:1]
182 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4
183 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12
184 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
185 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
186 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
187 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
188 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
189 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
190 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
191 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
192 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
193 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
194 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
195 ; GFX11-NEXT: s_add_i32 s32, s32, -16
196 ; GFX11-NEXT: s_mov_b32 s33, s0
197 ; GFX11-NEXT: s_waitcnt vmcnt(0)
198 ; GFX11-NEXT: s_setpc_b64 s[30:31]
200 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm:
201 ; GFX10-SCRATCH: ; %bb.0:
202 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
204 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
205 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
206 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
207 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
208 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
209 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
210 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
211 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
212 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
213 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4
214 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12
215 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
216 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
217 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
218 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
219 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
220 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
221 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
222 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
223 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
224 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
225 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
226 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
227 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
228 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
229 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
230 call amdgpu_gfx void @external_void_func_i1(i1 true)
; Loads a volatile i1 from an undef addrspace(1) pointer and passes it to
; @external_void_func_i1_signext as `i1 signext`. The function's own i32
; parameter is unnamed and unused.
; On every check prefix the expected code loads an unsigned byte, masks it
; with 1, and stores it as a single byte at s32 before the s_swappc_b64 call.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
234 define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 {
235 ; GFX9-LABEL: test_call_external_void_func_i1_signext:
237 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GFX9-NEXT: s_mov_b32 s34, s33
239 ; GFX9-NEXT: s_mov_b32 s33, s32
240 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
241 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
242 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
243 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
244 ; GFX9-NEXT: s_waitcnt vmcnt(0)
245 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
246 ; GFX9-NEXT: s_addk_i32 s32, 0x400
247 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
248 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
249 ; GFX9-NEXT: s_getpc_b64 s[34:35]
250 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
251 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
252 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
253 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
254 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
255 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
256 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
257 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
258 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
259 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
260 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
261 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
262 ; GFX9-NEXT: s_mov_b32 s33, s34
263 ; GFX9-NEXT: s_waitcnt vmcnt(0)
264 ; GFX9-NEXT: s_setpc_b64 s[30:31]
266 ; GFX10-LABEL: test_call_external_void_func_i1_signext:
268 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269 ; GFX10-NEXT: s_mov_b32 s34, s33
270 ; GFX10-NEXT: s_mov_b32 s33, s32
271 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
272 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
273 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
274 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
275 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
276 ; GFX10-NEXT: s_waitcnt vmcnt(0)
277 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
278 ; GFX10-NEXT: s_addk_i32 s32, 0x200
279 ; GFX10-NEXT: s_getpc_b64 s[34:35]
280 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
281 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
282 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
283 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
284 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
285 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
286 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
287 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
288 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
289 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
290 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
291 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
292 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
293 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
294 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
295 ; GFX10-NEXT: s_mov_b32 s33, s34
296 ; GFX10-NEXT: s_waitcnt vmcnt(0)
297 ; GFX10-NEXT: s_setpc_b64 s[30:31]
299 ; GFX11-LABEL: test_call_external_void_func_i1_signext:
301 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302 ; GFX11-NEXT: s_mov_b32 s0, s33
303 ; GFX11-NEXT: s_mov_b32 s33, s32
304 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
305 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
306 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
307 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc
308 ; GFX11-NEXT: s_waitcnt vmcnt(0)
309 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
310 ; GFX11-NEXT: s_add_i32 s32, s32, 16
311 ; GFX11-NEXT: s_getpc_b64 s[0:1]
312 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4
313 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12
314 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
315 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
316 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
317 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
318 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
319 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
320 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
321 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
322 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
323 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
324 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
325 ; GFX11-NEXT: s_add_i32 s32, s32, -16
326 ; GFX11-NEXT: s_mov_b32 s33, s0
327 ; GFX11-NEXT: s_waitcnt vmcnt(0)
328 ; GFX11-NEXT: s_setpc_b64 s[30:31]
330 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext:
331 ; GFX10-SCRATCH: ; %bb.0:
332 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
334 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
335 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
336 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
337 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
338 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
339 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
340 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
341 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
342 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
343 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
344 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4
345 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12
346 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
347 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
348 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
349 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
350 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
351 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
352 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
353 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
354 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
355 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
356 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
357 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
358 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
359 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
360 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
361 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
362 %var = load volatile i1, ptr addrspace(1) undef
363 call amdgpu_gfx void @external_void_func_i1_signext(i1 signext %var)
; Loads a volatile i1 from an undef addrspace(1) pointer and passes it to
; @external_void_func_i1_zeroext as `i1 zeroext`. The function's own i32
; parameter is unnamed and unused.
; On every check prefix the expected code loads an unsigned byte, masks it
; with 1, and stores it as a single byte at s32 before the s_swappc_b64 call.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
367 define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 {
368 ; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
370 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371 ; GFX9-NEXT: s_mov_b32 s34, s33
372 ; GFX9-NEXT: s_mov_b32 s33, s32
373 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
374 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
375 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
376 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
377 ; GFX9-NEXT: s_waitcnt vmcnt(0)
378 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
379 ; GFX9-NEXT: s_addk_i32 s32, 0x400
380 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
381 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
382 ; GFX9-NEXT: s_getpc_b64 s[34:35]
383 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
384 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
385 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
386 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
387 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
388 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
389 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
390 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
391 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
392 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
393 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
394 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
395 ; GFX9-NEXT: s_mov_b32 s33, s34
396 ; GFX9-NEXT: s_waitcnt vmcnt(0)
397 ; GFX9-NEXT: s_setpc_b64 s[30:31]
399 ; GFX10-LABEL: test_call_external_void_func_i1_zeroext:
401 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402 ; GFX10-NEXT: s_mov_b32 s34, s33
403 ; GFX10-NEXT: s_mov_b32 s33, s32
404 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
405 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
406 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
407 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
408 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
409 ; GFX10-NEXT: s_waitcnt vmcnt(0)
410 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
411 ; GFX10-NEXT: s_addk_i32 s32, 0x200
412 ; GFX10-NEXT: s_getpc_b64 s[34:35]
413 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
414 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
415 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
416 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
417 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
418 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
419 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
420 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
421 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
422 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
423 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
424 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
425 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
426 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
427 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
428 ; GFX10-NEXT: s_mov_b32 s33, s34
429 ; GFX10-NEXT: s_waitcnt vmcnt(0)
430 ; GFX10-NEXT: s_setpc_b64 s[30:31]
432 ; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
434 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435 ; GFX11-NEXT: s_mov_b32 s0, s33
436 ; GFX11-NEXT: s_mov_b32 s33, s32
437 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
438 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
439 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
440 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc
441 ; GFX11-NEXT: s_waitcnt vmcnt(0)
442 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
443 ; GFX11-NEXT: s_add_i32 s32, s32, 16
444 ; GFX11-NEXT: s_getpc_b64 s[0:1]
445 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4
446 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12
447 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
448 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
449 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
450 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
451 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
452 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
453 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
454 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
455 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
456 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
457 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
458 ; GFX11-NEXT: s_add_i32 s32, s32, -16
459 ; GFX11-NEXT: s_mov_b32 s33, s0
460 ; GFX11-NEXT: s_waitcnt vmcnt(0)
461 ; GFX11-NEXT: s_setpc_b64 s[30:31]
463 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext:
464 ; GFX10-SCRATCH: ; %bb.0:
465 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
467 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
468 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
469 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
470 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
471 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
472 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
473 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
474 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
475 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
476 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
477 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4
478 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12
479 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
480 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
481 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
482 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
483 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
484 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
485 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
486 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
487 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
488 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
489 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
490 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
491 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
492 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
493 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
494 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
495 %var = load volatile i1, ptr addrspace(1) undef
496 call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var)
; Calls @external_void_func_i8 with the constant `i8 123`. The function's own
; i32 parameter is unnamed and unused.
; On every check prefix the expected code moves 0x7b into v0 and issues the
; s_swappc_b64 call without storing the argument to memory first.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
500 define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
501 ; GFX9-LABEL: test_call_external_void_func_i8_imm:
503 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
504 ; GFX9-NEXT: s_mov_b32 s34, s33
505 ; GFX9-NEXT: s_mov_b32 s33, s32
506 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
507 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
508 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
509 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
510 ; GFX9-NEXT: s_addk_i32 s32, 0x400
511 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
512 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
513 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
514 ; GFX9-NEXT: s_getpc_b64 s[34:35]
515 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
516 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
517 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
518 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
519 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
520 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
521 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
522 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
523 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
524 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
525 ; GFX9-NEXT: s_mov_b32 s33, s34
526 ; GFX9-NEXT: s_waitcnt vmcnt(0)
527 ; GFX9-NEXT: s_setpc_b64 s[30:31]
529 ; GFX10-LABEL: test_call_external_void_func_i8_imm:
531 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532 ; GFX10-NEXT: s_mov_b32 s34, s33
533 ; GFX10-NEXT: s_mov_b32 s33, s32
534 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
535 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
536 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
537 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
538 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
539 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
540 ; GFX10-NEXT: s_addk_i32 s32, 0x200
541 ; GFX10-NEXT: s_getpc_b64 s[34:35]
542 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
543 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
544 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
545 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
546 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
547 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
548 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
549 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
550 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
551 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
552 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
553 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
554 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
555 ; GFX10-NEXT: s_mov_b32 s33, s34
556 ; GFX10-NEXT: s_waitcnt vmcnt(0)
557 ; GFX10-NEXT: s_setpc_b64 s[30:31]
559 ; GFX11-LABEL: test_call_external_void_func_i8_imm:
561 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562 ; GFX11-NEXT: s_mov_b32 s0, s33
563 ; GFX11-NEXT: s_mov_b32 s33, s32
564 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
565 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
566 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
567 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
568 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
569 ; GFX11-NEXT: s_add_i32 s32, s32, 16
570 ; GFX11-NEXT: s_getpc_b64 s[0:1]
571 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4
572 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12
573 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
574 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
575 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
576 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
577 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
578 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
579 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
580 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
581 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
582 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
583 ; GFX11-NEXT: s_add_i32 s32, s32, -16
584 ; GFX11-NEXT: s_mov_b32 s33, s0
585 ; GFX11-NEXT: s_waitcnt vmcnt(0)
586 ; GFX11-NEXT: s_setpc_b64 s[30:31]
588 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
589 ; GFX10-SCRATCH: ; %bb.0:
590 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
592 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
593 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
594 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
595 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
596 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
597 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
598 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
599 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
600 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
601 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4
602 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12
603 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
604 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
605 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
606 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
607 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
608 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
609 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
610 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
611 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
612 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
613 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
614 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
615 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
616 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
617 call amdgpu_gfx void @external_void_func_i8(i8 123)
; Passes an i8 obtained from a volatile load through an undef addrspace(1)
; pointer to @external_void_func_i8_signext as a signext argument.
; The expected assembly for every run configuration loads the byte with a
; sign-extending load (global_load_sbyte, or global_load_i8 on gfx1100)
; straight into v0 before the call. The unnamed i32 parameter is not
; referenced by the visible body.
621 define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 {
622 ; GFX9-LABEL: test_call_external_void_func_i8_signext:
624 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625 ; GFX9-NEXT: s_mov_b32 s34, s33
626 ; GFX9-NEXT: s_mov_b32 s33, s32
627 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
628 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
629 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
630 ; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
631 ; GFX9-NEXT: s_waitcnt vmcnt(0)
632 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
633 ; GFX9-NEXT: s_addk_i32 s32, 0x400
634 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
635 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
636 ; GFX9-NEXT: s_getpc_b64 s[34:35]
637 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
638 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
639 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
640 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
641 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
642 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
643 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
644 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
645 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
646 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
647 ; GFX9-NEXT: s_mov_b32 s33, s34
648 ; GFX9-NEXT: s_waitcnt vmcnt(0)
649 ; GFX9-NEXT: s_setpc_b64 s[30:31]
651 ; GFX10-LABEL: test_call_external_void_func_i8_signext:
653 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654 ; GFX10-NEXT: s_mov_b32 s34, s33
655 ; GFX10-NEXT: s_mov_b32 s33, s32
656 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
657 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
658 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
659 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
660 ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc
661 ; GFX10-NEXT: s_waitcnt vmcnt(0)
662 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
663 ; GFX10-NEXT: s_addk_i32 s32, 0x200
664 ; GFX10-NEXT: s_getpc_b64 s[34:35]
665 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
666 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
667 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
668 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
669 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
670 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
671 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
672 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
673 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
674 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
675 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
676 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
677 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
678 ; GFX10-NEXT: s_mov_b32 s33, s34
679 ; GFX10-NEXT: s_waitcnt vmcnt(0)
680 ; GFX10-NEXT: s_setpc_b64 s[30:31]
682 ; GFX11-LABEL: test_call_external_void_func_i8_signext:
684 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
685 ; GFX11-NEXT: s_mov_b32 s0, s33
686 ; GFX11-NEXT: s_mov_b32 s33, s32
687 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
688 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
689 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
690 ; GFX11-NEXT: global_load_i8 v0, v[0:1], off glc dlc
691 ; GFX11-NEXT: s_waitcnt vmcnt(0)
692 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
693 ; GFX11-NEXT: s_add_i32 s32, s32, 16
694 ; GFX11-NEXT: s_getpc_b64 s[0:1]
695 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4
696 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12
697 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
698 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
699 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
700 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
701 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
702 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
703 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
704 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
705 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
706 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
707 ; GFX11-NEXT: s_add_i32 s32, s32, -16
708 ; GFX11-NEXT: s_mov_b32 s33, s0
709 ; GFX11-NEXT: s_waitcnt vmcnt(0)
710 ; GFX11-NEXT: s_setpc_b64 s[30:31]
712 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext:
713 ; GFX10-SCRATCH: ; %bb.0:
714 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
715 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
716 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
717 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
718 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
719 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
720 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
721 ; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc
722 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
723 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
724 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
725 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
726 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4
727 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12
728 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
729 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
730 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
731 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
732 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
733 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
734 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
735 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
736 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
737 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
738 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
739 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
740 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
741 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
742 %var = load volatile i8, ptr addrspace(1) undef
743 call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var)
; Passes an i8 obtained from a volatile load through an undef addrspace(1)
; pointer to @external_void_func_i8_zeroext as a zeroext argument.
; The expected assembly for every run configuration loads the byte with a
; zero-extending load (global_load_ubyte, or global_load_u8 on gfx1100)
; straight into v0 before the call. The unnamed i32 parameter is not
; referenced by the visible body.
747 define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 {
748 ; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
750 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
751 ; GFX9-NEXT: s_mov_b32 s34, s33
752 ; GFX9-NEXT: s_mov_b32 s33, s32
753 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
754 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
755 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
756 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
757 ; GFX9-NEXT: s_waitcnt vmcnt(0)
758 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
759 ; GFX9-NEXT: s_addk_i32 s32, 0x400
760 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
761 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
762 ; GFX9-NEXT: s_getpc_b64 s[34:35]
763 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
764 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
765 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
766 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
767 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
768 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
769 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
770 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
771 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
772 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
773 ; GFX9-NEXT: s_mov_b32 s33, s34
774 ; GFX9-NEXT: s_waitcnt vmcnt(0)
775 ; GFX9-NEXT: s_setpc_b64 s[30:31]
777 ; GFX10-LABEL: test_call_external_void_func_i8_zeroext:
779 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
780 ; GFX10-NEXT: s_mov_b32 s34, s33
781 ; GFX10-NEXT: s_mov_b32 s33, s32
782 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
783 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
784 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
785 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
786 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
787 ; GFX10-NEXT: s_waitcnt vmcnt(0)
788 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
789 ; GFX10-NEXT: s_addk_i32 s32, 0x200
790 ; GFX10-NEXT: s_getpc_b64 s[34:35]
791 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
792 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
793 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
794 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
795 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
796 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
797 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
798 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
799 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
800 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
801 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
802 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
803 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
804 ; GFX10-NEXT: s_mov_b32 s33, s34
805 ; GFX10-NEXT: s_waitcnt vmcnt(0)
806 ; GFX10-NEXT: s_setpc_b64 s[30:31]
808 ; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
810 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811 ; GFX11-NEXT: s_mov_b32 s0, s33
812 ; GFX11-NEXT: s_mov_b32 s33, s32
813 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
814 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
815 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
816 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc
817 ; GFX11-NEXT: s_waitcnt vmcnt(0)
818 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
819 ; GFX11-NEXT: s_add_i32 s32, s32, 16
820 ; GFX11-NEXT: s_getpc_b64 s[0:1]
821 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4
822 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12
823 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
824 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
825 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
826 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
827 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
828 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
829 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
830 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
831 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
832 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
833 ; GFX11-NEXT: s_add_i32 s32, s32, -16
834 ; GFX11-NEXT: s_mov_b32 s33, s0
835 ; GFX11-NEXT: s_waitcnt vmcnt(0)
836 ; GFX11-NEXT: s_setpc_b64 s[30:31]
838 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext:
839 ; GFX10-SCRATCH: ; %bb.0:
840 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
842 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
843 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
844 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
845 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
846 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
847 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
848 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
849 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
850 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
851 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
852 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4
853 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12
854 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
855 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
856 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
857 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
858 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
859 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
860 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
861 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
862 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
863 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
864 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
865 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
866 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
867 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
868 %var = load volatile i8, ptr addrspace(1) undef
869 call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var)
; Calls @external_void_func_i16 with the constant i16 123.
; The expected assembly for every run configuration materializes the
; constant in v0 with a single v_mov_b32 of 0x7b before the call; no load
; is involved.
873 define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
874 ; GFX9-LABEL: test_call_external_void_func_i16_imm:
876 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877 ; GFX9-NEXT: s_mov_b32 s34, s33
878 ; GFX9-NEXT: s_mov_b32 s33, s32
879 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
880 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
881 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
882 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
883 ; GFX9-NEXT: s_addk_i32 s32, 0x400
884 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
885 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
886 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
887 ; GFX9-NEXT: s_getpc_b64 s[34:35]
888 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
889 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
890 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
891 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
892 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
893 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
894 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
895 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
896 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
897 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
898 ; GFX9-NEXT: s_mov_b32 s33, s34
899 ; GFX9-NEXT: s_waitcnt vmcnt(0)
900 ; GFX9-NEXT: s_setpc_b64 s[30:31]
902 ; GFX10-LABEL: test_call_external_void_func_i16_imm:
904 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905 ; GFX10-NEXT: s_mov_b32 s34, s33
906 ; GFX10-NEXT: s_mov_b32 s33, s32
907 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
908 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
909 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
910 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
911 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
912 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
913 ; GFX10-NEXT: s_addk_i32 s32, 0x200
914 ; GFX10-NEXT: s_getpc_b64 s[34:35]
915 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
916 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
917 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
918 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
919 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
920 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
921 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
922 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
923 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
924 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
925 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
926 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
927 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
928 ; GFX10-NEXT: s_mov_b32 s33, s34
929 ; GFX10-NEXT: s_waitcnt vmcnt(0)
930 ; GFX10-NEXT: s_setpc_b64 s[30:31]
932 ; GFX11-LABEL: test_call_external_void_func_i16_imm:
934 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
935 ; GFX11-NEXT: s_mov_b32 s0, s33
936 ; GFX11-NEXT: s_mov_b32 s33, s32
937 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
938 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
939 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
940 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
941 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
942 ; GFX11-NEXT: s_add_i32 s32, s32, 16
943 ; GFX11-NEXT: s_getpc_b64 s[0:1]
944 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4
945 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12
946 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
947 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
948 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
949 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
950 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
951 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
952 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
953 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
954 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
955 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
956 ; GFX11-NEXT: s_add_i32 s32, s32, -16
957 ; GFX11-NEXT: s_mov_b32 s33, s0
958 ; GFX11-NEXT: s_waitcnt vmcnt(0)
959 ; GFX11-NEXT: s_setpc_b64 s[30:31]
961 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
962 ; GFX10-SCRATCH: ; %bb.0:
963 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
964 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
965 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
966 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
967 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
968 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
969 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
970 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
971 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
972 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
973 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
974 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4
975 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12
976 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
977 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
978 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
979 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
980 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
981 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
982 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
983 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
984 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
985 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
986 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
987 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
988 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
989 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
990 call amdgpu_gfx void @external_void_func_i16(i16 123)
; Passes an i16 obtained from a volatile load through an undef addrspace(1)
; pointer to @external_void_func_i16_signext as a signext argument.
; The unnamed i32 parameter is not referenced by the visible body.
; NOTE(review) - the expected assembly uses global_load_ushort
; (global_load_u16 on gfx1100), the same load as the zeroext variant of this
; test, and the checks show no separate sign-extension instruction between
; the load and the call. The assertions are autogenerated, so this is what
; llc emitted when they were last regenerated; confirm that it is intended
; for a signext i16 argument.
994 define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 {
995 ; GFX9-LABEL: test_call_external_void_func_i16_signext:
997 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998 ; GFX9-NEXT: s_mov_b32 s34, s33
999 ; GFX9-NEXT: s_mov_b32 s33, s32
1000 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1001 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1002 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1003 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
1004 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1005 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1006 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1007 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1008 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1009 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1010 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
1011 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
1012 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1013 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1014 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1015 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1016 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1017 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1018 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1019 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1020 ; GFX9-NEXT: s_mov_b32 s33, s34
1021 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1022 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1024 ; GFX10-LABEL: test_call_external_void_func_i16_signext:
1026 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027 ; GFX10-NEXT: s_mov_b32 s34, s33
1028 ; GFX10-NEXT: s_mov_b32 s33, s32
1029 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1030 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1031 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1032 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1033 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1034 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1035 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1036 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1037 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1038 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
1039 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
1040 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1041 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1042 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1043 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1044 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1045 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1046 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1047 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1048 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1049 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1050 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1051 ; GFX10-NEXT: s_mov_b32 s33, s34
1052 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1053 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1055 ; GFX11-LABEL: test_call_external_void_func_i16_signext:
1057 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058 ; GFX11-NEXT: s_mov_b32 s0, s33
1059 ; GFX11-NEXT: s_mov_b32 s33, s32
1060 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1061 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1062 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1063 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc
1064 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1065 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1066 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1067 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1068 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4
1069 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12
1070 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1071 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1072 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1073 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1074 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1075 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1076 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1077 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1078 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1079 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1080 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1081 ; GFX11-NEXT: s_mov_b32 s33, s0
1082 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1083 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1085 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext:
1086 ; GFX10-SCRATCH: ; %bb.0:
1087 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1088 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1089 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1090 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1091 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1092 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1093 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1094 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1095 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1096 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1097 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1098 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1099 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4
1100 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12
1101 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1102 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1103 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1104 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1105 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1106 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1107 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1108 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1109 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1110 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1111 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1112 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1113 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1114 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1115 %var = load volatile i16, ptr addrspace(1) undef
1116 call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var)
; Passes an i16 obtained from a volatile load through an undef addrspace(1)
; pointer to @external_void_func_i16_zeroext as a zeroext argument.
; The expected assembly for every run configuration loads the value with
; global_load_ushort (global_load_u16 on gfx1100) straight into v0 before
; the call. The unnamed i32 parameter is not referenced by the visible body.
1120 define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 {
1121 ; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
1123 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124 ; GFX9-NEXT: s_mov_b32 s34, s33
1125 ; GFX9-NEXT: s_mov_b32 s33, s32
1126 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1127 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1128 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1129 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
1130 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1131 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1132 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1133 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1134 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1135 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1136 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
1137 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
1138 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1139 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1140 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1141 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1142 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1143 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1144 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1145 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1146 ; GFX9-NEXT: s_mov_b32 s33, s34
1147 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1148 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1150 ; GFX10-LABEL: test_call_external_void_func_i16_zeroext:
1152 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153 ; GFX10-NEXT: s_mov_b32 s34, s33
1154 ; GFX10-NEXT: s_mov_b32 s33, s32
1155 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1156 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1157 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1158 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1159 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1160 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1161 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1162 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1163 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1164 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
1165 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
1166 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1167 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1168 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1169 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1170 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1171 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1172 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1173 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1174 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1175 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1176 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1177 ; GFX10-NEXT: s_mov_b32 s33, s34
1178 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1179 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1181 ; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
1183 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184 ; GFX11-NEXT: s_mov_b32 s0, s33
1185 ; GFX11-NEXT: s_mov_b32 s33, s32
1186 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1187 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1188 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1189 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc
1190 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1191 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1192 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1193 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1194 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4
1195 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12
1196 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1197 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1198 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1199 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1200 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1201 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1202 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1203 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1204 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1205 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1206 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1207 ; GFX11-NEXT: s_mov_b32 s33, s0
1208 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1209 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1211 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext:
1212 ; GFX10-SCRATCH: ; %bb.0:
1213 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1214 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1215 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1216 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1217 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1218 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1219 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1220 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc
1221 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1222 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1223 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1224 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1225 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4
1226 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12
1227 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1228 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1229 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1230 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1231 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1232 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1233 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1234 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1235 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1236 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1237 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1238 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1239 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1240 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1241 %var = load volatile i16, ptr addrspace(1) undef
1242 call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var)
1246 define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 {
1247 ; GFX9-LABEL: test_call_external_void_func_i32_imm:
1249 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1250 ; GFX9-NEXT: s_mov_b32 s34, s33
1251 ; GFX9-NEXT: s_mov_b32 s33, s32
1252 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1253 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1254 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1255 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1256 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1257 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1258 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
1259 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1260 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1261 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
1262 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
1263 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1264 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1265 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1266 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1267 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1268 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1269 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1270 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1271 ; GFX9-NEXT: s_mov_b32 s33, s34
1272 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1273 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1275 ; GFX10-LABEL: test_call_external_void_func_i32_imm:
1277 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1278 ; GFX10-NEXT: s_mov_b32 s34, s33
1279 ; GFX10-NEXT: s_mov_b32 s33, s32
1280 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1281 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1282 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1283 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1284 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1285 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
1286 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1287 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1288 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
1289 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
1290 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1291 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1292 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1293 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1294 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1295 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1296 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1297 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1298 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1299 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1300 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1301 ; GFX10-NEXT: s_mov_b32 s33, s34
1302 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1303 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1305 ; GFX11-LABEL: test_call_external_void_func_i32_imm:
1307 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308 ; GFX11-NEXT: s_mov_b32 s0, s33
1309 ; GFX11-NEXT: s_mov_b32 s33, s32
1310 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1311 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1312 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1313 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1314 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
1315 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1316 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1317 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4
1318 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12
1319 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1320 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1321 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1322 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1323 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1324 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1325 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1326 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1327 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1328 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1329 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1330 ; GFX11-NEXT: s_mov_b32 s33, s0
1331 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1332 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1334 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm:
1335 ; GFX10-SCRATCH: ; %bb.0:
1336 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1337 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1338 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1339 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1340 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1341 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1342 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1343 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1344 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42
1345 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1346 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1347 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4
1348 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12
1349 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1350 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1351 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1352 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1353 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1354 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1355 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1356 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1357 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1358 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1359 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1360 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1361 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1362 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1363 call amdgpu_gfx void @external_void_func_i32(i32 42)
; Passes the 64-bit immediate 123 to @external_void_func_i64.
; The autogenerated checks below expect the argument split across two VGPRs
; before the call: v0 = 0x7b (123) and v1 = 0.
1367 define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 {
1368 ; GFX9-LABEL: test_call_external_void_func_i64_imm:
1370 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371 ; GFX9-NEXT: s_mov_b32 s34, s33
1372 ; GFX9-NEXT: s_mov_b32 s33, s32
1373 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1374 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1375 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1376 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1377 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1378 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1379 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
1380 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1381 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1382 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1383 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
1384 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
1385 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1386 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1387 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1388 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1389 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1390 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1391 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1392 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1393 ; GFX9-NEXT: s_mov_b32 s33, s34
1394 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1395 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1397 ; GFX10-LABEL: test_call_external_void_func_i64_imm:
1399 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1400 ; GFX10-NEXT: s_mov_b32 s34, s33
1401 ; GFX10-NEXT: s_mov_b32 s33, s32
1402 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1403 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1404 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1405 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1406 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1407 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
1408 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1409 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1410 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1411 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
1412 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
1413 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1414 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1415 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1416 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1417 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1418 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1419 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1420 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1421 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1422 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1423 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1424 ; GFX10-NEXT: s_mov_b32 s33, s34
1425 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1426 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1428 ; GFX11-LABEL: test_call_external_void_func_i64_imm:
1430 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431 ; GFX11-NEXT: s_mov_b32 s0, s33
1432 ; GFX11-NEXT: s_mov_b32 s33, s32
1433 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1434 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1435 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1436 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1437 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
1438 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1439 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1440 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4
1441 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12
1442 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1443 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1444 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1445 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1446 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1447 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1448 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1449 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1450 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1451 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1452 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1453 ; GFX11-NEXT: s_mov_b32 s33, s0
1454 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1455 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1457 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm:
1458 ; GFX10-SCRATCH: ; %bb.0:
1459 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1461 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1462 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1463 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1464 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1465 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1466 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1467 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
1468 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1469 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1470 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1471 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4
1472 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12
1473 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1474 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1475 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1476 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1477 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1478 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1479 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1480 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1481 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1482 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1483 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1484 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1485 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1486 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1487 call amdgpu_gfx void @external_void_func_i64(i64 123)
; Loads a <2 x i64> from the null addrspace(1) pointer and passes it to
; @external_void_func_v2i64.
; The autogenerated checks below expect one 128-bit global load into v[0:3]
; (address built from v0 = v1 = 0) issued ahead of the call.
1491 define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 {
1492 ; GFX9-LABEL: test_call_external_void_func_v2i64:
1494 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1495 ; GFX9-NEXT: s_mov_b32 s34, s33
1496 ; GFX9-NEXT: s_mov_b32 s33, s32
1497 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1498 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1499 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1500 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1501 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1502 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1503 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1504 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1505 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1506 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1507 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1508 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1509 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1510 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1511 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1512 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1513 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1514 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1515 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1516 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1517 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1518 ; GFX9-NEXT: s_mov_b32 s33, s34
1519 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1520 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1522 ; GFX10-LABEL: test_call_external_void_func_v2i64:
1524 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1525 ; GFX10-NEXT: s_mov_b32 s34, s33
1526 ; GFX10-NEXT: s_mov_b32 s33, s32
1527 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1528 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1529 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1530 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1531 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1532 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1533 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1534 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1535 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1536 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1537 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1538 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1539 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1540 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1541 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1542 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1543 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1544 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1545 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1546 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1547 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1548 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1549 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1550 ; GFX10-NEXT: s_mov_b32 s33, s34
1551 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1552 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1554 ; GFX11-LABEL: test_call_external_void_func_v2i64:
1556 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1557 ; GFX11-NEXT: s_mov_b32 s0, s33
1558 ; GFX11-NEXT: s_mov_b32 s33, s32
1559 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1560 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1561 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1562 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1563 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
1564 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1565 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1566 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1567 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1568 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1569 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1570 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1571 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1572 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1573 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1574 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1575 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1576 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1577 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1578 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1579 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1580 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1581 ; GFX11-NEXT: s_mov_b32 s33, s0
1582 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1583 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1585 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64:
1586 ; GFX10-SCRATCH: ; %bb.0:
1587 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1588 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1589 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1590 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1591 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1592 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1593 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1594 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1595 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1596 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1597 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1598 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1599 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1600 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1601 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1602 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1603 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1604 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1605 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1606 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1607 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1608 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1609 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1610 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1611 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1612 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1613 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1614 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1615 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1616 %val = load <2 x i64>, ptr addrspace(1) null
1617 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val)
; Passes the constant <2 x i64> <8589934593, 17179869187>
; (0x200000001 and 0x400000003) to @external_void_func_v2i64.
; The autogenerated checks below expect the four 32-bit pieces materialized
; as v0 = 1, v1 = 2, v2 = 3, v3 = 4 before the call.
1621 define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 {
1622 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
1624 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625 ; GFX9-NEXT: s_mov_b32 s34, s33
1626 ; GFX9-NEXT: s_mov_b32 s33, s32
1627 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1628 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1629 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1630 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1631 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1632 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1633 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
1634 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
1635 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
1636 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
1637 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1638 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1639 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1640 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1641 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1642 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1643 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1644 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1645 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1646 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1647 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1648 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1649 ; GFX9-NEXT: s_mov_b32 s33, s34
1650 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1651 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1653 ; GFX10-LABEL: test_call_external_void_func_v2i64_imm:
1655 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1656 ; GFX10-NEXT: s_mov_b32 s34, s33
1657 ; GFX10-NEXT: s_mov_b32 s33, s32
1658 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1659 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1660 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1661 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1662 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1663 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
1664 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
1665 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
1666 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
1667 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1668 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1669 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1670 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1671 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1672 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1673 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1674 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1675 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1676 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1677 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1678 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1679 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1680 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1681 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1682 ; GFX10-NEXT: s_mov_b32 s33, s34
1683 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1684 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1686 ; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
1688 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; GFX11-NEXT: s_mov_b32 s0, s33
1690 ; GFX11-NEXT: s_mov_b32 s33, s32
1691 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1692 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1693 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1694 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1695 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
1696 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
1697 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1698 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1699 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1700 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1701 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1702 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1703 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1704 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1705 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1706 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1707 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1708 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1709 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1710 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1711 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1712 ; GFX11-NEXT: s_mov_b32 s33, s0
1713 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1714 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1716 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm:
1717 ; GFX10-SCRATCH: ; %bb.0:
1718 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1719 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1720 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1721 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1722 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1723 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1724 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1725 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1726 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
1727 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
1728 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
1729 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
1730 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1731 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1732 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1733 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1734 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1735 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1736 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1737 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1738 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1739 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1740 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1741 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1742 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1743 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1744 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1745 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1746 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1747 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1748 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
; Builds a <3 x i64> argument for @external_void_func_v3i64: lanes 0-1 come
; from a <2 x i64> load of the null addrspace(1) pointer, lane 2 is lane 0 of
; the constant operand (8589934593 = 0x200000001) selected by the shuffle mask.
; The autogenerated checks below expect the load in v[0:3] and the constant
; lane materialized as v4 = 1, v5 = 2.
; NOTE(review): the mask <0, 1, 2> never selects the `undef` lane of the
; constant operand, so it could presumably be written as `poison` without
; changing the output -- confirm by regenerating the checks before changing it.
1753 define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
1754 ; GFX9-LABEL: test_call_external_void_func_v3i64:
1756 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1757 ; GFX9-NEXT: s_mov_b32 s34, s33
1758 ; GFX9-NEXT: s_mov_b32 s33, s32
1759 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1760 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1761 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1762 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1763 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1764 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1765 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1766 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1767 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1768 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1769 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1770 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1771 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1772 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
1773 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
1774 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1775 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1776 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1777 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1778 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1779 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1780 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1781 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1782 ; GFX9-NEXT: s_mov_b32 s33, s34
1783 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1784 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1786 ; GFX10-LABEL: test_call_external_void_func_v3i64:
1788 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1789 ; GFX10-NEXT: s_mov_b32 s34, s33
1790 ; GFX10-NEXT: s_mov_b32 s33, s32
1791 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1792 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1793 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1794 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1795 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1796 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1797 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1798 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
1799 ; GFX10-NEXT: v_mov_b32_e32 v5, 2
1800 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1801 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1802 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1803 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1804 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
1805 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
1806 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1807 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1808 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1809 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1810 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1811 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1812 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1813 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1814 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1815 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1816 ; GFX10-NEXT: s_mov_b32 s33, s34
1817 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1818 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1820 ; GFX11-LABEL: test_call_external_void_func_v3i64:
1822 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1823 ; GFX11-NEXT: s_mov_b32 s0, s33
1824 ; GFX11-NEXT: s_mov_b32 s33, s32
1825 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1826 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1827 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1828 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2
1829 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1
1830 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1831 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1832 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1833 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4
1834 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12
1835 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1836 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1837 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1838 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1839 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1840 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1841 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1842 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1843 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1844 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1845 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1846 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1847 ; GFX11-NEXT: s_mov_b32 s33, s0
1848 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1849 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1851 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64:
1852 ; GFX10-SCRATCH: ; %bb.0:
1853 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1854 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1855 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1856 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1857 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
1858 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1859 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1860 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1861 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1862 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
1863 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
1864 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2
1865 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1866 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1867 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1868 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1869 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4
1870 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12
1871 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1872 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1873 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
1874 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
1875 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
1876 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1877 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
1878 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1879 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
1880 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1881 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
1882 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1883 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
1883 %load = load <2 x i64>, ptr addrspace(1) null
1884 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
1886 call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
1890 define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 {
1891 ; GFX9-LABEL: test_call_external_void_func_v4i64:
1893 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1894 ; GFX9-NEXT: s_mov_b32 s34, s33
1895 ; GFX9-NEXT: s_mov_b32 s33, s32
1896 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1897 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1898 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1899 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1900 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1901 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1902 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
1903 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1904 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1905 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1906 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1907 ; GFX9-NEXT: v_mov_b32_e32 v6, 3
1908 ; GFX9-NEXT: v_mov_b32_e32 v7, 4
1909 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1910 ; GFX9-NEXT: s_getpc_b64 s[34:35]
1911 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
1912 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
1913 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1914 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
1915 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
1916 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1917 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1918 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1919 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1920 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1921 ; GFX9-NEXT: s_mov_b32 s33, s34
1922 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1923 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1925 ; GFX10-LABEL: test_call_external_void_func_v4i64:
1927 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1928 ; GFX10-NEXT: s_mov_b32 s34, s33
1929 ; GFX10-NEXT: s_mov_b32 s33, s32
1930 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1931 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1932 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1933 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1934 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1935 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1936 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1937 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
1938 ; GFX10-NEXT: v_mov_b32_e32 v5, 2
1939 ; GFX10-NEXT: v_mov_b32_e32 v6, 3
1940 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1941 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1942 ; GFX10-NEXT: v_mov_b32_e32 v7, 4
1943 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1944 ; GFX10-NEXT: s_getpc_b64 s[34:35]
1945 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
1946 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
1947 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1948 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1949 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1950 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1951 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1952 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1953 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1954 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1955 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1956 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1957 ; GFX10-NEXT: s_mov_b32 s33, s34
1958 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1959 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1961 ; GFX11-LABEL: test_call_external_void_func_v4i64:
1963 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1964 ; GFX11-NEXT: s_mov_b32 s0, s33
1965 ; GFX11-NEXT: s_mov_b32 s33, s32
1966 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1967 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1968 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1969 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2
1970 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1
1971 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1972 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4
1973 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
1974 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1975 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1976 ; GFX11-NEXT: s_getpc_b64 s[0:1]
1977 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4
1978 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12
1979 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1980 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1981 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1982 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1983 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1984 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1985 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1986 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1987 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1988 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1989 ; GFX11-NEXT: s_mov_b32 s33, s0
1990 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1991 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1993 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64:
1994 ; GFX10-SCRATCH: ; %bb.0:
1995 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1996 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
1997 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1998 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
1999 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2000 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2001 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2002 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2003 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
2004 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2005 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
2006 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2
2007 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 3
2008 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
2009 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2010 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4
2011 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2012 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2013 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4
2014 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12
2015 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2016 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2017 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2018 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2019 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2020 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2021 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2022 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2023 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2024 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2025 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2026 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2027 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2028 %load = load <2 x i64>, ptr addrspace(1) null
2029 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2030 call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val)
; Calls @external_void_func_f16 with the constant half 4.0 through the amdgpu_gfx
; calling convention. In all four llc configurations the checks expect the
; argument to be placed in v0 as the bit pattern 0x4400, and expect v40 to be
; spilled and then used (lanes 0-2) to hold s30, s31 and the incoming s33
; across the call. The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
2034 define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
2035 ; GFX9-LABEL: test_call_external_void_func_f16_imm:
2037 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2038 ; GFX9-NEXT: s_mov_b32 s34, s33
2039 ; GFX9-NEXT: s_mov_b32 s33, s32
2040 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2041 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2042 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2043 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2044 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2045 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2046 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
2047 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2048 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2049 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
2050 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
2051 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2052 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2053 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2054 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2055 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2056 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2057 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2058 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2059 ; GFX9-NEXT: s_mov_b32 s33, s34
2060 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2061 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2063 ; GFX10-LABEL: test_call_external_void_func_f16_imm:
2065 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2066 ; GFX10-NEXT: s_mov_b32 s34, s33
2067 ; GFX10-NEXT: s_mov_b32 s33, s32
2068 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2069 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2070 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2071 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2072 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2073 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400
2074 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2075 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2076 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
2077 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
2078 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2079 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2080 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2081 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2082 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2083 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2084 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2085 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2086 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2087 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2088 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2089 ; GFX10-NEXT: s_mov_b32 s33, s34
2090 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2091 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2093 ; GFX11-LABEL: test_call_external_void_func_f16_imm:
2095 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2096 ; GFX11-NEXT: s_mov_b32 s0, s33
2097 ; GFX11-NEXT: s_mov_b32 s33, s32
2098 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2099 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2100 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2101 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2102 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
2103 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2104 ; GFX11-NEXT: s_getpc_b64 s[0:1]
2105 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4
2106 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12
2107 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2108 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2109 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2110 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2111 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2112 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2113 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2114 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2115 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2116 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2117 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2118 ; GFX11-NEXT: s_mov_b32 s33, s0
2119 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2120 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2122 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
2123 ; GFX10-SCRATCH: ; %bb.0:
2124 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2125 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2126 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2127 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2128 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2129 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2130 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2131 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2132 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400
2133 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2134 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2135 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4
2136 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12
2137 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2138 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2139 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2140 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2141 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2142 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2143 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2144 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2145 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2146 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2147 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2148 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2149 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2150 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2151 call amdgpu_gfx void @external_void_func_f16(half 4.0)
; Calls @external_void_func_f32 with the constant float 4.0 through the
; amdgpu_gfx calling convention. In all four llc configurations the checks
; expect a single v_mov_b32 of the literal 4.0 into v0 before the call, with
; v40 spilled and used (lanes 0-2) to hold s30, s31 and the incoming s33.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
2155 define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 {
2156 ; GFX9-LABEL: test_call_external_void_func_f32_imm:
2158 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2159 ; GFX9-NEXT: s_mov_b32 s34, s33
2160 ; GFX9-NEXT: s_mov_b32 s33, s32
2161 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2162 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2163 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2164 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2165 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2166 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2167 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
2168 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2169 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2170 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
2171 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
2172 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2173 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2174 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2175 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2176 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2177 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2178 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2179 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2180 ; GFX9-NEXT: s_mov_b32 s33, s34
2181 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2182 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2184 ; GFX10-LABEL: test_call_external_void_func_f32_imm:
2186 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187 ; GFX10-NEXT: s_mov_b32 s34, s33
2188 ; GFX10-NEXT: s_mov_b32 s33, s32
2189 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2190 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2191 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2192 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2193 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2194 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0
2195 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2196 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2197 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
2198 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
2199 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2201 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2202 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2203 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2204 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2205 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2206 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2207 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2208 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2209 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2210 ; GFX10-NEXT: s_mov_b32 s33, s34
2211 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2212 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2214 ; GFX11-LABEL: test_call_external_void_func_f32_imm:
2216 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217 ; GFX11-NEXT: s_mov_b32 s0, s33
2218 ; GFX11-NEXT: s_mov_b32 s33, s32
2219 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2220 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2221 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2222 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2223 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
2224 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2225 ; GFX11-NEXT: s_getpc_b64 s[0:1]
2226 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4
2227 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12
2228 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2229 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2230 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2231 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2232 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2233 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2234 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2235 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2236 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2237 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2238 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2239 ; GFX11-NEXT: s_mov_b32 s33, s0
2240 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2241 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2243 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm:
2244 ; GFX10-SCRATCH: ; %bb.0:
2245 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2246 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2247 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2248 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2249 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2250 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2251 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2252 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2253 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0
2254 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2255 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2256 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4
2257 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12
2258 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2259 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2260 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2261 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2262 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2263 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2264 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2265 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2266 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2267 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2268 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2269 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2270 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2271 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2272 call amdgpu_gfx void @external_void_func_f32(float 4.0)
; Calls @external_void_func_v2f32 with the constant vector <1.0, 2.0> through
; the amdgpu_gfx calling convention. The checks expect element 0 in v0 and
; element 1 in v1; the gfx1100 checks expect both moves fused into one
; v_dual_mov_b32, while the other configurations expect two v_mov_b32.
; v40 is spilled and used (lanes 0-2) to hold s30, s31 and the incoming s33.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
2276 define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 {
2277 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
2279 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2280 ; GFX9-NEXT: s_mov_b32 s34, s33
2281 ; GFX9-NEXT: s_mov_b32 s33, s32
2282 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2283 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2284 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2285 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2286 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2287 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2288 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2289 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2290 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2291 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2292 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
2293 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
2294 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2295 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2296 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2297 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2298 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2299 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2300 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2301 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2302 ; GFX9-NEXT: s_mov_b32 s33, s34
2303 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2304 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2306 ; GFX10-LABEL: test_call_external_void_func_v2f32_imm:
2308 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2309 ; GFX10-NEXT: s_mov_b32 s34, s33
2310 ; GFX10-NEXT: s_mov_b32 s33, s32
2311 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2312 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2313 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2314 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2315 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2316 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
2317 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2318 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2319 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2320 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
2321 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
2322 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2323 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2324 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2325 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2326 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2327 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2328 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2329 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2330 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2331 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2332 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2333 ; GFX10-NEXT: s_mov_b32 s33, s34
2334 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2335 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2337 ; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
2339 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2340 ; GFX11-NEXT: s_mov_b32 s0, s33
2341 ; GFX11-NEXT: s_mov_b32 s33, s32
2342 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2343 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2344 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2345 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2346 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2347 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2348 ; GFX11-NEXT: s_getpc_b64 s[0:1]
2349 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4
2350 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12
2351 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2352 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2353 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2354 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2355 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2356 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2357 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2358 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2359 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2360 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2361 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2362 ; GFX11-NEXT: s_mov_b32 s33, s0
2363 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2364 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2366 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm:
2367 ; GFX10-SCRATCH: ; %bb.0:
2368 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2369 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2370 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2371 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2372 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2373 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2374 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2375 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2376 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
2377 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2378 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2379 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2380 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4
2381 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12
2382 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2383 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2384 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2385 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2386 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2387 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2388 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2389 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2390 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2391 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2392 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2393 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2394 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2395 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2396 call amdgpu_gfx void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
; Calls @external_void_func_v3f32 with the constant vector <1.0, 2.0, 4.0>
; through the amdgpu_gfx calling convention. The checks expect the three
; elements in v0, v1 and v2 in order; the gfx1100 checks expect the first two
; moves fused into one v_dual_mov_b32 followed by a plain v_mov_b32 for v2.
; v40 is spilled and used (lanes 0-2) to hold s30, s31 and the incoming s33.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
2400 define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 {
2401 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
2403 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404 ; GFX9-NEXT: s_mov_b32 s34, s33
2405 ; GFX9-NEXT: s_mov_b32 s33, s32
2406 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2407 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2408 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2409 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2410 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2411 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2412 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2413 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2414 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
2415 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2416 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2417 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
2418 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
2419 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2420 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2421 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2422 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2423 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2424 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2425 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2426 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2427 ; GFX9-NEXT: s_mov_b32 s33, s34
2428 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2429 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2431 ; GFX10-LABEL: test_call_external_void_func_v3f32_imm:
2433 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2434 ; GFX10-NEXT: s_mov_b32 s34, s33
2435 ; GFX10-NEXT: s_mov_b32 s33, s32
2436 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2437 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2438 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2439 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2440 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2441 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
2442 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2443 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0
2444 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2445 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2446 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2447 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
2448 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
2449 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2450 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2451 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2452 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2453 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2454 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2455 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2456 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2457 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2458 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2459 ; GFX10-NEXT: s_mov_b32 s33, s34
2460 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2461 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2463 ; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
2465 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2466 ; GFX11-NEXT: s_mov_b32 s0, s33
2467 ; GFX11-NEXT: s_mov_b32 s33, s32
2468 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2469 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2470 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2471 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2472 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2473 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0
2474 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2475 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2476 ; GFX11-NEXT: s_getpc_b64 s[0:1]
2477 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4
2478 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12
2479 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2480 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2481 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2482 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2483 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2484 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2485 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2486 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2487 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2488 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2489 ; GFX11-NEXT: s_mov_b32 s33, s0
2490 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2491 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2493 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm:
2494 ; GFX10-SCRATCH: ; %bb.0:
2495 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2496 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2497 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2498 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2499 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2500 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2501 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2502 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2503 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
2504 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2505 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0
2506 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2507 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2508 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2509 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4
2510 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12
2511 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2512 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2513 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2514 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2515 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2516 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2517 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2518 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2519 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2520 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2521 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2522 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2523 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2524 call amdgpu_gfx void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
; Calls @external_void_func_v5f32 with the constant vector
; <1.0, 2.0, 4.0, -1.0, 0.5> through the amdgpu_gfx calling convention. The
; checks expect the five elements in v0 through v4 in order; the gfx1100
; checks expect v0/v1 and v2/v3 each fused into a v_dual_mov_b32, with a plain
; v_mov_b32 for v4. v40 is spilled and used (lanes 0-2) to hold s30, s31 and
; the incoming s33. The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
2528 define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 {
2529 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
2531 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2532 ; GFX9-NEXT: s_mov_b32 s34, s33
2533 ; GFX9-NEXT: s_mov_b32 s33, s32
2534 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2535 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2536 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2537 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2538 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2539 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2540 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
2541 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2542 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
2543 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
2544 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
2545 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2546 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2547 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
2548 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
2549 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2550 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2551 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2552 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2553 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2554 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2555 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2556 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2557 ; GFX9-NEXT: s_mov_b32 s33, s34
2558 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2559 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2561 ; GFX10-LABEL: test_call_external_void_func_v5f32_imm:
2563 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2564 ; GFX10-NEXT: s_mov_b32 s34, s33
2565 ; GFX10-NEXT: s_mov_b32 s33, s32
2566 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2567 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2568 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2569 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2570 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2571 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
2572 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2573 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0
2574 ; GFX10-NEXT: v_mov_b32_e32 v3, -1.0
2575 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2576 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5
2577 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2578 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2579 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
2580 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
2581 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2582 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2583 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2584 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2585 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2586 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2587 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2588 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2589 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2590 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2591 ; GFX10-NEXT: s_mov_b32 s33, s34
2592 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2593 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2595 ; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
2597 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2598 ; GFX11-NEXT: s_mov_b32 s0, s33
2599 ; GFX11-NEXT: s_mov_b32 s33, s32
2600 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2601 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2602 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2603 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2604 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2605 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0
2606 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2607 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5
2608 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2609 ; GFX11-NEXT: s_getpc_b64 s[0:1]
2610 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4
2611 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12
2612 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2613 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2614 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2615 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2616 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2617 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2618 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2619 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2620 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2621 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2622 ; GFX11-NEXT: s_mov_b32 s33, s0
2623 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2624 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2626 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm:
2627 ; GFX10-SCRATCH: ; %bb.0:
2628 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2629 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2630 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2631 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2632 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2633 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2634 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2635 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2636 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
2637 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2638 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0
2639 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, -1.0
2640 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2641 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5
2642 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2643 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2644 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4
2645 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12
2646 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2647 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2648 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2649 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2650 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2651 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2652 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2653 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2654 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2655 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2656 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2657 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2658 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2659 call amdgpu_gfx void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Passes the immediate double 4.0 to external_void_func_f64 from an amdgpu_gfx
; function.
; The checks expect the 64-bit constant in a VGPR pair: v0 holds the low dword
; (0) and v1 the high dword (0x40100000). On GFX11 both moves are emitted as a
; single v_dual_mov_b32.
; In every prefix v40 carries the saved s30/s31 (lanes 0 and 1) and the saved
; copy of s33 (lane 2) across the call; v40 itself is spilled to, and reloaded
; from, the slot addressed by s33.
; s32 is bumped by 0x400 (GFX9), 0x200 (GFX10) or 16 (GFX11, GFX10-SCRATCH) and
; restored by the matching negative amount before s_setpc_b64.
; NOTE(review): the 0x400/0x200 vs 16 difference presumably comes from
; per-wave scaling of the stack pointer when flat scratch is not used - confirm.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2663 define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 {
2664 ; GFX9-LABEL: test_call_external_void_func_f64_imm:
2666 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2667 ; GFX9-NEXT: s_mov_b32 s34, s33
2668 ; GFX9-NEXT: s_mov_b32 s33, s32
2669 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2670 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2671 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2672 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2673 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2674 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2675 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2676 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
2677 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2678 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2679 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
2680 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
2681 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2682 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2683 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2684 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2685 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2686 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2687 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2688 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2689 ; GFX9-NEXT: s_mov_b32 s33, s34
2690 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2691 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2693 ; GFX10-LABEL: test_call_external_void_func_f64_imm:
2695 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2696 ; GFX10-NEXT: s_mov_b32 s34, s33
2697 ; GFX10-NEXT: s_mov_b32 s33, s32
2698 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2699 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2700 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2701 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2702 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2703 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2704 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000
2705 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2706 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2707 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
2708 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
2709 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2710 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2711 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2712 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2713 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2714 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2715 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2716 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2717 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2718 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2719 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2720 ; GFX10-NEXT: s_mov_b32 s33, s34
2721 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2722 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2724 ; GFX11-LABEL: test_call_external_void_func_f64_imm:
2726 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2727 ; GFX11-NEXT: s_mov_b32 s0, s33
2728 ; GFX11-NEXT: s_mov_b32 s33, s32
2729 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2730 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2731 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2732 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2733 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000
2734 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2735 ; GFX11-NEXT: s_getpc_b64 s[0:1]
2736 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4
2737 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12
2738 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2739 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2740 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2741 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2742 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2743 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2744 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2745 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2746 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2747 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2748 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2749 ; GFX11-NEXT: s_mov_b32 s33, s0
2750 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2751 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2753 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm:
2754 ; GFX10-SCRATCH: ; %bb.0:
2755 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2756 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2757 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2758 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2759 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2760 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2761 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2762 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2763 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2764 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000
2765 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2766 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2767 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4
2768 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12
2769 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2770 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2771 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2772 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2773 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2774 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2775 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2776 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2777 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2778 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2779 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2780 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2781 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2782 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2783 call amdgpu_gfx void @external_void_func_f64(double 4.0)
; Passes the immediate vector <2 x double> <2.0, 4.0> to
; external_void_func_v2f64 from an amdgpu_gfx function.
; The checks expect the two doubles in v[0:3], one dword per VGPR:
;   v0 = 0, v1 = high dword of 2.0 (printed as the inline constant 2.0, whose
;   32-bit pattern is 0x40000000), v2 = 0, v3 = 0x40100000 (high dword of 4.0).
; On GFX11 the four moves are paired into two v_dual_mov_b32 instructions.
; In every prefix v40 carries the saved s30/s31 (lanes 0 and 1) and the saved
; copy of s33 (lane 2) across the call; v40 itself is spilled to, and reloaded
; from, the slot addressed by s33.
; s32 is bumped by 0x400 (GFX9), 0x200 (GFX10) or 16 (GFX11, GFX10-SCRATCH) and
; restored by the matching negative amount before s_setpc_b64.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2787 define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 {
2788 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
2790 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2791 ; GFX9-NEXT: s_mov_b32 s34, s33
2792 ; GFX9-NEXT: s_mov_b32 s33, s32
2793 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2794 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2795 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2796 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2797 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2798 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2799 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2800 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2801 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2802 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2803 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2804 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2805 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
2806 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12
2807 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2808 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2809 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2810 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2811 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2812 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2813 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2814 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2815 ; GFX9-NEXT: s_mov_b32 s33, s34
2816 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2817 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2819 ; GFX10-LABEL: test_call_external_void_func_v2f64_imm:
2821 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2822 ; GFX10-NEXT: s_mov_b32 s34, s33
2823 ; GFX10-NEXT: s_mov_b32 s33, s32
2824 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2825 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2826 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2827 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2828 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2829 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2830 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2831 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
2832 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000
2833 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2834 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2835 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2836 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
2837 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12
2838 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2839 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2840 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2841 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2842 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2843 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2844 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2845 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2846 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2847 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2848 ; GFX10-NEXT: s_mov_b32 s33, s34
2849 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2850 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2852 ; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
2854 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2855 ; GFX11-NEXT: s_mov_b32 s0, s33
2856 ; GFX11-NEXT: s_mov_b32 s33, s32
2857 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2858 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2859 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2860 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2861 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2862 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2863 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2864 ; GFX11-NEXT: s_add_i32 s32, s32, 16
2865 ; GFX11-NEXT: s_getpc_b64 s[0:1]
2866 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4
2867 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12
2868 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
2869 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
2870 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2871 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
2872 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
2873 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
2874 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2875 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2876 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2877 ; GFX11-NEXT: s_add_i32 s32, s32, -16
2878 ; GFX11-NEXT: s_mov_b32 s33, s0
2879 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2880 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2882 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm:
2883 ; GFX10-SCRATCH: ; %bb.0:
2884 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2885 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
2886 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2887 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2888 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
2889 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2890 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2891 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
2892 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2893 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2894 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
2895 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000
2896 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2897 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2898 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2899 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4
2900 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12
2901 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2902 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2903 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
2904 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
2905 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
2906 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
2907 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
2908 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2909 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
2910 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2911 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
2912 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2913 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
2914 call amdgpu_gfx void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
; Passes the immediate vector <3 x double> <2.0, 4.0, 8.0> to
; external_void_func_v3f64 from an amdgpu_gfx function.
; The checks expect the three doubles in v[0:5], one dword per VGPR:
;   v0 = 0, v1 = high dword of 2.0 (printed as the inline constant 2.0, whose
;   32-bit pattern is 0x40000000),
;   v2 = 0, v3 = 0x40100000 (high dword of 4.0),
;   v4 = 0, v5 = 0x40200000 (high dword of 8.0).
; On GFX11 the six moves are paired into three v_dual_mov_b32 instructions.
; In every prefix v40 carries the saved s30/s31 (lanes 0 and 1) and the saved
; copy of s33 (lane 2) across the call; v40 itself is spilled to, and reloaded
; from, the slot addressed by s33.
; s32 is bumped by 0x400 (GFX9), 0x200 (GFX10) or 16 (GFX11, GFX10-SCRATCH) and
; restored by the matching negative amount before s_setpc_b64.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2918 define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 {
2919 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
2921 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2922 ; GFX9-NEXT: s_mov_b32 s34, s33
2923 ; GFX9-NEXT: s_mov_b32 s33, s32
2924 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2925 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2926 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2927 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
2928 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2929 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2930 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2931 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2932 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2933 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2934 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
2935 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
2936 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2937 ; GFX9-NEXT: s_getpc_b64 s[34:35]
2938 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4
2939 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12
2940 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
2941 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
2942 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
2943 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
2944 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
2945 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2946 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
2947 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2948 ; GFX9-NEXT: s_mov_b32 s33, s34
2949 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2950 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2952 ; GFX10-LABEL: test_call_external_void_func_v3f64_imm:
2954 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2955 ; GFX10-NEXT: s_mov_b32 s34, s33
2956 ; GFX10-NEXT: s_mov_b32 s33, s32
2957 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2958 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2959 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2960 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2961 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
2962 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2963 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2964 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
2965 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000
2966 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2967 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
2968 ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000
2969 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2970 ; GFX10-NEXT: s_getpc_b64 s[34:35]
2971 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4
2972 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12
2973 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2974 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
2975 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
2976 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
2977 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
2978 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
2979 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2980 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2981 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
2982 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2983 ; GFX10-NEXT: s_mov_b32 s33, s34
2984 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2985 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2987 ; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
2989 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2990 ; GFX11-NEXT: s_mov_b32 s0, s33
2991 ; GFX11-NEXT: s_mov_b32 s33, s32
2992 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
2993 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2994 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
2995 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
2996 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2997 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2998 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
2999 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000
3000 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3001 ; GFX11-NEXT: s_getpc_b64 s[0:1]
3002 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4
3003 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12
3004 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3005 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3006 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3007 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3008 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3009 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3010 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3011 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3012 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3013 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3014 ; GFX11-NEXT: s_mov_b32 s33, s0
3015 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3016 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3018 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm:
3019 ; GFX10-SCRATCH: ; %bb.0:
3020 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3021 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3022 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3023 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3024 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3025 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3026 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3027 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3028 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3029 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
3030 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
3031 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000
3032 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3033 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
3034 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000
3035 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3036 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3037 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4
3038 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12
3039 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3040 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3041 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3042 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3043 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3044 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3045 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3046 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3047 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3048 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3049 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3050 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3051 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3052 call amdgpu_gfx void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
; Loads a <2 x i8> from the null pointer in addrspace(1) and passes it to
; external_void_func_v2i8 from an amdgpu_gfx function.
; The checks expect the vector to be fetched with one 16-bit load
; (global_load_ushort, or global_load_u16 on GFX11) from the address in
; v[0:1] = 0, then split so each element is in its own VGPR: v0 keeps the
; loaded value and v1 = v0 >> 8 (v_lshrrev_b16). No masking of v0 is checked.
; The s_waitcnt vmcnt(0) before the shift waits for the load result.
; In every prefix v40 carries the saved s30/s31 (lanes 0 and 1) and the saved
; copy of s33 (lane 2) across the call; v40 itself is spilled to, and reloaded
; from, the slot addressed by s33.
; s32 is bumped by 0x400 (GFX9), 0x200 (GFX10) or 16 (GFX11, GFX10-SCRATCH) and
; restored by the matching negative amount before s_setpc_b64.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
3056 define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 {
3057 ; GFX9-LABEL: test_call_external_void_func_v2i8:
3059 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3060 ; GFX9-NEXT: s_mov_b32 s34, s33
3061 ; GFX9-NEXT: s_mov_b32 s33, s32
3062 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3063 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3064 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3065 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3066 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3067 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off
3068 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3069 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3070 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3071 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3072 ; GFX9-NEXT: s_getpc_b64 s[34:35]
3073 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i8@rel32@lo+4
3074 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i8@rel32@hi+12
3075 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3076 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 8, v0
3077 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3078 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3079 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3080 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3081 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3082 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3083 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3084 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3085 ; GFX9-NEXT: s_mov_b32 s33, s34
3086 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3087 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3089 ; GFX10-LABEL: test_call_external_void_func_v2i8:
3091 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3092 ; GFX10-NEXT: s_mov_b32 s34, s33
3093 ; GFX10-NEXT: s_mov_b32 s33, s32
3094 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3095 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3096 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3097 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3098 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3099 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3100 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3101 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3102 ; GFX10-NEXT: s_getpc_b64 s[34:35]
3103 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i8@rel32@lo+4
3104 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i8@rel32@hi+12
3105 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off
3106 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3107 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3108 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3109 ; GFX10-NEXT: v_lshrrev_b16 v1, 8, v0
3110 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3111 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3112 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3113 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3114 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3115 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3116 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3117 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3118 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3119 ; GFX10-NEXT: s_mov_b32 s33, s34
3120 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3121 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3123 ; GFX11-LABEL: test_call_external_void_func_v2i8:
3125 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3126 ; GFX11-NEXT: s_mov_b32 s0, s33
3127 ; GFX11-NEXT: s_mov_b32 s33, s32
3128 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3129 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3130 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3131 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3132 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3133 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3134 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3135 ; GFX11-NEXT: s_getpc_b64 s[0:1]
3136 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i8@rel32@lo+4
3137 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8@rel32@hi+12
3138 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off
3139 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3140 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3141 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3142 ; GFX11-NEXT: v_lshrrev_b16 v1, 8, v0
3143 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3144 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
3145 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3146 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3147 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3148 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3149 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3150 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3151 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3152 ; GFX11-NEXT: s_mov_b32 s33, s0
3153 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3154 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3156 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i8:
3157 ; GFX10-SCRATCH: ; %bb.0:
3158 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3159 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3160 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3161 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3162 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3163 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3164 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3165 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3166 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3167 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3168 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3169 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3170 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i8@rel32@lo+4
3171 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8@rel32@hi+12
3172 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off
3173 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3174 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3175 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3176 ; GFX10-SCRATCH-NEXT: v_lshrrev_b16 v1, 8, v0
3177 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3178 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3179 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3180 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3181 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3182 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3183 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3184 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3185 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3186 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3187 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3188 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3189 %val = load <2 x i8>, ptr addrspace(1) null
3190 call amdgpu_gfx void @external_void_func_v2i8(<2 x i8> %val)
; Loads a <3 x i8> from the null pointer in addrspace(1) and passes it to
; external_void_func_v3i8 from an amdgpu_gfx function.
; The checks expect the vector to be fetched with one 32-bit load
; (global_load_dword, or global_load_b32 on GFX11) from the address in
; v[0:1] = 0, then split so each element is in its own VGPR: v0 keeps the
; loaded dword, v1 = v0 >> 8 and v2 = v0 >> 16. No masking of the upper bits
; is checked.
; The s_waitcnt vmcnt(0) before the shifts waits for the load result.
; In every prefix v40 carries the saved s30/s31 (lanes 0 and 1) and the saved
; copy of s33 (lane 2) across the call; v40 itself is spilled to, and reloaded
; from, the slot addressed by s33.
; s32 is bumped by 0x400 (GFX9), 0x200 (GFX10) or 16 (GFX11, GFX10-SCRATCH) and
; restored by the matching negative amount before s_setpc_b64.
; NOTE(review): the GFX11 checks below have no s_delay_alu line after
; s_swappc_b64, unlike the GFX11 checks of the neighbouring tests in this
; file - confirm by regenerating the assertions.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
3194 define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 {
3195 ; GFX9-LABEL: test_call_external_void_func_v3i8:
3197 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3198 ; GFX9-NEXT: s_mov_b32 s34, s33
3199 ; GFX9-NEXT: s_mov_b32 s33, s32
3200 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3201 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3202 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3203 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3204 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3205 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
3206 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3207 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3208 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3209 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3210 ; GFX9-NEXT: s_getpc_b64 s[34:35]
3211 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i8@rel32@lo+4
3212 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8@rel32@hi+12
3213 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3214 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3215 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3216 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3217 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3218 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3219 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3220 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3221 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3222 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3223 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3224 ; GFX9-NEXT: s_mov_b32 s33, s34
3225 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3226 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3228 ; GFX10-LABEL: test_call_external_void_func_v3i8:
3230 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3231 ; GFX10-NEXT: s_mov_b32 s34, s33
3232 ; GFX10-NEXT: s_mov_b32 s33, s32
3233 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3234 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3235 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3236 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3237 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3238 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3239 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3240 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3241 ; GFX10-NEXT: s_getpc_b64 s[34:35]
3242 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i8@rel32@lo+4
3243 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8@rel32@hi+12
3244 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
3245 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3246 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3247 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3248 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3249 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3250 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3251 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3252 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3253 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3254 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3255 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3256 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3257 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3258 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3259 ; GFX10-NEXT: s_mov_b32 s33, s34
3260 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3261 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3263 ; GFX11-LABEL: test_call_external_void_func_v3i8:
3265 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3266 ; GFX11-NEXT: s_mov_b32 s0, s33
3267 ; GFX11-NEXT: s_mov_b32 s33, s32
3268 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3269 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3270 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3271 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3272 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3273 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3274 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3275 ; GFX11-NEXT: s_getpc_b64 s[0:1]
3276 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i8@rel32@lo+4
3277 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8@rel32@hi+12
3278 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
3279 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3280 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3281 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3282 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3283 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3284 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3285 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3286 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3287 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3288 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3289 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3290 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3291 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3292 ; GFX11-NEXT: s_mov_b32 s33, s0
3293 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3294 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3296 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i8:
3297 ; GFX10-SCRATCH: ; %bb.0:
3298 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3299 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3300 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3301 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3302 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3303 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3304 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3305 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3306 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3307 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3308 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3309 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3310 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i8@rel32@lo+4
3311 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8@rel32@hi+12
3312 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
3313 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3314 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3315 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3316 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3317 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3318 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3319 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3320 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3321 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3322 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3323 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3324 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3325 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3326 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3327 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3328 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3329 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3330 %val = load <3 x i8>, ptr addrspace(1) null
3331 call amdgpu_gfx void @external_void_func_v3i8(<3 x i8> %val)
; Calls @external_void_func_v4i8 (amdgpu_gfx calling convention) with a
; <4 x i8> value loaded from `ptr addrspace(1) null`.
; For every check prefix the expected code performs a single dword load into
; v0 and, before the s_swappc_b64 call, fills v1, v2 and v3 with that dword
; shifted right by 8, 16 and 24 bits respectively.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
3335 define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 {
3336 ; GFX9-LABEL: test_call_external_void_func_v4i8:
3338 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3339 ; GFX9-NEXT: s_mov_b32 s34, s33
3340 ; GFX9-NEXT: s_mov_b32 s33, s32
3341 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3342 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3343 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3344 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3345 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3346 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
3347 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3348 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3349 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3350 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3351 ; GFX9-NEXT: s_getpc_b64 s[34:35]
3352 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i8@rel32@lo+4
3353 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i8@rel32@hi+12
3354 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3355 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3356 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3357 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3358 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3359 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3360 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3361 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3362 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3363 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3364 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3365 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3366 ; GFX9-NEXT: s_mov_b32 s33, s34
3367 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3368 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3370 ; GFX10-LABEL: test_call_external_void_func_v4i8:
3372 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3373 ; GFX10-NEXT: s_mov_b32 s34, s33
3374 ; GFX10-NEXT: s_mov_b32 s33, s32
3375 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3376 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3377 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3378 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3379 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3380 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3381 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3382 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3383 ; GFX10-NEXT: s_getpc_b64 s[34:35]
3384 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i8@rel32@lo+4
3385 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i8@rel32@hi+12
3386 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
3387 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3388 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3389 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3390 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3391 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3392 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3393 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3394 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3395 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3396 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3397 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3398 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3399 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3400 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3401 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3402 ; GFX10-NEXT: s_mov_b32 s33, s34
3403 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3404 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3406 ; GFX11-LABEL: test_call_external_void_func_v4i8:
3408 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3409 ; GFX11-NEXT: s_mov_b32 s0, s33
3410 ; GFX11-NEXT: s_mov_b32 s33, s32
3411 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3412 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3413 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3414 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3415 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3416 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3417 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3418 ; GFX11-NEXT: s_getpc_b64 s[0:1]
3419 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i8@rel32@lo+4
3420 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8@rel32@hi+12
3421 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
3422 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3423 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3424 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3425 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3426 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3427 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3428 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3429 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3430 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3431 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3432 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3433 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3434 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3435 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3436 ; GFX11-NEXT: s_mov_b32 s33, s0
3437 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3438 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3440 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i8:
3441 ; GFX10-SCRATCH: ; %bb.0:
3442 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3443 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3444 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3445 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3446 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3447 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3448 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3449 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3450 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3451 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3452 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3453 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3454 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i8@rel32@lo+4
3455 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8@rel32@hi+12
3456 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
3457 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3458 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3459 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3460 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3461 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3462 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3463 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3464 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3465 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3466 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3467 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3468 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3469 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3470 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3471 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3472 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3473 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3474 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3475 %val = load <4 x i8>, ptr addrspace(1) null
3476 call amdgpu_gfx void @external_void_func_v4i8(<4 x i8> %val)
; Calls @external_void_func_v5i8 (amdgpu_gfx calling convention) with a
; <5 x i8> value loaded from `ptr addrspace(1) null`.
; For every check prefix the expected code performs one 64-bit load into
; v[5:6] and, before the s_swappc_b64 call, sets up v0-v4 from it: v0 is a
; copy of v5, v1 and v2 are v5 shifted right by 8 and 16, v[3:4] receives the
; 64-bit value shifted right by 24, and v4 is then overwritten with v6.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
3480 define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 {
3481 ; GFX9-LABEL: test_call_external_void_func_v5i8:
3483 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3484 ; GFX9-NEXT: s_mov_b32 s34, s33
3485 ; GFX9-NEXT: s_mov_b32 s33, s32
3486 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3487 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3488 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3489 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3490 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3491 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
3492 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3493 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3494 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3495 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3496 ; GFX9-NEXT: s_getpc_b64 s[34:35]
3497 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i8@rel32@lo+4
3498 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8@rel32@hi+12
3499 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3500 ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3501 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3502 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3503 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
3504 ; GFX9-NEXT: v_mov_b32_e32 v4, v6
3505 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3506 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3507 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3508 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3509 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3510 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3511 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3512 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3513 ; GFX9-NEXT: s_mov_b32 s33, s34
3514 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3515 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3517 ; GFX10-LABEL: test_call_external_void_func_v5i8:
3519 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3520 ; GFX10-NEXT: s_mov_b32 s34, s33
3521 ; GFX10-NEXT: s_mov_b32 s33, s32
3522 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3523 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3524 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3525 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3526 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3527 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3528 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3529 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3530 ; GFX10-NEXT: s_getpc_b64 s[34:35]
3531 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i8@rel32@lo+4
3532 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8@rel32@hi+12
3533 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
3534 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3535 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3536 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3537 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3538 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3539 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3540 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
3541 ; GFX10-NEXT: v_mov_b32_e32 v4, v6
3542 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3543 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3544 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3545 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3546 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3547 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3548 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3549 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3550 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3551 ; GFX10-NEXT: s_mov_b32 s33, s34
3552 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3553 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3555 ; GFX11-LABEL: test_call_external_void_func_v5i8:
3557 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3558 ; GFX11-NEXT: s_mov_b32 s0, s33
3559 ; GFX11-NEXT: s_mov_b32 s33, s32
3560 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3561 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3562 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3563 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3564 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3565 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3566 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3567 ; GFX11-NEXT: s_getpc_b64 s[0:1]
3568 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i8@rel32@lo+4
3569 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8@rel32@hi+12
3570 ; GFX11-NEXT: global_load_b64 v[5:6], v[0:1], off
3571 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3572 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3573 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3574 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
3575 ; GFX11-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3576 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3577 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3578 ; GFX11-NEXT: v_mov_b32_e32 v4, v6
3579 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3580 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3581 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3582 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3583 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3584 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3585 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3586 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3587 ; GFX11-NEXT: s_mov_b32 s33, s0
3588 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3589 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3591 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i8:
3592 ; GFX10-SCRATCH: ; %bb.0:
3593 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3594 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3595 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3596 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3597 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3598 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3599 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3600 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3601 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3602 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3603 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3604 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3605 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i8@rel32@lo+4
3606 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8@rel32@hi+12
3607 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
3608 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3609 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3610 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3611 ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
3612 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5
3613 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v5
3614 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, v5
3615 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v6
3616 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3617 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3618 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3619 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3620 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3621 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3622 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3623 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3624 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3625 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3626 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3627 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3628 %val = load <5 x i8>, ptr addrspace(1) null
3629 call amdgpu_gfx void @external_void_func_v5i8(<5 x i8> %val)
; Calls @external_void_func_v8i8 (amdgpu_gfx calling convention) with an
; <8 x i8> value loaded from `ptr addrspace(1) null`.
; For every check prefix the expected code performs one 64-bit load into
; v[0:1] and, before the s_swappc_b64 call, sets up v0-v7 from it: v2/v3 and
; v5/v6/v7 are right shifts (by 8, 16 or 24) of the two loaded dwords, v4 is
; a copy of the second dword, and v1 is filled last from v8, which holds the
; first dword shifted right by 8 while v1 is still live.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
3633 define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 {
3634 ; GFX9-LABEL: test_call_external_void_func_v8i8:
3636 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3637 ; GFX9-NEXT: s_mov_b32 s34, s33
3638 ; GFX9-NEXT: s_mov_b32 s33, s32
3639 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3640 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3641 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3642 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3643 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3644 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3645 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3646 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3647 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3648 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3649 ; GFX9-NEXT: s_getpc_b64 s[34:35]
3650 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i8@rel32@lo+4
3651 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i8@rel32@hi+12
3652 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3653 ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3654 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3655 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3656 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3657 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3658 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3659 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
3660 ; GFX9-NEXT: v_mov_b32_e32 v1, v8
3661 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3662 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3663 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3664 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3665 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3666 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3667 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3668 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3669 ; GFX9-NEXT: s_mov_b32 s33, s34
3670 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3671 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3673 ; GFX10-LABEL: test_call_external_void_func_v8i8:
3675 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3676 ; GFX10-NEXT: s_mov_b32 s34, s33
3677 ; GFX10-NEXT: s_mov_b32 s33, s32
3678 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3679 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3680 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3681 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3682 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3683 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3684 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3685 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3686 ; GFX10-NEXT: s_getpc_b64 s[34:35]
3687 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i8@rel32@lo+4
3688 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i8@rel32@hi+12
3689 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3690 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3691 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3692 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3693 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3694 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3695 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3696 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3697 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3698 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3699 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
3700 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
3701 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3702 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3703 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3704 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3705 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3706 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3707 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3708 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3709 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3710 ; GFX10-NEXT: s_mov_b32 s33, s34
3711 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3712 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3714 ; GFX11-LABEL: test_call_external_void_func_v8i8:
3716 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3717 ; GFX11-NEXT: s_mov_b32 s0, s33
3718 ; GFX11-NEXT: s_mov_b32 s33, s32
3719 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3720 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3721 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3722 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3723 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
3724 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3725 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3726 ; GFX11-NEXT: s_getpc_b64 s[0:1]
3727 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i8@rel32@lo+4
3728 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8@rel32@hi+12
3729 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
3730 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3731 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3732 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3733 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3734 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3735 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3736 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3737 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3738 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3739 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v8
3740 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3741 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3742 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
3743 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
3744 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3745 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
3746 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3747 ; GFX11-NEXT: s_add_i32 s32, s32, -16
3748 ; GFX11-NEXT: s_mov_b32 s33, s0
3749 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3750 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3752 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i8:
3753 ; GFX10-SCRATCH: ; %bb.0:
3754 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3755 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
3756 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3757 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3758 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
3759 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3760 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3761 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
3762 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
3763 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
3764 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3765 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3766 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i8@rel32@lo+4
3767 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8@rel32@hi+12
3768 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3769 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3770 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3771 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3772 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0
3773 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3774 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3775 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3776 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3777 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3778 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
3779 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v8
3780 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3781 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
3782 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
3783 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
3784 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
3785 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
3786 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3787 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
3788 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3789 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
3790 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3791 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
3792 %val = load <8 x i8>, ptr addrspace(1) null
3793 call amdgpu_gfx void @external_void_func_v8i8(<8 x i8> %val)
3797 define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 {
3798 ; GFX9-LABEL: test_call_external_void_func_v32i8:
3800 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3801 ; GFX9-NEXT: s_mov_b32 s34, s33
3802 ; GFX9-NEXT: s_mov_b32 s33, s32
3803 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3804 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3805 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3806 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
3807 ; GFX9-NEXT: v_mov_b32_e32 v4, 16
3808 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
3809 ; GFX9-NEXT: v_mov_b32_e32 v5, 0
3810 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3811 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
3812 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off
3813 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3814 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3815 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3816 ; GFX9-NEXT: s_getpc_b64 s[34:35]
3817 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i8@rel32@lo+4
3818 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i8@rel32@hi+12
3819 ; GFX9-NEXT: s_waitcnt vmcnt(1)
3820 ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0
3821 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0
3822 ; GFX9-NEXT: v_lshrrev_b32_e32 v37, 24, v0
3823 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3824 ; GFX9-NEXT: v_lshrrev_b32_e32 v32, 8, v16
3825 ; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v16
3826 ; GFX9-NEXT: v_lshrrev_b32_e32 v34, 24, v16
3827 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3828 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3829 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3830 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
3831 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
3832 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
3833 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
3834 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
3835 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
3836 ; GFX9-NEXT: v_lshrrev_b32_e32 v21, 8, v17
3837 ; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v17
3838 ; GFX9-NEXT: v_lshrrev_b32_e32 v23, 24, v17
3839 ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 8, v18
3840 ; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v18
3841 ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 24, v18
3842 ; GFX9-NEXT: v_lshrrev_b32_e32 v29, 8, v19
3843 ; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v19
3844 ; GFX9-NEXT: v_lshrrev_b32_e32 v31, 24, v19
3845 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
3846 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
3847 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
3848 ; GFX9-NEXT: v_mov_b32_e32 v20, v17
3849 ; GFX9-NEXT: v_mov_b32_e32 v24, v18
3850 ; GFX9-NEXT: v_mov_b32_e32 v28, v19
3851 ; GFX9-NEXT: v_mov_b32_e32 v1, v35
3852 ; GFX9-NEXT: v_mov_b32_e32 v2, v36
3853 ; GFX9-NEXT: v_mov_b32_e32 v3, v37
3854 ; GFX9-NEXT: v_mov_b32_e32 v17, v32
3855 ; GFX9-NEXT: v_mov_b32_e32 v18, v33
3856 ; GFX9-NEXT: v_mov_b32_e32 v19, v34
3857 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
3858 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
3859 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3860 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
3861 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
3862 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3863 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
3864 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3865 ; GFX9-NEXT: s_mov_b32 s33, s34
3866 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3867 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3869 ; GFX10-LABEL: test_call_external_void_func_v32i8:
3871 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3872 ; GFX10-NEXT: s_mov_b32 s34, s33
3873 ; GFX10-NEXT: s_mov_b32 s33, s32
3874 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3875 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
3876 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3877 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3878 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
3879 ; GFX10-NEXT: v_mov_b32_e32 v4, 16
3880 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
3881 ; GFX10-NEXT: v_mov_b32_e32 v5, 0
3882 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
3883 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3884 ; GFX10-NEXT: s_getpc_b64 s[34:35]
3885 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i8@rel32@lo+4
3886 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i8@rel32@hi+12
3887 ; GFX10-NEXT: s_clause 0x1
3888 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3889 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[4:5], off
3890 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3891 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3892 ; GFX10-NEXT: s_waitcnt vmcnt(1)
3893 ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0
3894 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0
3895 ; GFX10-NEXT: v_lshrrev_b32_e32 v37, 24, v0
3896 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3897 ; GFX10-NEXT: v_lshrrev_b32_e32 v32, 8, v16
3898 ; GFX10-NEXT: v_lshrrev_b32_e32 v33, 16, v16
3899 ; GFX10-NEXT: v_lshrrev_b32_e32 v34, 24, v16
3900 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3901 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3902 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3903 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2
3904 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
3905 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
3906 ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3
3907 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3
3908 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3
3909 ; GFX10-NEXT: v_lshrrev_b32_e32 v21, 8, v17
3910 ; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v17
3911 ; GFX10-NEXT: v_lshrrev_b32_e32 v23, 24, v17
3912 ; GFX10-NEXT: v_lshrrev_b32_e32 v25, 8, v18
3913 ; GFX10-NEXT: v_lshrrev_b32_e32 v26, 16, v18
3914 ; GFX10-NEXT: v_lshrrev_b32_e32 v27, 24, v18
3915 ; GFX10-NEXT: v_lshrrev_b32_e32 v29, 8, v19
3916 ; GFX10-NEXT: v_lshrrev_b32_e32 v30, 16, v19
3917 ; GFX10-NEXT: v_lshrrev_b32_e32 v31, 24, v19
3918 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
3919 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
3920 ; GFX10-NEXT: v_mov_b32_e32 v12, v3
3921 ; GFX10-NEXT: v_mov_b32_e32 v20, v17
3922 ; GFX10-NEXT: v_mov_b32_e32 v24, v18
3923 ; GFX10-NEXT: v_mov_b32_e32 v28, v19
3924 ; GFX10-NEXT: v_mov_b32_e32 v1, v35
3925 ; GFX10-NEXT: v_mov_b32_e32 v2, v36
3926 ; GFX10-NEXT: v_mov_b32_e32 v3, v37
3927 ; GFX10-NEXT: v_mov_b32_e32 v17, v32
3928 ; GFX10-NEXT: v_mov_b32_e32 v18, v33
3929 ; GFX10-NEXT: v_mov_b32_e32 v19, v34
3930 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
3931 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
3932 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
3933 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
3934 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
3935 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
3936 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3937 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
3938 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3939 ; GFX10-NEXT: s_mov_b32 s33, s34
3940 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3941 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3943 ; GFX11-LABEL: test_call_external_void_func_v32i8:
3945 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3946 ; GFX11-NEXT: s_mov_b32 s0, s33
3947 ; GFX11-NEXT: s_mov_b32 s33, s32
3948 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
3949 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
3950 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
3951 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
3952 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 16
3953 ; GFX11-NEXT: v_mov_b32_e32 v5, 0
3954 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
3955 ; GFX11-NEXT: s_add_i32 s32, s32, 16
3956 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
3957 ; GFX11-NEXT: s_getpc_b64 s[0:1]
3958 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i8@rel32@lo+4
3959 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i8@rel32@hi+12
3960 ; GFX11-NEXT: global_load_b128 v[16:19], v[4:5], off
3961 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
3962 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
3963 ; GFX11-NEXT: s_waitcnt vmcnt(1)
3964 ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, v0
3965 ; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v0
3966 ; GFX11-NEXT: v_lshrrev_b32_e32 v37, 24, v0
3967 ; GFX11-NEXT: s_waitcnt vmcnt(0)
3968 ; GFX11-NEXT: v_lshrrev_b32_e32 v32, 8, v16
3969 ; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v16
3970 ; GFX11-NEXT: v_lshrrev_b32_e32 v34, 24, v16
3971 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
3972 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
3973 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
3974 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
3975 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
3976 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
3977 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
3978 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
3979 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
3980 ; GFX11-NEXT: v_lshrrev_b32_e32 v21, 8, v17
3981 ; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v17
3982 ; GFX11-NEXT: v_lshrrev_b32_e32 v23, 24, v17
3983 ; GFX11-NEXT: v_lshrrev_b32_e32 v25, 8, v18
3984 ; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v18
3985 ; GFX11-NEXT: v_lshrrev_b32_e32 v27, 24, v18
3986 ; GFX11-NEXT: v_lshrrev_b32_e32 v29, 8, v19
3987 ; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v19
3988 ; GFX11-NEXT: v_lshrrev_b32_e32 v31, 24, v19
3989 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v35
3990 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
3991 ; GFX11-NEXT: v_mov_b32_e32 v12, v3
3992 ; GFX11-NEXT: v_mov_b32_e32 v20, v17
3993 ; GFX11-NEXT: v_mov_b32_e32 v24, v18
3994 ; GFX11-NEXT: v_dual_mov_b32 v28, v19 :: v_dual_mov_b32 v19, v34
3995 ; GFX11-NEXT: v_dual_mov_b32 v2, v36 :: v_dual_mov_b32 v3, v37
3996 ; GFX11-NEXT: v_dual_mov_b32 v17, v32 :: v_dual_mov_b32 v18, v33
3997 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3998 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
3999 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4000 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4001 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4002 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
4003 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4004 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4005 ; GFX11-NEXT: s_mov_b32 s33, s0
4006 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4007 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4009 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i8:
4010 ; GFX10-SCRATCH: ; %bb.0:
4011 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4012 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4013 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4014 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4015 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
4016 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4017 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4018 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
4019 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 16
4020 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
4021 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0
4022 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4023 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4024 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4025 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i8@rel32@lo+4
4026 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i8@rel32@hi+12
4027 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4028 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
4029 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[4:5], off
4030 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4031 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4032 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
4033 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0
4034 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0
4035 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v37, 24, v0
4036 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4037 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v32, 8, v16
4038 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v33, 16, v16
4039 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v34, 24, v16
4040 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
4041 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
4042 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
4043 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2
4044 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2
4045 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2
4046 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3
4047 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3
4048 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3
4049 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v21, 8, v17
4050 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v22, 16, v17
4051 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v23, 24, v17
4052 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v25, 8, v18
4053 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v26, 16, v18
4054 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v27, 24, v18
4055 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v29, 8, v19
4056 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v30, 16, v19
4057 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v31, 24, v19
4058 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
4059 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2
4060 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3
4061 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, v17
4062 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, v18
4063 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, v19
4064 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v35
4065 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v36
4066 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v37
4067 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, v32
4068 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, v33
4069 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, v34
4070 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4071 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4072 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4073 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4074 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4075 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
4076 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4077 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4078 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4079 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4080 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4081 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
4082 %val = load <32 x i8>, ptr addrspace(1) null
4083 call amdgpu_gfx void @external_void_func_v32i8(<32 x i8> %val)
; Round-trips a single i8 through an external amdgpu_gfx callee: the value is
; loaded from the null addrspace(1) pointer, passed to
; @external_void_func_i8_ret, and the returned i8 is stored back to the same
; pointer. In all four run configurations the assertions below show the byte
; being loaded into v0 before the call and stored from v0 after it.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4088 define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 {
4089 ; GFX9-LABEL: test_call_external_void_func_i8_ret:
4091 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4092 ; GFX9-NEXT: s_mov_b32 s34, s33
4093 ; GFX9-NEXT: s_mov_b32 s33, s32
4094 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4095 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4096 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4097 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4098 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4099 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4100 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4101 ; GFX9-NEXT: global_load_ubyte v0, v[41:42], off
4102 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4103 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4104 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4105 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4106 ; GFX9-NEXT: s_getpc_b64 s[34:35]
4107 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_ret@rel32@lo+4
4108 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_ret@rel32@hi+12
4109 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4110 ; GFX9-NEXT: global_store_byte v[41:42], v0, off
4111 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4112 ; GFX9-NEXT: s_nop 0
4113 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4114 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4115 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4116 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4117 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4118 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4119 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4120 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4121 ; GFX9-NEXT: s_mov_b32 s33, s34
4122 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4123 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4125 ; GFX10-LABEL: test_call_external_void_func_i8_ret:
4127 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4128 ; GFX10-NEXT: s_mov_b32 s34, s33
4129 ; GFX10-NEXT: s_mov_b32 s33, s32
4130 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4131 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4132 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4133 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4134 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4135 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4136 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4137 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4138 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4139 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4140 ; GFX10-NEXT: s_getpc_b64 s[34:35]
4141 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_ret@rel32@lo+4
4142 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_ret@rel32@hi+12
4143 ; GFX10-NEXT: global_load_ubyte v0, v[41:42], off
4144 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4145 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4146 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4147 ; GFX10-NEXT: global_store_byte v[41:42], v0, off
4148 ; GFX10-NEXT: s_clause 0x1
4149 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4150 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4151 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4152 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4153 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4154 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4155 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4156 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4157 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4158 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4159 ; GFX10-NEXT: s_mov_b32 s33, s34
4160 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4161 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4163 ; GFX11-LABEL: test_call_external_void_func_i8_ret:
4165 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4166 ; GFX11-NEXT: s_mov_b32 s0, s33
4167 ; GFX11-NEXT: s_mov_b32 s33, s32
4168 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4169 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4170 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4171 ; GFX11-NEXT: s_clause 0x1
4172 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4173 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4174 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4175 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4176 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4177 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4178 ; GFX11-NEXT: s_getpc_b64 s[0:1]
4179 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_ret@rel32@lo+4
4180 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_ret@rel32@hi+12
4181 ; GFX11-NEXT: global_load_u8 v0, v[41:42], off
4182 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4183 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4184 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4185 ; GFX11-NEXT: global_store_b8 v[41:42], v0, off
4186 ; GFX11-NEXT: s_clause 0x1
4187 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4188 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4189 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4190 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4191 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4192 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4193 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4194 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4195 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4196 ; GFX11-NEXT: s_mov_b32 s33, s0
4197 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4198 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4200 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_ret:
4201 ; GFX10-SCRATCH: ; %bb.0:
4202 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4203 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4204 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4205 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4206 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4207 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4208 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4209 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4210 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4211 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4212 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4213 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4214 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4215 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4216 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_ret@rel32@lo+4
4217 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_ret@rel32@hi+12
4218 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[41:42], off
4219 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4220 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4221 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4222 ; GFX10-SCRATCH-NEXT: global_store_byte v[41:42], v0, off
4223 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4224 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4225 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4226 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4227 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4228 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4229 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4230 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4231 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4232 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4233 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4234 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4235 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4236 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
4237 %val = load i8, ptr addrspace(1) null
4238 %tmp = call amdgpu_gfx i8 @external_void_func_i8_ret(i8 %val)
4239 store i8 %tmp, ptr addrspace(1) null
; Round-trips a <2 x i8> through an external amdgpu_gfx callee: the vector is
; loaded from the null addrspace(1) pointer, passed to
; @external_void_func_v2i8_ret, and the returned vector is stored back to the
; same pointer. The assertions below show the 16-bit load being split before
; the call (v1 = v0 >> 8) and, after the call, v0's low byte and v1 being
; repacked into one 16-bit value for a single short store.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4244 define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 {
4245 ; GFX9-LABEL: test_call_external_void_func_v2i8_ret:
4247 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4248 ; GFX9-NEXT: s_mov_b32 s34, s33
4249 ; GFX9-NEXT: s_mov_b32 s33, s32
4250 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4251 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4252 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4253 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4254 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4255 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4256 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4257 ; GFX9-NEXT: global_load_ushort v0, v[41:42], off
4258 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4259 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4260 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4261 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4262 ; GFX9-NEXT: s_getpc_b64 s[34:35]
4263 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i8_ret@rel32@lo+4
4264 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i8_ret@rel32@hi+12
4265 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4266 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 8, v0
4267 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4268 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4269 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4270 ; GFX9-NEXT: global_store_short v[41:42], v0, off
4271 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4272 ; GFX9-NEXT: s_nop 0
4273 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4274 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4275 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4276 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4277 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4278 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4279 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4280 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4281 ; GFX9-NEXT: s_mov_b32 s33, s34
4282 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4283 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4285 ; GFX10-LABEL: test_call_external_void_func_v2i8_ret:
4287 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4288 ; GFX10-NEXT: s_mov_b32 s34, s33
4289 ; GFX10-NEXT: s_mov_b32 s33, s32
4290 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4291 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4292 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4293 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4294 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4295 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4296 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4297 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4298 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4299 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4300 ; GFX10-NEXT: s_getpc_b64 s[34:35]
4301 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i8_ret@rel32@lo+4
4302 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i8_ret@rel32@hi+12
4303 ; GFX10-NEXT: global_load_ushort v0, v[41:42], off
4304 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4305 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4306 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4307 ; GFX10-NEXT: v_lshrrev_b16 v1, 8, v0
4308 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4309 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4310 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4311 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4312 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4313 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4314 ; GFX10-NEXT: global_store_short v[41:42], v0, off
4315 ; GFX10-NEXT: s_clause 0x1
4316 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4317 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4318 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4319 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4320 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4321 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4322 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4323 ; GFX10-NEXT: s_mov_b32 s33, s34
4324 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4325 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4327 ; GFX11-LABEL: test_call_external_void_func_v2i8_ret:
4329 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4330 ; GFX11-NEXT: s_mov_b32 s0, s33
4331 ; GFX11-NEXT: s_mov_b32 s33, s32
4332 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4333 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4334 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4335 ; GFX11-NEXT: s_clause 0x1
4336 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4337 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4338 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4339 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4340 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4341 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4342 ; GFX11-NEXT: s_getpc_b64 s[0:1]
4343 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i8_ret@rel32@lo+4
4344 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8_ret@rel32@hi+12
4345 ; GFX11-NEXT: global_load_u16 v0, v[41:42], off
4346 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4347 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4348 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4349 ; GFX11-NEXT: v_lshrrev_b16 v1, 8, v0
4350 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4351 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
4352 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
4353 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
4354 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4355 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4356 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4357 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4358 ; GFX11-NEXT: global_store_b16 v[41:42], v0, off
4359 ; GFX11-NEXT: s_clause 0x1
4360 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4361 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4362 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4363 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4364 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4365 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4366 ; GFX11-NEXT: s_mov_b32 s33, s0
4367 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4368 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4370 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i8_ret:
4371 ; GFX10-SCRATCH: ; %bb.0:
4372 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4373 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4374 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4375 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4376 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4377 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4378 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4379 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4380 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4381 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4382 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4383 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4384 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4385 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4386 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i8_ret@rel32@lo+4
4387 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8_ret@rel32@hi+12
4388 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[41:42], off
4389 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4390 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4391 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4392 ; GFX10-SCRATCH-NEXT: v_lshrrev_b16 v1, 8, v0
4393 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4394 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4395 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4396 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4397 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4398 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4399 ; GFX10-SCRATCH-NEXT: global_store_short v[41:42], v0, off
4400 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4401 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4402 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4403 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4404 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4405 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4406 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4407 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4408 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4409 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4410 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
4411 %val = load <2 x i8>, ptr addrspace(1) null
4412 %tmp = call amdgpu_gfx <2 x i8> @external_void_func_v2i8_ret(<2 x i8> %val)
4413 store <2 x i8> %tmp, ptr addrspace(1) null
; Round-trips a <3 x i8> through an external amdgpu_gfx callee: the vector is
; loaded from the null addrspace(1) pointer, passed to
; @external_void_func_v3i8_ret, and the returned vector is stored back to the
; same pointer. The assertions below show a 32-bit load being split before the
; call (v1 = v0 >> 8, v2 = v0 >> 16). After the call the result is written with
; two stores: v2 as a single byte through an address register pair set to
; (2, 0), and v0's low byte packed with v1 as a 16-bit value through v[41:42].
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4418 define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 {
4419 ; GFX9-LABEL: test_call_external_void_func_v3i8_ret:
4421 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4422 ; GFX9-NEXT: s_mov_b32 s34, s33
4423 ; GFX9-NEXT: s_mov_b32 s33, s32
4424 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4425 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4426 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4427 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4428 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4429 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4430 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4431 ; GFX9-NEXT: global_load_dword v0, v[41:42], off
4432 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4433 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4434 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4435 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4436 ; GFX9-NEXT: s_getpc_b64 s[34:35]
4437 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i8_ret@rel32@lo+4
4438 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8_ret@rel32@hi+12
4439 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4440 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4441 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4442 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4443 ; GFX9-NEXT: v_mov_b32_e32 v3, 2
4444 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4445 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
4446 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4447 ; GFX9-NEXT: global_store_byte v[3:4], v2, off
4448 ; GFX9-NEXT: global_store_short v[41:42], v0, off
4449 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4450 ; GFX9-NEXT: s_nop 0
4451 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4452 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4453 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4454 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4455 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4456 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4457 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4458 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4459 ; GFX9-NEXT: s_mov_b32 s33, s34
4460 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4461 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4463 ; GFX10-LABEL: test_call_external_void_func_v3i8_ret:
4465 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4466 ; GFX10-NEXT: s_mov_b32 s34, s33
4467 ; GFX10-NEXT: s_mov_b32 s33, s32
4468 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4469 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4470 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4471 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4472 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4473 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4474 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4475 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4476 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4477 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4478 ; GFX10-NEXT: s_getpc_b64 s[34:35]
4479 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i8_ret@rel32@lo+4
4480 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8_ret@rel32@hi+12
4481 ; GFX10-NEXT: global_load_dword v0, v[41:42], off
4482 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4483 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4484 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4485 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4486 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4487 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4488 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4489 ; GFX10-NEXT: v_mov_b32_e32 v3, 2
4490 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
4491 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4492 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4493 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4494 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4495 ; GFX10-NEXT: global_store_byte v[3:4], v2, off
4496 ; GFX10-NEXT: global_store_short v[41:42], v0, off
4497 ; GFX10-NEXT: s_clause 0x1
4498 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4499 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4500 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4501 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4502 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4503 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4504 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4505 ; GFX10-NEXT: s_mov_b32 s33, s34
4506 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4507 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4509 ; GFX11-LABEL: test_call_external_void_func_v3i8_ret:
4511 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4512 ; GFX11-NEXT: s_mov_b32 s0, s33
4513 ; GFX11-NEXT: s_mov_b32 s33, s32
4514 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4515 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4516 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4517 ; GFX11-NEXT: s_clause 0x1
4518 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4519 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4520 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4521 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4522 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4523 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4524 ; GFX11-NEXT: s_getpc_b64 s[0:1]
4525 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i8_ret@rel32@lo+4
4526 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8_ret@rel32@hi+12
4527 ; GFX11-NEXT: global_load_b32 v0, v[41:42], off
4528 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4529 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4530 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4531 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4532 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4533 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4534 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
4535 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v1
4536 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0
4537 ; GFX11-NEXT: v_mov_b32_e32 v0, 2
4538 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
4539 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4540 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4541 ; GFX11-NEXT: v_or_b32_e32 v3, v4, v3
4542 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4543 ; GFX11-NEXT: s_clause 0x1
4544 ; GFX11-NEXT: global_store_b8 v[0:1], v2, off
4545 ; GFX11-NEXT: global_store_b16 v[41:42], v3, off
4546 ; GFX11-NEXT: s_clause 0x1
4547 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4548 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4549 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4550 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4551 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4552 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4553 ; GFX11-NEXT: s_mov_b32 s33, s0
4554 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4555 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4557 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i8_ret:
4558 ; GFX10-SCRATCH: ; %bb.0:
4559 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4560 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4561 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4562 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4563 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4564 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4565 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4566 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4567 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4568 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4569 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4570 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4571 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4572 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4573 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i8_ret@rel32@lo+4
4574 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8_ret@rel32@hi+12
4575 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[41:42], off
4576 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4577 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4578 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4579 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4580 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4581 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4582 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4583 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 2
4584 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
4585 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4586 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4587 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4588 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4589 ; GFX10-SCRATCH-NEXT: global_store_byte v[3:4], v2, off
4590 ; GFX10-SCRATCH-NEXT: global_store_short v[41:42], v0, off
4591 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4592 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4593 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4594 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4595 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4596 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4597 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4598 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4599 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4600 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4601 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
4602 %val = load <3 x i8>, ptr addrspace(1) null
4603 %tmp = call amdgpu_gfx <3 x i8> @external_void_func_v3i8_ret(<3 x i8> %val)
4604 store <3 x i8> %tmp, ptr addrspace(1) null
; Test: pass a <4 x i8> to, and receive a <4 x i8> from, an external
; amdgpu_gfx function. The IR at the end of this function loads the vector
; from the null pointer in addrspace(1), calls external_void_func_v4i8_ret
; with it, and stores the returned vector back to the same address.
;
; The assertions below are autogenerated (see the NOTE at the top of the
; file), one group per llc RUN line. In the expected code the loaded dword is
; split with shifts into v0..v3 before s_swappc_b64, and v0..v3 are re-packed
; into one dword for the final global store after the call returns.
; Regenerate these lines with the update script rather than editing by hand.
4609 define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 {
4610 ; GFX9-LABEL: test_call_external_void_func_v4i8_ret:
4612 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4613 ; GFX9-NEXT: s_mov_b32 s34, s33
4614 ; GFX9-NEXT: s_mov_b32 s33, s32
4615 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4616 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4617 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4618 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4619 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4620 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4621 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4622 ; GFX9-NEXT: global_load_dword v0, v[41:42], off
4623 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4624 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4625 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4626 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4627 ; GFX9-NEXT: s_getpc_b64 s[34:35]
4628 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i8_ret@rel32@lo+4
4629 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i8_ret@rel32@hi+12
4630 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4631 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4632 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4633 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4634 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4635 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4636 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4637 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v3
4638 ; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4639 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4640 ; GFX9-NEXT: global_store_dword v[41:42], v0, off
4641 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4642 ; GFX9-NEXT: s_nop 0
4643 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4644 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4645 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4646 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4647 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4648 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4649 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4650 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4651 ; GFX9-NEXT: s_mov_b32 s33, s34
4652 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4653 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4655 ; GFX10-LABEL: test_call_external_void_func_v4i8_ret:
4657 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4658 ; GFX10-NEXT: s_mov_b32 s34, s33
4659 ; GFX10-NEXT: s_mov_b32 s33, s32
4660 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4661 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4662 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4663 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4664 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4665 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4666 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4667 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4668 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4669 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4670 ; GFX10-NEXT: s_getpc_b64 s[34:35]
4671 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i8_ret@rel32@lo+4
4672 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i8_ret@rel32@hi+12
4673 ; GFX10-NEXT: global_load_dword v0, v[41:42], off
4674 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4675 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4676 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4677 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4678 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4679 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4680 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4681 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4682 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3
4683 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4684 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4685 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4686 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4687 ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4688 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4689 ; GFX10-NEXT: global_store_dword v[41:42], v0, off
4690 ; GFX10-NEXT: s_clause 0x1
4691 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4692 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4693 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4694 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4695 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4696 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4697 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4698 ; GFX10-NEXT: s_mov_b32 s33, s34
4699 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4700 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4702 ; GFX11-LABEL: test_call_external_void_func_v4i8_ret:
4704 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4705 ; GFX11-NEXT: s_mov_b32 s0, s33
4706 ; GFX11-NEXT: s_mov_b32 s33, s32
4707 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4708 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4709 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4710 ; GFX11-NEXT: s_clause 0x1
4711 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4712 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4713 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4714 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4715 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4716 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4717 ; GFX11-NEXT: s_getpc_b64 s[0:1]
4718 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i8_ret@rel32@lo+4
4719 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8_ret@rel32@hi+12
4720 ; GFX11-NEXT: global_load_b32 v0, v[41:42], off
4721 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4722 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4723 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4724 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4725 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4726 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4727 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4728 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
4729 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
4730 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
4731 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
4732 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
4733 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4734 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4735 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4736 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4737 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
4738 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
4739 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
4740 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4741 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4742 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4743 ; GFX11-NEXT: global_store_b32 v[41:42], v0, off
4744 ; GFX11-NEXT: s_clause 0x1
4745 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4746 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4747 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4748 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4749 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4750 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4751 ; GFX11-NEXT: s_mov_b32 s33, s0
4752 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4753 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4755 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i8_ret:
4756 ; GFX10-SCRATCH: ; %bb.0:
4757 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4758 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4759 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4760 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4761 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4762 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4763 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4764 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4765 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4766 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4767 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4768 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4769 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4770 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4771 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i8_ret@rel32@lo+4
4772 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8_ret@rel32@hi+12
4773 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[41:42], off
4774 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4775 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4776 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4777 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0
4778 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4779 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
4780 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4781 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4782 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3
4783 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
4784 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
4785 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
4786 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4787 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4788 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4789 ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v0, off
4790 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4791 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4792 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4793 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4794 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
4795 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4796 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4797 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4798 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
4799 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4800 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: round-trip the <4 x i8> through the external callee.
4801 %val = load <4 x i8>, ptr addrspace(1) null
4802 %tmp = call amdgpu_gfx <4 x i8> @external_void_func_v4i8_ret(<4 x i8> %val)
4803 store <4 x i8> %tmp, ptr addrspace(1) null
; Test: pass a <5 x i8> to, and receive a <5 x i8> from, an external
; amdgpu_gfx function. The IR at the end of this function loads the vector
; from the null pointer in addrspace(1), calls external_void_func_v5i8_ret
; with it, and stores the returned vector back to the same address.
;
; The assertions below are autogenerated (see the NOTE at the top of the
; file), one group per llc RUN line. In the expected code the vector is
; fetched with a 64-bit load into v[5:6] and spread over v0..v4 before
; s_swappc_b64. After the call, v0..v3 are re-packed into one dword stored
; through v[41:42], and v4 is written separately as a single byte through an
; address register pair initialised to 4.
; Regenerate these lines with the update script rather than editing by hand.
4808 define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 {
4809 ; GFX9-LABEL: test_call_external_void_func_v5i8_ret:
4811 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4812 ; GFX9-NEXT: s_mov_b32 s34, s33
4813 ; GFX9-NEXT: s_mov_b32 s33, s32
4814 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4815 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4816 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4817 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4818 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4819 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
4820 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
4821 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[41:42], off
4822 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
4823 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4824 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4825 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4826 ; GFX9-NEXT: s_getpc_b64 s[34:35]
4827 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i8_ret@rel32@lo+4
4828 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8_ret@rel32@hi+12
4829 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4830 ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4831 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4832 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4833 ; GFX9-NEXT: v_mov_b32_e32 v0, v5
4834 ; GFX9-NEXT: v_mov_b32_e32 v4, v6
4835 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
4836 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
4837 ; GFX9-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4838 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v3
4839 ; GFX9-NEXT: v_or_b32_sdwa v2, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4840 ; GFX9-NEXT: v_mov_b32_e32 v0, 4
4841 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
4842 ; GFX9-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4843 ; GFX9-NEXT: global_store_byte v[0:1], v4, off
4844 ; GFX9-NEXT: global_store_dword v[41:42], v2, off
4845 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4846 ; GFX9-NEXT: s_nop 0
4847 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4848 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
4849 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
4850 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
4851 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
4852 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4853 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
4854 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4855 ; GFX9-NEXT: s_mov_b32 s33, s34
4856 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4857 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4859 ; GFX10-LABEL: test_call_external_void_func_v5i8_ret:
4861 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4862 ; GFX10-NEXT: s_mov_b32 s34, s33
4863 ; GFX10-NEXT: s_mov_b32 s33, s32
4864 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4865 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
4866 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4867 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4868 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4869 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4870 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
4871 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
4872 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
4873 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4874 ; GFX10-NEXT: s_getpc_b64 s[34:35]
4875 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i8_ret@rel32@lo+4
4876 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8_ret@rel32@hi+12
4877 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[41:42], off
4878 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4879 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4880 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4881 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4882 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4883 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4884 ; GFX10-NEXT: v_mov_b32_e32 v0, v5
4885 ; GFX10-NEXT: v_mov_b32_e32 v4, v6
4886 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
4887 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
4888 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3
4889 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
4890 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
4891 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
4892 ; GFX10-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4893 ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4894 ; GFX10-NEXT: v_mov_b32_e32 v0, 4
4895 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
4896 ; GFX10-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4897 ; GFX10-NEXT: global_store_byte v[0:1], v4, off
4898 ; GFX10-NEXT: global_store_dword v[41:42], v2, off
4899 ; GFX10-NEXT: s_clause 0x1
4900 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4901 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4902 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
4903 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
4904 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4905 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
4906 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4907 ; GFX10-NEXT: s_mov_b32 s33, s34
4908 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4909 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4911 ; GFX11-LABEL: test_call_external_void_func_v5i8_ret:
4913 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4914 ; GFX11-NEXT: s_mov_b32 s0, s33
4915 ; GFX11-NEXT: s_mov_b32 s33, s32
4916 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4917 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
4918 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4919 ; GFX11-NEXT: s_clause 0x1
4920 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
4921 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
4922 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
4923 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
4924 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
4925 ; GFX11-NEXT: s_add_i32 s32, s32, 16
4926 ; GFX11-NEXT: s_getpc_b64 s[0:1]
4927 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i8_ret@rel32@lo+4
4928 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8_ret@rel32@hi+12
4929 ; GFX11-NEXT: global_load_b64 v[5:6], v[41:42], off
4930 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
4931 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
4932 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4933 ; GFX11-NEXT: v_mov_b32_e32 v0, v5
4934 ; GFX11-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4935 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4936 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4937 ; GFX11-NEXT: v_mov_b32_e32 v4, v6
4938 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
4939 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
4940 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
4941 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
4942 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
4943 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
4944 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
4945 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
4946 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4947 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
4948 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
4949 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
4950 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v0
4951 ; GFX11-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_lshlrev_b32 v3, 16, v1
4952 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
4953 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
4954 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
4955 ; GFX11-NEXT: s_clause 0x1
4956 ; GFX11-NEXT: global_store_b8 v[0:1], v4, off
4957 ; GFX11-NEXT: global_store_b32 v[41:42], v2, off
4958 ; GFX11-NEXT: s_clause 0x1
4959 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
4960 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
4961 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
4962 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
4963 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
4964 ; GFX11-NEXT: s_add_i32 s32, s32, -16
4965 ; GFX11-NEXT: s_mov_b32 s33, s0
4966 ; GFX11-NEXT: s_waitcnt vmcnt(0)
4967 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4969 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i8_ret:
4970 ; GFX10-SCRATCH: ; %bb.0:
4971 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4972 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
4973 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4974 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
4975 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
4976 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4977 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
4978 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4979 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4980 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
4981 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
4982 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
4983 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4984 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4985 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i8_ret@rel32@lo+4
4986 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8_ret@rel32@hi+12
4987 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[41:42], off
4988 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4989 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4990 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4991 ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6]
4992 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5
4993 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v5
4994 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, v5
4995 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v6
4996 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4997 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
4998 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3
4999 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5000 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5001 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5002 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5003 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5004 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4
5005 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
5006 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5007 ; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v4, off
5008 ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v2, off
5009 ; GFX10-SCRATCH-NEXT: s_clause 0x1
5010 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
5011 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
5012 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5013 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
5014 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5015 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5016 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5017 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5018 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5019 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: round-trip the <5 x i8> through the external callee.
5020 %val = load <5 x i8>, ptr addrspace(1) null
5021 %tmp = call amdgpu_gfx <5 x i8> @external_void_func_v5i8_ret(<5 x i8> %val)
5022 store <5 x i8> %tmp, ptr addrspace(1) null
; Test: pass a <8 x i8> to, and receive a <8 x i8> from, an external
; amdgpu_gfx function. The IR at the end of this function loads the vector
; from the null pointer in addrspace(1), calls external_void_func_v8i8_ret
; with it, and stores the returned vector back to the same address.
;
; The assertions below are autogenerated (see the NOTE at the top of the
; file), one group per llc RUN line. In the expected code the vector is
; fetched with a 64-bit load into v[0:1] and spread over v0..v7 with shifts
; and moves before s_swappc_b64. After the call, v0..v7 are re-packed into
; two dwords that are written with a single 64-bit global store.
; Regenerate these lines with the update script rather than editing by hand.
5027 define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 {
5028 ; GFX9-LABEL: test_call_external_void_func_v8i8_ret:
5030 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5031 ; GFX9-NEXT: s_mov_b32 s34, s33
5032 ; GFX9-NEXT: s_mov_b32 s33, s32
5033 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5034 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
5035 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5036 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5037 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
5038 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
5039 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
5040 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[41:42], off
5041 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5042 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5043 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5044 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5045 ; GFX9-NEXT: s_getpc_b64 s[34:35]
5046 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i8_ret@rel32@lo+4
5047 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i8_ret@rel32@hi+12
5048 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5049 ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0
5050 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5051 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
5052 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5053 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5054 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5055 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
5056 ; GFX9-NEXT: v_mov_b32_e32 v1, v8
5057 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5058 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v5
5059 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
5060 ; GFX9-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5061 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v7
5062 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5063 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v3
5064 ; GFX9-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5065 ; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5066 ; GFX9-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5067 ; GFX9-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5068 ; GFX9-NEXT: global_store_dwordx2 v[41:42], v[3:4], off
5069 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
5070 ; GFX9-NEXT: s_nop 0
5071 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
5072 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5073 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5074 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5075 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5076 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
5077 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5078 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5079 ; GFX9-NEXT: s_mov_b32 s33, s34
5080 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5081 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5083 ; GFX10-LABEL: test_call_external_void_func_v8i8_ret:
5085 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5086 ; GFX10-NEXT: s_mov_b32 s34, s33
5087 ; GFX10-NEXT: s_mov_b32 s33, s32
5088 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5089 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
5090 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5091 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5092 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5093 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
5094 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
5095 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
5096 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5097 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5098 ; GFX10-NEXT: s_getpc_b64 s[34:35]
5099 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i8_ret@rel32@lo+4
5100 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i8_ret@rel32@hi+12
5101 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[41:42], off
5102 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5103 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5104 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5105 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0
5106 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5107 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
5108 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5109 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5110 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5111 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
5112 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
5113 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5114 ; GFX10-NEXT: v_lshlrev_b16 v5, 8, v5
5115 ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v7
5116 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
5117 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3
5118 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5119 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5120 ; GFX10-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5121 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5122 ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5123 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5124 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5125 ; GFX10-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5126 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5127 ; GFX10-NEXT: global_store_dwordx2 v[41:42], v[0:1], off
5128 ; GFX10-NEXT: s_clause 0x1
5129 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
5130 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
5131 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5132 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
5133 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5134 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5135 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5136 ; GFX10-NEXT: s_mov_b32 s33, s34
5137 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5138 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5140 ; GFX11-LABEL: test_call_external_void_func_v8i8_ret:
5142 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5143 ; GFX11-NEXT: s_mov_b32 s0, s33
5144 ; GFX11-NEXT: s_mov_b32 s33, s32
5145 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5146 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
5147 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5148 ; GFX11-NEXT: s_clause 0x1
5149 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
5150 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
5151 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
5152 ; GFX11-NEXT: v_mov_b32_e32 v42, 0
5153 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5154 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5155 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5156 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i8_ret@rel32@lo+4
5157 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8_ret@rel32@hi+12
5158 ; GFX11-NEXT: global_load_b64 v[0:1], v[41:42], off
5159 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5160 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5161 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5162 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 8, v0
5163 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5164 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
5165 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5166 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5167 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5168 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v8
5169 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5170 ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
5171 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5172 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
5173 ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
5174 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
5175 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
5176 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
5177 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
5178 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
5179 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
5180 ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
5181 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
5182 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5183 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
5184 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4
5185 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5
5186 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
5187 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5188 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1
5189 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5190 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
5191 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
5192 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
5193 ; GFX11-NEXT: global_store_b64 v[41:42], v[0:1], off
5194 ; GFX11-NEXT: s_clause 0x1
5195 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
5196 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
5197 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5198 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
5199 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5200 ; GFX11-NEXT: s_add_i32 s32, s32, -16
5201 ; GFX11-NEXT: s_mov_b32 s33, s0
5202 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5203 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5205 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i8_ret:
5206 ; GFX10-SCRATCH: ; %bb.0:
5207 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5208 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5209 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5210 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5211 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
5212 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5213 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5214 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
5215 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
5216 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
5217 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
5218 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5219 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5220 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5221 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i8_ret@rel32@lo+4
5222 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8_ret@rel32@hi+12
5223 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[41:42], off
5224 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5225 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5226 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5227 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0
5228 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5229 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0
5230 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5231 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5232 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5233 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
5234 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v8
5235 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5236 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v5, 8, v5
5237 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v7
5238 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
5239 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3
5240 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5241 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5242 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5243 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5244 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5245 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5246 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5247 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5248 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5249 ; GFX10-SCRATCH-NEXT: global_store_dwordx2 v[41:42], v[0:1], off
5250 ; GFX10-SCRATCH-NEXT: s_clause 0x1
5251 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
5252 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
5253 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5254 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
5255 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5256 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5257 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5258 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5259 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5260 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: round-trip the <8 x i8> through the external callee.
5261 %val = load <8 x i8>, ptr addrspace(1) null
5262 %tmp = call amdgpu_gfx <8 x i8> @external_void_func_v8i8_ret(<8 x i8> %val)
5263 store <8 x i8> %tmp, ptr addrspace(1) null
5268 define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 {
5269 ; GFX9-LABEL: test_call_external_void_func_v32i8_ret:
5271 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5272 ; GFX9-NEXT: s_mov_b32 s34, s33
5273 ; GFX9-NEXT: s_mov_b32 s33, s32
5274 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5275 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
5276 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5277 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
5278 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
5279 ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5280 ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
5281 ; GFX9-NEXT: v_mov_b32_e32 v41, 0
5282 ; GFX9-NEXT: v_mov_b32_e32 v43, 16
5283 ; GFX9-NEXT: v_mov_b32_e32 v42, 0
5284 ; GFX9-NEXT: v_mov_b32_e32 v44, 0
5285 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[41:42], off
5286 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[43:44], off
5287 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5288 ; GFX9-NEXT: s_addk_i32 s32, 0x800
5289 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5290 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5291 ; GFX9-NEXT: s_getpc_b64 s[34:35]
5292 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i8_ret@rel32@lo+4
5293 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i8_ret@rel32@hi+12
5294 ; GFX9-NEXT: s_waitcnt vmcnt(1)
5295 ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5296 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5297 ; GFX9-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5298 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5299 ; GFX9-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5300 ; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5301 ; GFX9-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5302 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5303 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5304 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5305 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5306 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5307 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5308 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5309 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5310 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5311 ; GFX9-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5312 ; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5313 ; GFX9-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5314 ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5315 ; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5316 ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5317 ; GFX9-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5318 ; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5319 ; GFX9-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5320 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
5321 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
5322 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
5323 ; GFX9-NEXT: v_mov_b32_e32 v20, v17
5324 ; GFX9-NEXT: v_mov_b32_e32 v24, v18
5325 ; GFX9-NEXT: v_mov_b32_e32 v28, v19
5326 ; GFX9-NEXT: v_mov_b32_e32 v1, v35
5327 ; GFX9-NEXT: v_mov_b32_e32 v2, v36
5328 ; GFX9-NEXT: v_mov_b32_e32 v3, v37
5329 ; GFX9-NEXT: v_mov_b32_e32 v17, v32
5330 ; GFX9-NEXT: v_mov_b32_e32 v18, v33
5331 ; GFX9-NEXT: v_mov_b32_e32 v19, v34
5332 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5333 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
5334 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v5
5335 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5336 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v3
5337 ; GFX9-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5338 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v7
5339 ; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5340 ; GFX9-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5341 ; GFX9-NEXT: v_or_b32_sdwa v6, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5342 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v29
5343 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v31
5344 ; GFX9-NEXT: v_or_b32_sdwa v0, v28, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5345 ; GFX9-NEXT: v_or_b32_sdwa v1, v30, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5346 ; GFX9-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5347 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v25
5348 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v27
5349 ; GFX9-NEXT: v_or_b32_sdwa v0, v24, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5350 ; GFX9-NEXT: v_or_b32_sdwa v1, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5351 ; GFX9-NEXT: v_or_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5352 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v21
5353 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v23
5354 ; GFX9-NEXT: v_or_b32_sdwa v0, v20, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5355 ; GFX9-NEXT: v_or_b32_sdwa v1, v22, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5356 ; GFX9-NEXT: v_lshlrev_b16_e32 v13, 8, v13
5357 ; GFX9-NEXT: v_lshlrev_b16_e32 v9, 8, v9
5358 ; GFX9-NEXT: v_or_b32_sdwa v7, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5359 ; GFX9-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5360 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v17
5361 ; GFX9-NEXT: v_lshlrev_b16_e32 v4, 8, v19
5362 ; GFX9-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5363 ; GFX9-NEXT: v_lshlrev_b16_e32 v13, 8, v15
5364 ; GFX9-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5365 ; GFX9-NEXT: v_lshlrev_b16_e32 v9, 8, v11
5366 ; GFX9-NEXT: v_or_b32_sdwa v0, v16, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5367 ; GFX9-NEXT: v_or_b32_sdwa v4, v18, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5368 ; GFX9-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5369 ; GFX9-NEXT: v_or_b32_sdwa v10, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5370 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5371 ; GFX9-NEXT: v_or_b32_sdwa v9, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5372 ; GFX9-NEXT: v_or_b32_sdwa v8, v8, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5373 ; GFX9-NEXT: global_store_dwordx4 v[43:44], v[0:3], off
5374 ; GFX9-NEXT: global_store_dwordx4 v[41:42], v[6:9], off
5375 ; GFX9-NEXT: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload
5376 ; GFX9-NEXT: s_nop 0
5377 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
5378 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
5379 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
5380 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5381 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5382 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5383 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5384 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
5385 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5386 ; GFX9-NEXT: s_addk_i32 s32, 0xf800
5387 ; GFX9-NEXT: s_mov_b32 s33, s34
5388 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5389 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5391 ; GFX10-LABEL: test_call_external_void_func_v32i8_ret:
5393 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5394 ; GFX10-NEXT: s_mov_b32 s34, s33
5395 ; GFX10-NEXT: s_mov_b32 s33, s32
5396 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5397 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
5398 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5399 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5400 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
5401 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
5402 ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5403 ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
5404 ; GFX10-NEXT: v_mov_b32_e32 v41, 0
5405 ; GFX10-NEXT: v_mov_b32_e32 v43, 16
5406 ; GFX10-NEXT: v_mov_b32_e32 v42, 0
5407 ; GFX10-NEXT: v_mov_b32_e32 v44, 0
5408 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5409 ; GFX10-NEXT: s_addk_i32 s32, 0x400
5410 ; GFX10-NEXT: s_getpc_b64 s[34:35]
5411 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i8_ret@rel32@lo+4
5412 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i8_ret@rel32@hi+12
5413 ; GFX10-NEXT: s_clause 0x1
5414 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[41:42], off
5415 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[43:44], off
5416 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5417 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5418 ; GFX10-NEXT: s_waitcnt vmcnt(1)
5419 ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5420 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5421 ; GFX10-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5422 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5423 ; GFX10-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5424 ; GFX10-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5425 ; GFX10-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5426 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5427 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5428 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5429 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5430 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5431 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5432 ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5433 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5434 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5435 ; GFX10-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5436 ; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5437 ; GFX10-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5438 ; GFX10-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5439 ; GFX10-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5440 ; GFX10-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5441 ; GFX10-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5442 ; GFX10-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5443 ; GFX10-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5444 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
5445 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
5446 ; GFX10-NEXT: v_mov_b32_e32 v12, v3
5447 ; GFX10-NEXT: v_mov_b32_e32 v20, v17
5448 ; GFX10-NEXT: v_mov_b32_e32 v24, v18
5449 ; GFX10-NEXT: v_mov_b32_e32 v28, v19
5450 ; GFX10-NEXT: v_mov_b32_e32 v1, v35
5451 ; GFX10-NEXT: v_mov_b32_e32 v2, v36
5452 ; GFX10-NEXT: v_mov_b32_e32 v3, v37
5453 ; GFX10-NEXT: v_mov_b32_e32 v17, v32
5454 ; GFX10-NEXT: v_mov_b32_e32 v18, v33
5455 ; GFX10-NEXT: v_mov_b32_e32 v19, v34
5456 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5457 ; GFX10-NEXT: v_lshlrev_b16 v13, 8, v13
5458 ; GFX10-NEXT: v_lshlrev_b16 v9, 8, v9
5459 ; GFX10-NEXT: v_lshlrev_b16 v11, 8, v11
5460 ; GFX10-NEXT: v_lshlrev_b16 v5, 8, v5
5461 ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v7
5462 ; GFX10-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5463 ; GFX10-NEXT: v_lshlrev_b16 v13, 8, v15
5464 ; GFX10-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5465 ; GFX10-NEXT: v_or_b32_sdwa v9, v10, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5466 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5467 ; GFX10-NEXT: v_or_b32_sdwa v7, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5468 ; GFX10-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5469 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
5470 ; GFX10-NEXT: v_or_b32_sdwa v5, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5471 ; GFX10-NEXT: v_lshlrev_b16 v8, 8, v31
5472 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5473 ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v29
5474 ; GFX10-NEXT: v_lshlrev_b16 v9, 8, v25
5475 ; GFX10-NEXT: v_or_b32_sdwa v6, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5476 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5477 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v3
5478 ; GFX10-NEXT: v_or_b32_sdwa v3, v28, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5479 ; GFX10-NEXT: v_or_b32_sdwa v7, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5480 ; GFX10-NEXT: v_or_b32_sdwa v8, v24, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5481 ; GFX10-NEXT: v_lshlrev_b16 v9, 8, v27
5482 ; GFX10-NEXT: v_lshlrev_b16 v10, 8, v21
5483 ; GFX10-NEXT: v_lshlrev_b16 v11, 8, v23
5484 ; GFX10-NEXT: v_lshlrev_b16 v12, 8, v17
5485 ; GFX10-NEXT: v_lshlrev_b16 v13, 8, v19
5486 ; GFX10-NEXT: v_or_b32_sdwa v9, v26, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5487 ; GFX10-NEXT: v_or_b32_sdwa v14, v20, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5488 ; GFX10-NEXT: v_or_b32_sdwa v11, v22, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5489 ; GFX10-NEXT: v_or_b32_sdwa v12, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5490 ; GFX10-NEXT: v_or_b32_sdwa v13, v18, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5491 ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5492 ; GFX10-NEXT: v_or_b32_sdwa v10, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5493 ; GFX10-NEXT: v_or_b32_sdwa v9, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5494 ; GFX10-NEXT: v_or_b32_sdwa v8, v14, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5495 ; GFX10-NEXT: v_or_b32_sdwa v7, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5496 ; GFX10-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5497 ; GFX10-NEXT: global_store_dwordx4 v[43:44], v[7:10], off
5498 ; GFX10-NEXT: global_store_dwordx4 v[41:42], v[3:6], off
5499 ; GFX10-NEXT: s_clause 0x3
5500 ; GFX10-NEXT: buffer_load_dword v44, off, s[0:3], s33
5501 ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4
5502 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8
5503 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12
5504 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5505 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5506 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5507 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5508 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
5509 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5510 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5511 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00
5512 ; GFX10-NEXT: s_mov_b32 s33, s34
5513 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5514 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5516 ; GFX11-LABEL: test_call_external_void_func_v32i8_ret:
5518 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5519 ; GFX11-NEXT: s_mov_b32 s0, s33
5520 ; GFX11-NEXT: s_mov_b32 s33, s32
5521 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5522 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill
5523 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5524 ; GFX11-NEXT: s_clause 0x3
5525 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12
5526 ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8
5527 ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:4
5528 ; GFX11-NEXT: scratch_store_b32 off, v44, s33
5529 ; GFX11-NEXT: v_mov_b32_e32 v41, 0
5530 ; GFX11-NEXT: v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v43, 16
5531 ; GFX11-NEXT: v_mov_b32_e32 v44, 0
5532 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5533 ; GFX11-NEXT: s_add_i32 s32, s32, 32
5534 ; GFX11-NEXT: global_load_b128 v[0:3], v[41:42], off
5535 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5536 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i8_ret@rel32@lo+4
5537 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i8_ret@rel32@hi+12
5538 ; GFX11-NEXT: global_load_b128 v[16:19], v[43:44], off
5539 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5540 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5541 ; GFX11-NEXT: s_waitcnt vmcnt(1)
5542 ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5543 ; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5544 ; GFX11-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5545 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5546 ; GFX11-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5547 ; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5548 ; GFX11-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5549 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5550 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5551 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5552 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5553 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5554 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5555 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5556 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5557 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5558 ; GFX11-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5559 ; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5560 ; GFX11-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5561 ; GFX11-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5562 ; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5563 ; GFX11-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5564 ; GFX11-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5565 ; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5566 ; GFX11-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5567 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v35
5568 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
5569 ; GFX11-NEXT: v_mov_b32_e32 v12, v3
5570 ; GFX11-NEXT: v_mov_b32_e32 v20, v17
5571 ; GFX11-NEXT: v_mov_b32_e32 v24, v18
5572 ; GFX11-NEXT: v_dual_mov_b32 v28, v19 :: v_dual_mov_b32 v19, v34
5573 ; GFX11-NEXT: v_dual_mov_b32 v2, v36 :: v_dual_mov_b32 v3, v37
5574 ; GFX11-NEXT: v_dual_mov_b32 v17, v32 :: v_dual_mov_b32 v18, v33
5575 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5576 ; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
5577 ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
5578 ; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
5579 ; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
5580 ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
5581 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
5582 ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
5583 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
5584 ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
5585 ; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
5586 ; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
5587 ; GFX11-NEXT: v_or_b32_e32 v9, v10, v11
5588 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
5589 ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
5590 ; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
5591 ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v15
5592 ; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
5593 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v8
5594 ; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v9
5595 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4
5596 ; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v5
5597 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
5598 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
5599 ; GFX11-NEXT: v_or_b32_e32 v13, v14, v13
5600 ; GFX11-NEXT: v_or_b32_e32 v5, v7, v8
5601 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v9
5602 ; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v28
5603 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
5604 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v29
5605 ; GFX11-NEXT: v_lshlrev_b16 v8, 8, v31
5606 ; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v30
5607 ; GFX11-NEXT: v_lshlrev_b16 v10, 8, v25
5608 ; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v24
5609 ; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12
5610 ; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v13
5611 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
5612 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
5613 ; GFX11-NEXT: v_or_b32_e32 v1, v7, v1
5614 ; GFX11-NEXT: v_or_b32_e32 v7, v9, v8
5615 ; GFX11-NEXT: v_or_b32_e32 v8, v11, v10
5616 ; GFX11-NEXT: v_or_b32_e32 v6, v12, v6
5617 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v3
5618 ; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v26
5619 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v7
5620 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v8
5621 ; GFX11-NEXT: v_lshlrev_b16 v8, 8, v27
5622 ; GFX11-NEXT: v_lshlrev_b16 v10, 8, v21
5623 ; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v20
5624 ; GFX11-NEXT: v_lshlrev_b16 v12, 8, v23
5625 ; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v22
5626 ; GFX11-NEXT: v_lshlrev_b16 v14, 8, v17
5627 ; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v16
5628 ; GFX11-NEXT: v_lshlrev_b16 v16, 8, v19
5629 ; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v18
5630 ; GFX11-NEXT: v_or_b32_e32 v8, v9, v8
5631 ; GFX11-NEXT: v_or_b32_e32 v9, v11, v10
5632 ; GFX11-NEXT: v_or_b32_e32 v10, v13, v12
5633 ; GFX11-NEXT: v_or_b32_e32 v11, v15, v14
5634 ; GFX11-NEXT: v_or_b32_e32 v12, v17, v16
5635 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
5636 ; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v8
5637 ; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v9
5638 ; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v10
5639 ; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11
5640 ; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v12
5641 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
5642 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2
5643 ; GFX11-NEXT: v_or_b32_e32 v10, v1, v3
5644 ; GFX11-NEXT: v_or_b32_e32 v9, v7, v8
5645 ; GFX11-NEXT: v_or_b32_e32 v8, v13, v14
5646 ; GFX11-NEXT: v_or_b32_e32 v7, v11, v12
5647 ; GFX11-NEXT: v_or_b32_e32 v3, v0, v2
5648 ; GFX11-NEXT: s_clause 0x1
5649 ; GFX11-NEXT: global_store_b128 v[43:44], v[7:10], off
5650 ; GFX11-NEXT: global_store_b128 v[41:42], v[3:6], off
5651 ; GFX11-NEXT: s_clause 0x3
5652 ; GFX11-NEXT: scratch_load_b32 v44, off, s33
5653 ; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:4
5654 ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8
5655 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:12
5656 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5657 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5658 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5659 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5660 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 ; 4-byte Folded Reload
5661 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5662 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0
5663 ; GFX11-NEXT: s_mov_b32 s33, s0
5664 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5665 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5667 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i8_ret:
5668 ; GFX10-SCRATCH: ; %bb.0:
5669 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5670 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5671 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5672 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5673 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:16 ; 4-byte Folded Spill
5674 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5675 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5676 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:12 ; 4-byte Folded Spill
5677 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill
5678 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v43, s33 offset:4 ; 4-byte Folded Spill
5679 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v44, s33 ; 4-byte Folded Spill
5680 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0
5681 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v43, 16
5682 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0
5683 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v44, 0
5684 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5685 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
5686 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5687 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i8_ret@rel32@lo+4
5688 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i8_ret@rel32@hi+12
5689 ; GFX10-SCRATCH-NEXT: s_clause 0x1
5690 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[41:42], off
5691 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[43:44], off
5692 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5693 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5694 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
5695 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0
5696 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0
5697 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v37, 24, v0
5698 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5699 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v32, 8, v16
5700 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v33, 16, v16
5701 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v34, 24, v16
5702 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
5703 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
5704 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
5705 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2
5706 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2
5707 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2
5708 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3
5709 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3
5710 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3
5711 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v21, 8, v17
5712 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v22, 16, v17
5713 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v23, 24, v17
5714 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v25, 8, v18
5715 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v26, 16, v18
5716 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v27, 24, v18
5717 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v29, 8, v19
5718 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v30, 16, v19
5719 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v31, 24, v19
5720 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
5721 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2
5722 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3
5723 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, v17
5724 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, v18
5725 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, v19
5726 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v35
5727 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v36
5728 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v37
5729 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, v32
5730 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, v33
5731 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, v34
5732 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5733 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v13, 8, v13
5734 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v9, 8, v9
5735 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v11, 8, v11
5736 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v5, 8, v5
5737 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v7
5738 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5739 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v13, 8, v15
5740 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5741 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v9, v10, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5742 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5743 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v7, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5744 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5745 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1
5746 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5747 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v8, 8, v31
5748 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5749 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v29
5750 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v9, 8, v25
5751 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v6, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5752 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5753 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v3
5754 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v3, v28, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5755 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v7, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5756 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v8, v24, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5757 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v9, 8, v27
5758 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v10, 8, v21
5759 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v11, 8, v23
5760 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v12, 8, v17
5761 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v13, 8, v19
5762 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v9, v26, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5763 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v14, v20, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5764 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v11, v22, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5765 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v12, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5766 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v13, v18, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5767 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5768 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v10, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5769 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v9, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5770 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v8, v14, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5771 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v7, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5772 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5773 ; GFX10-SCRATCH-NEXT: global_store_dwordx4 v[43:44], v[7:10], off
5774 ; GFX10-SCRATCH-NEXT: global_store_dwordx4 v[41:42], v[3:6], off
5775 ; GFX10-SCRATCH-NEXT: s_clause 0x3
5776 ; GFX10-SCRATCH-NEXT: scratch_load_dword v44, off, s33
5777 ; GFX10-SCRATCH-NEXT: scratch_load_dword v43, off, s33 offset:4
5778 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 offset:8
5779 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:12
5780 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5781 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5782 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5783 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5784 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:16 ; 4-byte Folded Reload
5785 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5786 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5787 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
5788 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5789 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5790 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
5791 %val = load <32 x i8>, ptr addrspace(1) null ; the <32 x i8> argument is loaded from the null global pointer
5792 %tmp = call amdgpu_gfx <32 x i8> @external_void_func_v32i8_ret(<32 x i8> %val) ; callee is the <32 x i8> variant, so the call-site types agree with its declaration
5793 store <32 x i8> %tmp, ptr addrspace(1) null ; the returned vector is written back to the same address
; Passes a <2 x i16> argument to the amdgpu_gfx callee @external_void_func_v2i16.
; The value is loaded from an undef addrspace(1) pointer; the expected assembly
; below loads it as a single dword into v0 ahead of the s_swappc_b64 call.
; The check lines are autogenerated (see the file header): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5799 define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 {
5800 ; GFX9-LABEL: test_call_external_void_func_v2i16:
5802 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5803 ; GFX9-NEXT: s_mov_b32 s34, s33
5804 ; GFX9-NEXT: s_mov_b32 s33, s32
5805 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5806 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5807 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5808 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
5809 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5810 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5811 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5812 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5813 ; GFX9-NEXT: s_getpc_b64 s[34:35]
5814 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4
5815 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12
5816 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5817 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5818 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5819 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5820 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5821 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5822 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5823 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5824 ; GFX9-NEXT: s_mov_b32 s33, s34
5825 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5826 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5828 ; GFX10-LABEL: test_call_external_void_func_v2i16:
5830 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5831 ; GFX10-NEXT: s_mov_b32 s34, s33
5832 ; GFX10-NEXT: s_mov_b32 s33, s32
5833 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5834 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5835 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5836 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5837 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
5838 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5839 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5840 ; GFX10-NEXT: s_getpc_b64 s[34:35]
5841 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4
5842 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12
5843 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5844 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5845 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5846 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5847 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5848 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5849 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5850 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5851 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5852 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5853 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5854 ; GFX10-NEXT: s_mov_b32 s33, s34
5855 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5856 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5858 ; GFX11-LABEL: test_call_external_void_func_v2i16:
5860 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5861 ; GFX11-NEXT: s_mov_b32 s0, s33
5862 ; GFX11-NEXT: s_mov_b32 s33, s32
5863 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5864 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
5865 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5866 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
5867 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5868 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5869 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5870 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4
5871 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12
5872 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5873 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5874 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5875 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5876 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5877 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5878 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
5879 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5880 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
5881 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5882 ; GFX11-NEXT: s_add_i32 s32, s32, -16
5883 ; GFX11-NEXT: s_mov_b32 s33, s0
5884 ; GFX11-NEXT: s_waitcnt vmcnt(0)
5885 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5887 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16:
5888 ; GFX10-SCRATCH: ; %bb.0:
5889 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5890 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
5891 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5892 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5893 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
5894 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5895 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5896 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
5897 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
5898 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5899 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5900 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4
5901 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12
5902 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5903 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5904 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5905 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
5906 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
5907 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
5908 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
5909 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
5910 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5911 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
5912 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5913 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
5914 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5915 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
5916 %val = load <2 x i16>, ptr addrspace(1) undef
5917 call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val)
; Passes a <3 x i16> argument to the amdgpu_gfx callee @external_void_func_v3i16.
; The value is loaded from an undef addrspace(1) pointer; the expected assembly
; below loads it as two dwords into v[0:1] ahead of the s_swappc_b64 call.
; The check lines are autogenerated (see the file header): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5921 define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 {
5922 ; GFX9-LABEL: test_call_external_void_func_v3i16:
5924 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5925 ; GFX9-NEXT: s_mov_b32 s34, s33
5926 ; GFX9-NEXT: s_mov_b32 s33, s32
5927 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5928 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5929 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5930 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5931 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
5932 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5933 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5934 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5935 ; GFX9-NEXT: s_getpc_b64 s[34:35]
5936 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
5937 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
5938 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
5939 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
5940 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
5941 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
5942 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
5943 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5944 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
5945 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5946 ; GFX9-NEXT: s_mov_b32 s33, s34
5947 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5948 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5950 ; GFX10-LABEL: test_call_external_void_func_v3i16:
5952 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5953 ; GFX10-NEXT: s_mov_b32 s34, s33
5954 ; GFX10-NEXT: s_mov_b32 s33, s32
5955 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5956 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5957 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5958 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5959 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
5960 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
5961 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5962 ; GFX10-NEXT: s_getpc_b64 s[34:35]
5963 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
5964 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
5965 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5966 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5967 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
5968 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
5969 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
5970 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
5971 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
5972 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5973 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5974 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
5975 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5976 ; GFX10-NEXT: s_mov_b32 s33, s34
5977 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5978 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5980 ; GFX11-LABEL: test_call_external_void_func_v3i16:
5982 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5983 ; GFX11-NEXT: s_mov_b32 s0, s33
5984 ; GFX11-NEXT: s_mov_b32 s33, s32
5985 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
5986 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
5987 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
5988 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
5989 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
5990 ; GFX11-NEXT: s_add_i32 s32, s32, 16
5991 ; GFX11-NEXT: s_getpc_b64 s[0:1]
5992 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
5993 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
5994 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5995 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5996 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
5997 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5998 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5999 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6000 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6001 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6002 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6003 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6004 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6005 ; GFX11-NEXT: s_mov_b32 s33, s0
6006 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6007 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6009 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16:
6010 ; GFX10-SCRATCH: ; %bb.0:
6011 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6012 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6013 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6014 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6015 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6016 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6017 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6018 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6019 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6020 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6021 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6022 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
6023 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
6024 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6025 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6026 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6027 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6028 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6029 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6030 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6031 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6032 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6033 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6034 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6035 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6036 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6037 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6038 %val = load <3 x i16>, ptr addrspace(1) undef
6039 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val)
; Passes a <3 x half> argument to the amdgpu_gfx callee @external_void_func_v3f16.
; The value is loaded from an undef addrspace(1) pointer; the expected assembly
; below loads it as two dwords into v[0:1] ahead of the s_swappc_b64 call.
; The check lines are autogenerated (see the file header): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
6043 define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 {
6044 ; GFX9-LABEL: test_call_external_void_func_v3f16:
6046 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6047 ; GFX9-NEXT: s_mov_b32 s34, s33
6048 ; GFX9-NEXT: s_mov_b32 s33, s32
6049 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6050 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6051 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6052 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6053 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6054 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6055 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6056 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6057 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6058 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
6059 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
6060 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6061 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6062 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6063 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6064 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6065 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6066 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6067 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6068 ; GFX9-NEXT: s_mov_b32 s33, s34
6069 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6070 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6072 ; GFX10-LABEL: test_call_external_void_func_v3f16:
6074 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6075 ; GFX10-NEXT: s_mov_b32 s34, s33
6076 ; GFX10-NEXT: s_mov_b32 s33, s32
6077 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6078 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6079 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6080 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6081 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6082 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6083 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6084 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6085 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
6086 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
6087 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6088 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6089 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6090 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6091 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6092 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6093 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6094 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6095 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6096 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6097 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6098 ; GFX10-NEXT: s_mov_b32 s33, s34
6099 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6100 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6102 ; GFX11-LABEL: test_call_external_void_func_v3f16:
6104 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6105 ; GFX11-NEXT: s_mov_b32 s0, s33
6106 ; GFX11-NEXT: s_mov_b32 s33, s32
6107 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6108 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6109 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6110 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
6111 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6112 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6113 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6114 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
6115 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
6116 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6117 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6118 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6119 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6120 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6121 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6122 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6123 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6124 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6125 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6126 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6127 ; GFX11-NEXT: s_mov_b32 s33, s0
6128 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6129 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6131 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16:
6132 ; GFX10-SCRATCH: ; %bb.0:
6133 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6134 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6135 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6136 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6137 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6138 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6139 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6140 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6141 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6142 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6143 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6144 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
6145 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
6146 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6147 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6148 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6149 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6150 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6151 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6152 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6153 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6154 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6155 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6156 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6157 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6158 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6159 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6160 %val = load <3 x half>, ptr addrspace(1) undef
6161 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val)
; Passes the constant <3 x i16> <1, 2, 3> to the amdgpu_gfx callee
; @external_void_func_v3i16. The expected assembly below materializes it as
; v0 = 0x20001 (element 0 in the low half, element 1 in the high half) and
; v1 = 3, with no memory load before the s_swappc_b64 call.
; The check lines are autogenerated (see the file header): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
6165 define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 {
6166 ; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
6168 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6169 ; GFX9-NEXT: s_mov_b32 s34, s33
6170 ; GFX9-NEXT: s_mov_b32 s33, s32
6171 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6172 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6173 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6174 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6175 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6176 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6177 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
6178 ; GFX9-NEXT: v_mov_b32_e32 v1, 3
6179 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6180 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6181 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
6182 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
6183 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6184 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6185 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6186 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6187 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6188 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6189 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6190 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6191 ; GFX9-NEXT: s_mov_b32 s33, s34
6192 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6193 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6195 ; GFX10-LABEL: test_call_external_void_func_v3i16_imm:
6197 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6198 ; GFX10-NEXT: s_mov_b32 s34, s33
6199 ; GFX10-NEXT: s_mov_b32 s33, s32
6200 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6201 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6202 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6203 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6204 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6205 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
6206 ; GFX10-NEXT: v_mov_b32_e32 v1, 3
6207 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6208 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6209 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
6210 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
6211 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6212 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6213 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6214 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6215 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6216 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6217 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6218 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6219 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6220 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6221 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6222 ; GFX10-NEXT: s_mov_b32 s33, s34
6223 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6224 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6226 ; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
6228 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6229 ; GFX11-NEXT: s_mov_b32 s0, s33
6230 ; GFX11-NEXT: s_mov_b32 s33, s32
6231 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6232 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6233 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6234 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6235 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
6236 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6237 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6238 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
6239 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
6240 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6241 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6242 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6243 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6244 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6245 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6246 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6247 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6248 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6249 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6250 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6251 ; GFX11-NEXT: s_mov_b32 s33, s0
6252 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6253 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6255 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm:
6256 ; GFX10-SCRATCH: ; %bb.0:
6257 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6258 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6259 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6260 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6261 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6262 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6263 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6264 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6265 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001
6266 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3
6267 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6268 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6269 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
6270 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
6271 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6272 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6273 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6274 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6275 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6276 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6277 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6278 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6279 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6280 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6281 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6282 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6283 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6284 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6285 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
; Passes the constant <3 x half> <1.0, 2.0, 4.0> to the amdgpu_gfx callee
; @external_void_func_v3f16. The expected assembly below materializes it as
; v0 = 0x40003c00 (0x3c00 is half 1.0 in the low half, 0x4000 is half 2.0 in
; the high half) and v1 = 0x4400 (half 4.0), with no memory load before the
; s_swappc_b64 call.
; The check lines are autogenerated (see the file header): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
6289 define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 {
6290 ; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
6292 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6293 ; GFX9-NEXT: s_mov_b32 s34, s33
6294 ; GFX9-NEXT: s_mov_b32 s33, s32
6295 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6296 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6297 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6298 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6299 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6300 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6301 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
6302 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
6303 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6304 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6305 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
6306 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
6307 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6308 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6309 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6310 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6311 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6312 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6313 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6314 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6315 ; GFX9-NEXT: s_mov_b32 s33, s34
6316 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6317 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6319 ; GFX10-LABEL: test_call_external_void_func_v3f16_imm:
6321 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6322 ; GFX10-NEXT: s_mov_b32 s34, s33
6323 ; GFX10-NEXT: s_mov_b32 s33, s32
6324 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6325 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6326 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6327 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6328 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6329 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00
6330 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400
6331 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6332 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6333 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
6334 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
6335 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6336 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6337 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6338 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6339 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6340 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6341 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6342 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6343 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6344 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6345 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6346 ; GFX10-NEXT: s_mov_b32 s33, s34
6347 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6348 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6350 ; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
6352 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6353 ; GFX11-NEXT: s_mov_b32 s0, s33
6354 ; GFX11-NEXT: s_mov_b32 s33, s32
6355 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6356 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6357 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6358 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6359 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00
6360 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400
6361 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6362 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6363 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
6364 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
6365 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6366 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6367 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6368 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6369 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6370 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6371 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6372 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6373 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6374 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6375 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6376 ; GFX11-NEXT: s_mov_b32 s33, s0
6377 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6378 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6380 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm:
6381 ; GFX10-SCRATCH: ; %bb.0:
6382 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6383 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6384 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6385 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6386 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6387 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6388 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6389 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6390 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00
6391 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400
6392 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6393 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6394 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
6395 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
6396 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6397 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6398 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6399 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6400 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6401 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6402 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6403 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6404 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6405 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6406 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6407 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6408 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6409 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6410 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
6414 define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 {
6415 ; GFX9-LABEL: test_call_external_void_func_v4i16:
6417 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6418 ; GFX9-NEXT: s_mov_b32 s34, s33
6419 ; GFX9-NEXT: s_mov_b32 s33, s32
6420 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6421 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6422 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6423 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6424 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6425 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6426 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6427 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6428 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6429 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
6430 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
6431 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6432 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6433 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6434 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6435 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6436 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6437 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6438 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6439 ; GFX9-NEXT: s_mov_b32 s33, s34
6440 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6441 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6443 ; GFX10-LABEL: test_call_external_void_func_v4i16:
6445 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6446 ; GFX10-NEXT: s_mov_b32 s34, s33
6447 ; GFX10-NEXT: s_mov_b32 s33, s32
6448 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6449 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6450 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6451 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6452 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6453 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6454 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6455 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6456 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
6457 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
6458 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6459 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6460 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6461 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6462 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6463 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6464 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6465 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6466 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6467 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6468 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6469 ; GFX10-NEXT: s_mov_b32 s33, s34
6470 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6471 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6473 ; GFX11-LABEL: test_call_external_void_func_v4i16:
6475 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6476 ; GFX11-NEXT: s_mov_b32 s0, s33
6477 ; GFX11-NEXT: s_mov_b32 s33, s32
6478 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6479 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6480 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6481 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
6482 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6483 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6484 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6485 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
6486 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
6487 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6488 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6489 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6490 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6491 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6492 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6493 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6494 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6495 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6496 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6497 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6498 ; GFX11-NEXT: s_mov_b32 s33, s0
6499 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6500 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6502 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16:
6503 ; GFX10-SCRATCH: ; %bb.0:
6504 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6505 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6506 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6507 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6508 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6509 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6510 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6511 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6512 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6513 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6514 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6515 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
6516 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
6517 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6518 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6519 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6520 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6521 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6522 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6523 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6524 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6525 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6526 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6527 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6528 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6529 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6530 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6531 %val = load <4 x i16>, ptr addrspace(1) undef
6532 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val)
; Calls external_void_func_v4i16 with the constant vector <i16 1, i16 2, i16 3, i16 4>.
; The expected output materializes the argument as two packed 32-bit immediates
; (0x20001 in v0, 0x40003 in v1) before the s_swappc_b64 call.
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
6536 define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 {
6537 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
6539 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6540 ; GFX9-NEXT: s_mov_b32 s34, s33
6541 ; GFX9-NEXT: s_mov_b32 s33, s32
6542 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6543 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6544 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6545 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6546 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6547 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6548 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
6549 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
6550 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6551 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6552 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
6553 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
6554 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6555 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6556 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6557 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6558 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6559 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6560 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6561 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6562 ; GFX9-NEXT: s_mov_b32 s33, s34
6563 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6564 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6566 ; GFX10-LABEL: test_call_external_void_func_v4i16_imm:
6568 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6569 ; GFX10-NEXT: s_mov_b32 s34, s33
6570 ; GFX10-NEXT: s_mov_b32 s33, s32
6571 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6572 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6573 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6574 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6575 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6576 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
6577 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003
6578 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6579 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6580 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
6581 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
6582 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6583 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6584 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6585 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6586 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6587 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6588 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6589 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6590 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6591 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6592 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6593 ; GFX10-NEXT: s_mov_b32 s33, s34
6594 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6595 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6597 ; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
6599 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6600 ; GFX11-NEXT: s_mov_b32 s0, s33
6601 ; GFX11-NEXT: s_mov_b32 s33, s32
6602 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6603 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6604 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6605 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6606 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001
6607 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003
6608 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6609 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6610 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
6611 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
6612 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6613 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6614 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6615 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6616 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6617 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6618 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6619 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6620 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6621 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6622 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6623 ; GFX11-NEXT: s_mov_b32 s33, s0
6624 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6625 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6627 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm:
6628 ; GFX10-SCRATCH: ; %bb.0:
6629 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6630 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6631 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6632 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6633 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6634 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6635 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6636 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6637 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001
6638 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003
6639 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6640 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6641 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
6642 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
6643 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6644 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6645 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6646 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6647 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6648 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6649 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6650 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6651 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6652 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6653 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6654 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6655 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6656 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6657 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
; Loads a <2 x half> through an undef addrspace(1) pointer and passes it to
; external_void_func_v2f16. The expected output is a single 32-bit global load
; into v0 followed by the s_swappc_b64 call.
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
6661 define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 {
6662 ; GFX9-LABEL: test_call_external_void_func_v2f16:
6664 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6665 ; GFX9-NEXT: s_mov_b32 s34, s33
6666 ; GFX9-NEXT: s_mov_b32 s33, s32
6667 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6668 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6669 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6670 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
6671 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6672 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6673 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6674 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6675 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6676 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4
6677 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12
6678 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6679 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6680 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6681 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6682 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6683 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6684 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6685 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6686 ; GFX9-NEXT: s_mov_b32 s33, s34
6687 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6688 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6690 ; GFX10-LABEL: test_call_external_void_func_v2f16:
6692 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6693 ; GFX10-NEXT: s_mov_b32 s34, s33
6694 ; GFX10-NEXT: s_mov_b32 s33, s32
6695 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6696 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6697 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6698 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6699 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
6700 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6701 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6702 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6703 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4
6704 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12
6705 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6706 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6707 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6708 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6709 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6710 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6711 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6712 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6713 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6714 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6715 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6716 ; GFX10-NEXT: s_mov_b32 s33, s34
6717 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6718 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6720 ; GFX11-LABEL: test_call_external_void_func_v2f16:
6722 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6723 ; GFX11-NEXT: s_mov_b32 s0, s33
6724 ; GFX11-NEXT: s_mov_b32 s33, s32
6725 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6726 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6727 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6728 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off
6729 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6730 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6731 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6732 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4
6733 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12
6734 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6735 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6736 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6737 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6738 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6739 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6740 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6741 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6742 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6743 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6744 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6745 ; GFX11-NEXT: s_mov_b32 s33, s0
6746 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6747 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6749 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16:
6750 ; GFX10-SCRATCH: ; %bb.0:
6751 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6752 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6753 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6754 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6755 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6756 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6757 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6758 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
6759 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6760 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6761 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6762 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4
6763 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12
6764 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6765 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6766 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6767 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6768 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6769 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6770 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6771 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6772 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6773 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6774 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6775 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6776 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6777 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6778 %val = load <2 x half>, ptr addrspace(1) undef
6779 call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val)
; Loads a <2 x i32> through an undef addrspace(1) pointer and passes it to
; external_void_func_v2i32. The expected output is a single 64-bit global load
; into v[0:1] followed by the s_swappc_b64 call.
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
6783 define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 {
6784 ; GFX9-LABEL: test_call_external_void_func_v2i32:
6786 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6787 ; GFX9-NEXT: s_mov_b32 s34, s33
6788 ; GFX9-NEXT: s_mov_b32 s33, s32
6789 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6790 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6791 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6792 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6793 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6794 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6795 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6796 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6797 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6798 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
6799 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
6800 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6801 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6802 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6803 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6804 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6805 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6806 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6807 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6808 ; GFX9-NEXT: s_mov_b32 s33, s34
6809 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6810 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6812 ; GFX10-LABEL: test_call_external_void_func_v2i32:
6814 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6815 ; GFX10-NEXT: s_mov_b32 s34, s33
6816 ; GFX10-NEXT: s_mov_b32 s33, s32
6817 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6818 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6819 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6820 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6821 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6822 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6823 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6824 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6825 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
6826 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
6827 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6828 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6829 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6830 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6831 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6832 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6833 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6834 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6835 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6836 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6837 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6838 ; GFX10-NEXT: s_mov_b32 s33, s34
6839 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6840 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6842 ; GFX11-LABEL: test_call_external_void_func_v2i32:
6844 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6845 ; GFX11-NEXT: s_mov_b32 s0, s33
6846 ; GFX11-NEXT: s_mov_b32 s33, s32
6847 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6848 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6849 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6850 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
6851 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6852 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6853 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6854 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
6855 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
6856 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6857 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6858 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6859 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6860 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6861 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6862 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6863 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6864 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6865 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6866 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6867 ; GFX11-NEXT: s_mov_b32 s33, s0
6868 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6869 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6871 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32:
6872 ; GFX10-SCRATCH: ; %bb.0:
6873 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6874 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6875 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6876 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6877 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
6878 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6879 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6880 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
6881 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
6882 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6883 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6884 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
6885 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
6886 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6887 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6888 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6889 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
6890 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
6891 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
6892 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
6893 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
6894 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6895 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
6896 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6897 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
6898 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6899 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
6900 %val = load <2 x i32>, ptr addrspace(1) undef
6901 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val)
; Calls external_void_func_v2i32 with the constant vector <i32 1, i32 2>.
; The expected output moves 1 into v0 and 2 into v1 before the call; the
; gfx1100 run expects both moves fused into one v_dual_mov_b32.
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
6905 define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 {
6906 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
6908 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6909 ; GFX9-NEXT: s_mov_b32 s34, s33
6910 ; GFX9-NEXT: s_mov_b32 s33, s32
6911 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6912 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6913 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6914 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
6915 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6916 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6917 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
6918 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
6919 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6920 ; GFX9-NEXT: s_getpc_b64 s[34:35]
6921 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
6922 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
6923 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
6924 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
6925 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
6926 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
6927 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
6928 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6929 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
6930 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6931 ; GFX9-NEXT: s_mov_b32 s33, s34
6932 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6933 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6935 ; GFX10-LABEL: test_call_external_void_func_v2i32_imm:
6937 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6938 ; GFX10-NEXT: s_mov_b32 s34, s33
6939 ; GFX10-NEXT: s_mov_b32 s33, s32
6940 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6941 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6942 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6943 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6944 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
6945 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
6946 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
6947 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6948 ; GFX10-NEXT: s_getpc_b64 s[34:35]
6949 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
6950 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
6951 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6952 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6953 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
6954 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
6955 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
6956 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
6957 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
6958 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6959 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6960 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
6961 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6962 ; GFX10-NEXT: s_mov_b32 s33, s34
6963 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6964 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6966 ; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
6968 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6969 ; GFX11-NEXT: s_mov_b32 s0, s33
6970 ; GFX11-NEXT: s_mov_b32 s33, s32
6971 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6972 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6973 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6974 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
6975 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
6976 ; GFX11-NEXT: s_add_i32 s32, s32, 16
6977 ; GFX11-NEXT: s_getpc_b64 s[0:1]
6978 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
6979 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
6980 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
6981 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
6982 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
6983 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6984 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
6985 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
6986 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
6987 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
6988 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6989 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
6990 ; GFX11-NEXT: s_add_i32 s32, s32, -16
6991 ; GFX11-NEXT: s_mov_b32 s33, s0
6992 ; GFX11-NEXT: s_waitcnt vmcnt(0)
6993 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6995 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm:
6996 ; GFX10-SCRATCH: ; %bb.0:
6997 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6998 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
6999 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7000 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7001 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7002 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7003 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7004 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7005 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
7006 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
7007 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7008 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7009 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
7010 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
7011 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7012 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7013 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7014 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7015 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7016 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7017 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7018 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7019 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7020 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7021 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7022 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7023 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7024 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7025 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
; Calls external_void_func_v3i32 with the constant vector <i32 3, i32 4, i32 5>.
; The function takes one unnamed i32 parameter, which the call below does not use.
; The expected output moves 3, 4 and 5 into v0, v1 and v2 before the call; the
; gfx1100 run expects the first two moves fused into one v_dual_mov_b32.
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
7029 define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
7030 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
7032 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7033 ; GFX9-NEXT: s_mov_b32 s34, s33
7034 ; GFX9-NEXT: s_mov_b32 s33, s32
7035 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7036 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7037 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7038 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7039 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7040 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7041 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
7042 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
7043 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
7044 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7045 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7046 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4
7047 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12
7048 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7049 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7050 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7051 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7052 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7053 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7054 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7055 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7056 ; GFX9-NEXT: s_mov_b32 s33, s34
7057 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7058 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7060 ; GFX10-LABEL: test_call_external_void_func_v3i32_imm:
7062 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7063 ; GFX10-NEXT: s_mov_b32 s34, s33
7064 ; GFX10-NEXT: s_mov_b32 s33, s32
7065 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7066 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7067 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7068 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7069 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7070 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
7071 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
7072 ; GFX10-NEXT: v_mov_b32_e32 v2, 5
7073 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7074 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7075 ; GFX10-NEXT: s_getpc_b64 s[34:35]
7076 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4
7077 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12
7078 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7079 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7080 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7081 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7082 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7083 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7084 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7085 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7086 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7087 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7088 ; GFX10-NEXT: s_mov_b32 s33, s34
7089 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7090 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7092 ; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
7094 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7095 ; GFX11-NEXT: s_mov_b32 s0, s33
7096 ; GFX11-NEXT: s_mov_b32 s33, s32
7097 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7098 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7099 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7100 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7101 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
7102 ; GFX11-NEXT: v_mov_b32_e32 v2, 5
7103 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7104 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7105 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7106 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4
7107 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12
7108 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7109 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7110 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7111 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7112 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7113 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7114 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7115 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7116 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7117 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7118 ; GFX11-NEXT: s_mov_b32 s33, s0
7119 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7120 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7122 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm:
7123 ; GFX10-SCRATCH: ; %bb.0:
7124 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7125 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7126 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7127 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7128 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7129 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7130 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7131 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7132 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
7133 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4
7134 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5
7135 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7136 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7137 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7138 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4
7139 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12
7140 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7141 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7142 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7143 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7144 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7145 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7146 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7147 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7148 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7149 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7150 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7151 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7152 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7153 call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
; Passes the constant vector <3 x i32> <3, 4, 5> followed by the scalar i32 6 to
; external_void_func_v3i32_i32. In the expected output of all four run lines the
; values are moved into v0, v1, v2 and v3 in that order (the gfx1100 output uses
; two v_dual_mov_b32 instead of four v_mov_b32_e32). The incoming s33 and
; s[30:31] are kept in lanes 2, 0 and 1 of v40 across the s_swappc_b64. The
; function's own unnamed i32 parameter is not referenced by the call below.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
7157 define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
7158 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
7160 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7161 ; GFX9-NEXT: s_mov_b32 s34, s33
7162 ; GFX9-NEXT: s_mov_b32 s33, s32
7163 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7164 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7165 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7166 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7167 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7168 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7169 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
7170 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
7171 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
7172 ; GFX9-NEXT: v_mov_b32_e32 v3, 6
7173 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7174 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7175 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4
7176 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12
7177 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7178 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7179 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7180 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7181 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7182 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7183 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7184 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7185 ; GFX9-NEXT: s_mov_b32 s33, s34
7186 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7187 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7189 ; GFX10-LABEL: test_call_external_void_func_v3i32_i32:
7191 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7192 ; GFX10-NEXT: s_mov_b32 s34, s33
7193 ; GFX10-NEXT: s_mov_b32 s33, s32
7194 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7195 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7196 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7197 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7198 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7199 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
7200 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
7201 ; GFX10-NEXT: v_mov_b32_e32 v2, 5
7202 ; GFX10-NEXT: v_mov_b32_e32 v3, 6
7203 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7204 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7205 ; GFX10-NEXT: s_getpc_b64 s[34:35]
7206 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4
7207 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12
7208 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7209 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7210 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7211 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7212 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7213 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7214 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7215 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7216 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7217 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7218 ; GFX10-NEXT: s_mov_b32 s33, s34
7219 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7220 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7222 ; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
7224 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7225 ; GFX11-NEXT: s_mov_b32 s0, s33
7226 ; GFX11-NEXT: s_mov_b32 s33, s32
7227 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7228 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7229 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7230 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7231 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
7232 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6
7233 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7234 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7235 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7236 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4
7237 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12
7238 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7239 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7240 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7241 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7242 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7243 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7244 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7245 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7246 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7247 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7248 ; GFX11-NEXT: s_mov_b32 s33, s0
7249 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7250 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7252 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32:
7253 ; GFX10-SCRATCH: ; %bb.0:
7254 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7255 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7256 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7257 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7258 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7259 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7260 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7261 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7262 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
7263 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4
7264 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5
7265 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6
7266 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7267 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7268 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7269 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4
7270 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12
7271 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7272 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7273 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7274 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7275 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7276 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7277 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7278 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7279 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7280 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7281 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7282 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7283 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7284 call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
; Loads a <4 x i32> from an undef addrspace(1) pointer and passes it to
; external_void_func_v4i32. Every run line is expected to emit a single 128-bit
; global load with v[0:1] as the address and v[0:3] as the destination
; (global_load_dwordx4, or global_load_b128 on gfx1100), with no vector moves
; between the load and the s_swappc_b64.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
7288 define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
7289 ; GFX9-LABEL: test_call_external_void_func_v4i32:
7291 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7292 ; GFX9-NEXT: s_mov_b32 s34, s33
7293 ; GFX9-NEXT: s_mov_b32 s33, s32
7294 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7295 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7296 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7297 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
7298 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7299 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7300 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7301 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7302 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7303 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
7304 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
7305 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7306 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7307 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7308 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7309 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7310 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7311 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7312 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7313 ; GFX9-NEXT: s_mov_b32 s33, s34
7314 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7315 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7317 ; GFX10-LABEL: test_call_external_void_func_v4i32:
7319 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7320 ; GFX10-NEXT: s_mov_b32 s34, s33
7321 ; GFX10-NEXT: s_mov_b32 s33, s32
7322 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7323 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7324 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7325 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7326 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
7327 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7328 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7329 ; GFX10-NEXT: s_getpc_b64 s[34:35]
7330 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
7331 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
7332 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7333 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7334 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7335 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7336 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7337 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7338 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7339 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7340 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7341 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7342 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7343 ; GFX10-NEXT: s_mov_b32 s33, s34
7344 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7345 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7347 ; GFX11-LABEL: test_call_external_void_func_v4i32:
7349 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7350 ; GFX11-NEXT: s_mov_b32 s0, s33
7351 ; GFX11-NEXT: s_mov_b32 s33, s32
7352 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7353 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7354 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7355 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
7356 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7357 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7358 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7359 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
7360 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
7361 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7362 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7363 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7364 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7365 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7366 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7367 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7368 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7369 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7370 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7371 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7372 ; GFX11-NEXT: s_mov_b32 s33, s0
7373 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7374 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7376 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32:
7377 ; GFX10-SCRATCH: ; %bb.0:
7378 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7379 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7380 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7381 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7382 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7383 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7384 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7385 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
7386 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7387 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7388 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7389 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
7390 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
7391 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7392 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7393 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7394 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7395 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7396 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7397 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7398 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7399 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7400 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7401 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7402 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7403 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7404 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7405 %val = load <4 x i32>, ptr addrspace(1) undef
7406 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val)
; Passes the constant vector <4 x i32> <1, 2, 3, 4> to external_void_func_v4i32.
; In the expected output of all four run lines the constants 1..4 are moved
; into v0..v3 in element order (the gfx1100 output uses two v_dual_mov_b32
; instead of four v_mov_b32_e32) before the s_swappc_b64.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
7410 define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
7411 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
7413 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7414 ; GFX9-NEXT: s_mov_b32 s34, s33
7415 ; GFX9-NEXT: s_mov_b32 s33, s32
7416 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7417 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7418 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7419 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7420 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7421 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7422 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
7423 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
7424 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
7425 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
7426 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7427 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7428 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
7429 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
7430 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7431 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7432 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7433 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7434 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7435 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7436 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7437 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7438 ; GFX9-NEXT: s_mov_b32 s33, s34
7439 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7440 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7442 ; GFX10-LABEL: test_call_external_void_func_v4i32_imm:
7444 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7445 ; GFX10-NEXT: s_mov_b32 s34, s33
7446 ; GFX10-NEXT: s_mov_b32 s33, s32
7447 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7448 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7449 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7450 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7451 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7452 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
7453 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
7454 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
7455 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
7456 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7457 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7458 ; GFX10-NEXT: s_getpc_b64 s[34:35]
7459 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
7460 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
7461 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7462 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7463 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7464 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7465 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7466 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7467 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7468 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7469 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7470 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7471 ; GFX10-NEXT: s_mov_b32 s33, s34
7472 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7473 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7475 ; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
7477 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7478 ; GFX11-NEXT: s_mov_b32 s0, s33
7479 ; GFX11-NEXT: s_mov_b32 s33, s32
7480 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7481 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7482 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7483 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7484 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
7485 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
7486 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7487 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7488 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7489 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
7490 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
7491 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7492 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7493 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7494 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7495 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7496 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7497 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7498 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7499 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7500 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7501 ; GFX11-NEXT: s_mov_b32 s33, s0
7502 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7503 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7505 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm:
7506 ; GFX10-SCRATCH: ; %bb.0:
7507 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7508 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7509 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7510 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7511 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7512 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7513 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7514 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7515 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
7516 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
7517 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
7518 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
7519 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7520 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7521 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7522 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
7523 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
7524 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7525 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7526 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7527 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7528 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7529 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7530 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7531 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7532 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7533 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7534 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7535 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7536 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7537 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
; Passes the constant vector <5 x i32> <1, 2, 3, 4, 5> to
; external_void_func_v5i32. In the expected output of all four run lines the
; constants 1..5 end up in v0..v4 in element order before the s_swappc_b64; the
; gfx1100 output pairs the first four with v_dual_mov_b32 and uses a plain
; v_mov_b32_e32 for v4.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
7541 define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
7542 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
7544 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7545 ; GFX9-NEXT: s_mov_b32 s34, s33
7546 ; GFX9-NEXT: s_mov_b32 s33, s32
7547 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7548 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7549 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7550 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7551 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7552 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7553 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
7554 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
7555 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
7556 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
7557 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
7558 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7559 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7560 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4
7561 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12
7562 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7563 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7564 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7565 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7566 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7567 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7568 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7569 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7570 ; GFX9-NEXT: s_mov_b32 s33, s34
7571 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7572 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7574 ; GFX10-LABEL: test_call_external_void_func_v5i32_imm:
7576 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7577 ; GFX10-NEXT: s_mov_b32 s34, s33
7578 ; GFX10-NEXT: s_mov_b32 s33, s32
7579 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7580 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7581 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7582 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7583 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7584 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
7585 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
7586 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
7587 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
7588 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7589 ; GFX10-NEXT: v_mov_b32_e32 v4, 5
7590 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7591 ; GFX10-NEXT: s_getpc_b64 s[34:35]
7592 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4
7593 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12
7594 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7595 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7596 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7597 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7598 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7599 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7600 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7601 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7602 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7603 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7604 ; GFX10-NEXT: s_mov_b32 s33, s34
7605 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7606 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7608 ; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
7610 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7611 ; GFX11-NEXT: s_mov_b32 s0, s33
7612 ; GFX11-NEXT: s_mov_b32 s33, s32
7613 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7614 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7615 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7616 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7617 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
7618 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
7619 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7620 ; GFX11-NEXT: v_mov_b32_e32 v4, 5
7621 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7622 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7623 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4
7624 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12
7625 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7626 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7627 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7628 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7629 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7630 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7631 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7632 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7633 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7634 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7635 ; GFX11-NEXT: s_mov_b32 s33, s0
7636 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7637 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7639 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm:
7640 ; GFX10-SCRATCH: ; %bb.0:
7641 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7642 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7643 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7644 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7645 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7646 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7647 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7648 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7649 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
7650 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
7651 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
7652 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
7653 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7654 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5
7655 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7656 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7657 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4
7658 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12
7659 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7660 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7661 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7662 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7663 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7664 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7665 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7666 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7667 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7668 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7669 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7670 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7671 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7672 call amdgpu_gfx void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Loads a pointer from an undef addrspace(4) pointer, loads a <8 x i32> through
; that pointer, and passes the vector to external_void_func_v8i32. The expected
; output reads the pointer with a 64-bit scalar load, waits on lgkmcnt(0), and
; then issues two 128-bit global loads at offsets 0 and 16 into v[0:3] and
; v[4:7]. The zero VGPR operand of those loads is expected in v8 for gfx900 and
; gfx1010 and in v4 for gfx1100, where it overlaps the second load's
; destination.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
7676 define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
7677 ; GFX9-LABEL: test_call_external_void_func_v8i32:
7679 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7680 ; GFX9-NEXT: s_mov_b32 s34, s33
7681 ; GFX9-NEXT: s_mov_b32 s33, s32
7682 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7683 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7684 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7685 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7686 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7687 ; GFX9-NEXT: v_mov_b32_e32 v8, 0
7688 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7689 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7690 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7691 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35]
7692 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16
7693 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7694 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7695 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
7696 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
7697 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7698 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7699 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7700 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7701 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7702 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7703 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7704 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7705 ; GFX9-NEXT: s_mov_b32 s33, s34
7706 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7707 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7709 ; GFX10-LABEL: test_call_external_void_func_v8i32:
7711 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7712 ; GFX10-NEXT: s_mov_b32 s34, s33
7713 ; GFX10-NEXT: s_mov_b32 s33, s32
7714 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7715 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7716 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7717 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7718 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7719 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7720 ; GFX10-NEXT: v_mov_b32_e32 v8, 0
7721 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7722 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
7723 ; GFX10-NEXT: s_clause 0x1
7724 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35]
7725 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16
7726 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7727 ; GFX10-NEXT: s_getpc_b64 s[34:35]
7728 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
7729 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
7730 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7731 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7732 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7733 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7734 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7735 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7736 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7737 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7738 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7739 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7740 ; GFX10-NEXT: s_mov_b32 s33, s34
7741 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7742 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7744 ; GFX11-LABEL: test_call_external_void_func_v8i32:
7746 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7747 ; GFX11-NEXT: s_mov_b32 s0, s33
7748 ; GFX11-NEXT: s_mov_b32 s33, s32
7749 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7750 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7751 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7752 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7753 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
7754 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
7755 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7756 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
7757 ; GFX11-NEXT: s_clause 0x1
7758 ; GFX11-NEXT: global_load_b128 v[0:3], v4, s[0:1]
7759 ; GFX11-NEXT: global_load_b128 v[4:7], v4, s[0:1] offset:16
7760 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7761 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7762 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
7763 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
7764 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7765 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7766 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7767 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7768 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7769 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7770 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7771 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7772 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7773 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7774 ; GFX11-NEXT: s_mov_b32 s33, s0
7775 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7776 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7778 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32:
7779 ; GFX10-SCRATCH: ; %bb.0:
7780 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7781 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7782 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7783 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7784 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7785 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7786 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7787 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7788 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
7789 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0
7790 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7791 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
7792 ; GFX10-SCRATCH-NEXT: s_clause 0x1
7793 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1]
7794 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16
7795 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7796 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7797 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
7798 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
7799 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7800 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7801 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7802 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7803 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7804 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7805 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7806 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7807 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7808 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7809 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7810 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7811 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
7812 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
7813 %val = load <8 x i32>, ptr addrspace(1) %ptr
7814 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val)
; Test: call external_void_func_v8i32 with the constant vector <1, 2, ..., 8>.
; In each of the four checked configurations the expected code sets v0-v7 to
; the immediates 1-8 (paired into v_dual_mov_b32 in the GFX11 checks) before
; the s_swappc_b64 call, and saves/restores s30, s31 and the old s33 value
; through lanes 0-2 of v40 around the call.
7818 define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
7819 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
7821 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7822 ; GFX9-NEXT: s_mov_b32 s34, s33
7823 ; GFX9-NEXT: s_mov_b32 s33, s32
7824 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7825 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7826 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7827 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7828 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7829 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7830 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
7831 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
7832 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
7833 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
7834 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
7835 ; GFX9-NEXT: v_mov_b32_e32 v5, 6
7836 ; GFX9-NEXT: v_mov_b32_e32 v6, 7
7837 ; GFX9-NEXT: v_mov_b32_e32 v7, 8
7838 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7839 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7840 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
7841 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
7842 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7843 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7844 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7845 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7846 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7847 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7848 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7849 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7850 ; GFX9-NEXT: s_mov_b32 s33, s34
7851 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7852 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7854 ; GFX10-LABEL: test_call_external_void_func_v8i32_imm:
7856 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7857 ; GFX10-NEXT: s_mov_b32 s34, s33
7858 ; GFX10-NEXT: s_mov_b32 s33, s32
7859 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7860 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7861 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7862 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7863 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
7864 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
7865 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
7866 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
7867 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
7868 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7869 ; GFX10-NEXT: v_mov_b32_e32 v4, 5
7870 ; GFX10-NEXT: v_mov_b32_e32 v5, 6
7871 ; GFX10-NEXT: v_mov_b32_e32 v6, 7
7872 ; GFX10-NEXT: v_mov_b32_e32 v7, 8
7873 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7874 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7875 ; GFX10-NEXT: s_getpc_b64 s[34:35]
7876 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
7877 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
7878 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
7879 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
7880 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
7881 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
7882 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
7883 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7884 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7885 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
7886 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7887 ; GFX10-NEXT: s_mov_b32 s33, s34
7888 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7889 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7891 ; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
7893 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7894 ; GFX11-NEXT: s_mov_b32 s0, s33
7895 ; GFX11-NEXT: s_mov_b32 s33, s32
7896 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7897 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7898 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7899 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
7900 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
7901 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
7902 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
7903 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6
7904 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8
7905 ; GFX11-NEXT: s_add_i32 s32, s32, 16
7906 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
7907 ; GFX11-NEXT: s_getpc_b64 s[0:1]
7908 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
7909 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
7910 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7911 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
7912 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
7913 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
7914 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
7915 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
7916 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7917 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
7918 ; GFX11-NEXT: s_add_i32 s32, s32, -16
7919 ; GFX11-NEXT: s_mov_b32 s33, s0
7920 ; GFX11-NEXT: s_waitcnt vmcnt(0)
7921 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7923 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm:
7924 ; GFX10-SCRATCH: ; %bb.0:
7925 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7926 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
7927 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7928 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7929 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
7930 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7931 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7932 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
7933 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
7934 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
7935 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
7936 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
7937 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7938 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5
7939 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 6
7940 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7
7941 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8
7942 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7943 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7944 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7945 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
7946 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
7947 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7948 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
7949 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
7950 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
7951 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
7952 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
7953 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7954 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
7955 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7956 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
7957 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7958 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: a single call whose only argument is a vector constant.
7959 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
; Test: load a <16 x i32> value from memory and pass it to
; external_void_func_v16i32.
; The expected code fetches the 64-bit source pointer with one scalar load,
; then fills v0-v15 with four 16-byte global loads at offsets 0, 16, 32 and 48
; before the s_swappc_b64 call. In the GFX11 checks the address register v12 is
; also the first destination register of the final load.
7963 define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
7964 ; GFX9-LABEL: test_call_external_void_func_v16i32:
7966 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7967 ; GFX9-NEXT: s_mov_b32 s34, s33
7968 ; GFX9-NEXT: s_mov_b32 s33, s32
7969 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7970 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7971 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7972 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
7973 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
7974 ; GFX9-NEXT: v_mov_b32_e32 v16, 0
7975 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7976 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7977 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7978 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35]
7979 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16
7980 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32
7981 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48
7982 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7983 ; GFX9-NEXT: s_getpc_b64 s[34:35]
7984 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4
7985 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12
7986 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
7987 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
7988 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
7989 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
7990 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
7991 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7992 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
7993 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7994 ; GFX9-NEXT: s_mov_b32 s33, s34
7995 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7996 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7998 ; GFX10-LABEL: test_call_external_void_func_v16i32:
8000 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8001 ; GFX10-NEXT: s_mov_b32 s34, s33
8002 ; GFX10-NEXT: s_mov_b32 s33, s32
8003 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8004 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8005 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8006 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8007 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8008 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8009 ; GFX10-NEXT: v_mov_b32_e32 v16, 0
8010 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8011 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8012 ; GFX10-NEXT: s_clause 0x3
8013 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35]
8014 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16
8015 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32
8016 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48
8017 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8018 ; GFX10-NEXT: s_getpc_b64 s[34:35]
8019 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4
8020 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12
8021 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8022 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8023 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8024 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8025 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8026 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8027 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8028 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8029 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8030 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8031 ; GFX10-NEXT: s_mov_b32 s33, s34
8032 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8033 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8035 ; GFX11-LABEL: test_call_external_void_func_v16i32:
8037 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8038 ; GFX11-NEXT: s_mov_b32 s0, s33
8039 ; GFX11-NEXT: s_mov_b32 s33, s32
8040 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8041 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
8042 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8043 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8044 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
8045 ; GFX11-NEXT: v_mov_b32_e32 v12, 0
8046 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8047 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8048 ; GFX11-NEXT: s_clause 0x3
8049 ; GFX11-NEXT: global_load_b128 v[0:3], v12, s[0:1]
8050 ; GFX11-NEXT: global_load_b128 v[4:7], v12, s[0:1] offset:16
8051 ; GFX11-NEXT: global_load_b128 v[8:11], v12, s[0:1] offset:32
8052 ; GFX11-NEXT: global_load_b128 v[12:15], v12, s[0:1] offset:48
8053 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8054 ; GFX11-NEXT: s_getpc_b64 s[0:1]
8055 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4
8056 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12
8057 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8058 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8059 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8060 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8061 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8062 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8063 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8064 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8065 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8066 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8067 ; GFX11-NEXT: s_mov_b32 s33, s0
8068 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8069 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8071 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32:
8072 ; GFX10-SCRATCH: ; %bb.0:
8073 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8074 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8075 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8076 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8077 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
8078 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8079 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8080 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8081 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8082 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0
8083 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8084 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8085 ; GFX10-SCRATCH-NEXT: s_clause 0x3
8086 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1]
8087 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16
8088 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32
8089 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48
8090 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8091 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8092 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4
8093 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12
8094 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8095 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8096 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8097 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8098 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8099 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8100 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
8101 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8102 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8103 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8104 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8105 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8106 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: the addrspace(1) source pointer is itself loaded through an
; undef addrspace(4) pointer, so the vector is a run-time value, not a constant.
8107 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
8108 %val = load <16 x i32>, ptr addrspace(1) %ptr
8109 call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val)
; Test: load a <32 x i32> value from memory and pass it to
; external_void_func_v32i32.
; The expected code fills v0-v31 with eight 16-byte global loads at offsets
; 0 through 112 before the s_swappc_b64 call. In the GFX9 and GFX11 checks the
; address register v28 is overwritten by the last load; the GFX10 and
; GFX10-SCRATCH checks use the separate register v32 for the address.
8113 define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
8114 ; GFX9-LABEL: test_call_external_void_func_v32i32:
8116 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8117 ; GFX9-NEXT: s_mov_b32 s34, s33
8118 ; GFX9-NEXT: s_mov_b32 s33, s32
8119 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8120 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8121 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8122 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8123 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8124 ; GFX9-NEXT: v_mov_b32_e32 v28, 0
8125 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8126 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8127 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8128 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35]
8129 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16
8130 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32
8131 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48
8132 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64
8133 ; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80
8134 ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96
8135 ; GFX9-NEXT: s_nop 0
8136 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112
8137 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8138 ; GFX9-NEXT: s_getpc_b64 s[34:35]
8139 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4
8140 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12
8141 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8142 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8143 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8144 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8145 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8146 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8147 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8148 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8149 ; GFX9-NEXT: s_mov_b32 s33, s34
8150 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8151 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8153 ; GFX10-LABEL: test_call_external_void_func_v32i32:
8155 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8156 ; GFX10-NEXT: s_mov_b32 s34, s33
8157 ; GFX10-NEXT: s_mov_b32 s33, s32
8158 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8159 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8160 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8161 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8162 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8163 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8164 ; GFX10-NEXT: v_mov_b32_e32 v32, 0
8165 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8166 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8167 ; GFX10-NEXT: s_clause 0x7
8168 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35]
8169 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16
8170 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32
8171 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48
8172 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64
8173 ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80
8174 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96
8175 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112
8176 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8177 ; GFX10-NEXT: s_getpc_b64 s[34:35]
8178 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4
8179 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12
8180 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8181 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8182 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8183 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8184 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8185 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8186 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8187 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8188 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8189 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8190 ; GFX10-NEXT: s_mov_b32 s33, s34
8191 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8192 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8194 ; GFX11-LABEL: test_call_external_void_func_v32i32:
8196 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8197 ; GFX11-NEXT: s_mov_b32 s0, s33
8198 ; GFX11-NEXT: s_mov_b32 s33, s32
8199 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8200 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
8201 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8202 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8203 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
8204 ; GFX11-NEXT: v_mov_b32_e32 v28, 0
8205 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8206 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8207 ; GFX11-NEXT: s_clause 0x7
8208 ; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1]
8209 ; GFX11-NEXT: global_load_b128 v[4:7], v28, s[0:1] offset:16
8210 ; GFX11-NEXT: global_load_b128 v[8:11], v28, s[0:1] offset:32
8211 ; GFX11-NEXT: global_load_b128 v[12:15], v28, s[0:1] offset:48
8212 ; GFX11-NEXT: global_load_b128 v[16:19], v28, s[0:1] offset:64
8213 ; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80
8214 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96
8215 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112
8216 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8217 ; GFX11-NEXT: s_getpc_b64 s[0:1]
8218 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4
8219 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12
8220 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8221 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8222 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8223 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8224 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8225 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8226 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8227 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8228 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8229 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8230 ; GFX11-NEXT: s_mov_b32 s33, s0
8231 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8232 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8234 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32:
8235 ; GFX10-SCRATCH: ; %bb.0:
8236 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8237 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8238 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8239 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8240 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
8241 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8242 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8243 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8244 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8245 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0
8246 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8247 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8248 ; GFX10-SCRATCH-NEXT: s_clause 0x7
8249 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1]
8250 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
8251 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
8252 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
8253 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
8254 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
8255 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
8256 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
8257 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8258 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8259 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4
8260 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12
8261 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8262 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8263 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8264 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8265 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8266 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8267 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
8268 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8269 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8270 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8271 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8272 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8273 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: the addrspace(1) source pointer is itself loaded through an
; undef addrspace(4) pointer, so the vector is a run-time value, not a constant.
8274 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
8275 %val = load <32 x i32>, ptr addrspace(1) %ptr
8276 call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val)
; Test: pass a <32 x i32> value followed by one extra i32 to
; external_void_func_v32i32_i32.
; The expected code loads the vector into v0-v31 with eight 16-byte global
; loads. The extra i32 is loaded into a separate register (v32 or v33) and,
; after an s_waitcnt vmcnt(8), stored to memory addressed by s32 just before
; the s_swappc_b64 call. s32 is presumably the stack pointer, i.e. the
; argument is passed in memory; confirm against the amdgpu_gfx convention.
; The function's own unnamed i32 parameter is not referenced by the IR body.
8280 define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
8281 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
8283 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8284 ; GFX9-NEXT: s_mov_b32 s34, s33
8285 ; GFX9-NEXT: s_mov_b32 s33, s32
8286 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8287 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8288 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8289 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8290 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8291 ; GFX9-NEXT: v_mov_b32_e32 v28, 0
8292 ; GFX9-NEXT: global_load_dword v32, v[0:1], off
8293 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8294 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8295 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35]
8296 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16
8297 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32
8298 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48
8299 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64
8300 ; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80
8301 ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96
8302 ; GFX9-NEXT: s_nop 0
8303 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112
8304 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8305 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8306 ; GFX9-NEXT: s_getpc_b64 s[34:35]
8307 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4
8308 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12
8309 ; GFX9-NEXT: s_waitcnt vmcnt(8)
8310 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
8311 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8312 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8313 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8314 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8315 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8316 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8317 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8318 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8319 ; GFX9-NEXT: s_mov_b32 s33, s34
8320 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8321 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8323 ; GFX10-LABEL: test_call_external_void_func_v32i32_i32:
8325 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8326 ; GFX10-NEXT: s_mov_b32 s34, s33
8327 ; GFX10-NEXT: s_mov_b32 s33, s32
8328 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8329 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8330 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8331 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8332 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8333 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8334 ; GFX10-NEXT: v_mov_b32_e32 v32, 0
8335 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8336 ; GFX10-NEXT: global_load_dword v33, v[0:1], off
8337 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8338 ; GFX10-NEXT: s_clause 0x7
8339 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35]
8340 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16
8341 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32
8342 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48
8343 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64
8344 ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80
8345 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96
8346 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112
8347 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8348 ; GFX10-NEXT: s_getpc_b64 s[34:35]
8349 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4
8350 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12
8351 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8352 ; GFX10-NEXT: s_waitcnt vmcnt(8)
8353 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
8354 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8355 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8356 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8357 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8358 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8359 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8360 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8361 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8362 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8363 ; GFX10-NEXT: s_mov_b32 s33, s34
8364 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8365 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8367 ; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
8369 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8370 ; GFX11-NEXT: s_mov_b32 s0, s33
8371 ; GFX11-NEXT: s_mov_b32 s33, s32
8372 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8373 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
8374 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8375 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8376 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
8377 ; GFX11-NEXT: v_mov_b32_e32 v28, 0
8378 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8379 ; GFX11-NEXT: global_load_b32 v32, v[0:1], off
8380 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8381 ; GFX11-NEXT: s_clause 0x7
8382 ; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1]
8383 ; GFX11-NEXT: global_load_b128 v[4:7], v28, s[0:1] offset:16
8384 ; GFX11-NEXT: global_load_b128 v[8:11], v28, s[0:1] offset:32
8385 ; GFX11-NEXT: global_load_b128 v[12:15], v28, s[0:1] offset:48
8386 ; GFX11-NEXT: global_load_b128 v[16:19], v28, s[0:1] offset:64
8387 ; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80
8388 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96
8389 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112
8390 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8391 ; GFX11-NEXT: s_getpc_b64 s[0:1]
8392 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4
8393 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12
8394 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8395 ; GFX11-NEXT: s_waitcnt vmcnt(8)
8396 ; GFX11-NEXT: scratch_store_b32 off, v32, s32
8397 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8398 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8399 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8400 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8401 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8402 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8403 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8404 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8405 ; GFX11-NEXT: s_mov_b32 s33, s0
8406 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8407 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8409 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32:
8410 ; GFX10-SCRATCH: ; %bb.0:
8411 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8412 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8413 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8414 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8415 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
8416 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8417 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8418 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8419 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8420 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0
8421 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8422 ; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off
8423 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8424 ; GFX10-SCRATCH-NEXT: s_clause 0x7
8425 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1]
8426 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
8427 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
8428 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
8429 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
8430 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
8431 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
8432 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
8433 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8434 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8435 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4
8436 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12
8437 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8438 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8)
8439 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32
8440 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8441 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8442 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8443 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8444 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8445 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
8446 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8447 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8448 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8449 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8450 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8451 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: %val0 is the 32-element vector; %val1 is the extra scalar,
; loaded from an undef addrspace(1) pointer rather than taken from the
; function's own i32 parameter.
8452 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
8453 %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
8454 %val1 = load i32, ptr addrspace(1) undef
8455 call amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
; Test: call an amdgpu_gfx function with the immediate i32 argument 42 and
; store the i32 it returns to %out using a volatile store.
; In the expected output below, the registers that hold %out on entry (v0/v1)
; are copied to v41/v42, v0 is loaded with the constant 42 for the call, and
; the store after the call uses v[41:42] as its address. v41/v42 are spilled
; before and reloaded after being used this way.
8459 define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
8460 ; GFX9-LABEL: test_call_external_i32_func_i32_imm:
8462 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8463 ; GFX9-NEXT: s_mov_b32 s34, s33
8464 ; GFX9-NEXT: s_mov_b32 s33, s32
8465 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8466 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8467 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8468 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8469 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8470 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
8471 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
8472 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8473 ; GFX9-NEXT: v_mov_b32_e32 v41, v0
8474 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
8475 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8476 ; GFX9-NEXT: v_mov_b32_e32 v42, v1
8477 ; GFX9-NEXT: s_getpc_b64 s[34:35]
8478 ; GFX9-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4
8479 ; GFX9-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12
8480 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8481 ; GFX9-NEXT: global_store_dword v[41:42], v0, off
8482 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8483 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
8484 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
8485 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8486 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8487 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8488 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8489 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8490 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8491 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8492 ; GFX9-NEXT: s_mov_b32 s33, s34
8493 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8494 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8496 ; GFX10-LABEL: test_call_external_i32_func_i32_imm:
8498 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8499 ; GFX10-NEXT: s_mov_b32 s34, s33
8500 ; GFX10-NEXT: s_mov_b32 s33, s32
8501 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8502 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8503 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8504 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8505 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8506 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
8507 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
8508 ; GFX10-NEXT: v_mov_b32_e32 v41, v0
8509 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
8510 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8511 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8512 ; GFX10-NEXT: v_mov_b32_e32 v42, v1
8513 ; GFX10-NEXT: s_getpc_b64 s[34:35]
8514 ; GFX10-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4
8515 ; GFX10-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12
8516 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8517 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8518 ; GFX10-NEXT: global_store_dword v[41:42], v0, off
8519 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8520 ; GFX10-NEXT: s_clause 0x1
8521 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
8522 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
8523 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8524 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8525 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8526 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8527 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8528 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8529 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8530 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8531 ; GFX10-NEXT: s_mov_b32 s33, s34
8532 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8533 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8535 ; GFX11-LABEL: test_call_external_i32_func_i32_imm:
8537 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8538 ; GFX11-NEXT: s_mov_b32 s0, s33
8539 ; GFX11-NEXT: s_mov_b32 s33, s32
8540 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8541 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
8542 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8543 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8544 ; GFX11-NEXT: s_clause 0x1
8545 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
8546 ; GFX11-NEXT: scratch_store_b32 off, v42, s33
8547 ; GFX11-NEXT: v_dual_mov_b32 v42, v1 :: v_dual_mov_b32 v41, v0
8548 ; GFX11-NEXT: v_mov_b32_e32 v0, 42
8549 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8550 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8551 ; GFX11-NEXT: s_getpc_b64 s[0:1]
8552 ; GFX11-NEXT: s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4
8553 ; GFX11-NEXT: s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12
8554 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8555 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8556 ; GFX11-NEXT: global_store_b32 v[41:42], v0, off dlc
8557 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
8558 ; GFX11-NEXT: s_clause 0x1
8559 ; GFX11-NEXT: scratch_load_b32 v42, off, s33
8560 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
8561 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8562 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8563 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8564 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8565 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
8566 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8567 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8568 ; GFX11-NEXT: s_mov_b32 s33, s0
8569 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8570 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8572 ; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm:
8573 ; GFX10-SCRATCH: ; %bb.0:
8574 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8575 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8576 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8577 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8578 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
8579 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8580 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8581 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8582 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
8583 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
8584 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v0
8585 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42
8586 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8587 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8588 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, v1
8589 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8590 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4
8591 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12
8592 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8593 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8594 ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v0, off
8595 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8596 ; GFX10-SCRATCH-NEXT: s_clause 0x1
8597 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
8598 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
8599 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8600 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8601 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8602 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8603 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
8604 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8605 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8606 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8607 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8608 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8609 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: the call with the constant argument, followed by the volatile
; store of the returned value to %out.
8610 %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42)
8611 store volatile i32 %val, ptr addrspace(1) %out
; Test: pass a { i8, i32 } struct by value to an amdgpu_gfx function.
; The struct is loaded through a global pointer that is itself loaded from an
; undef addrspace(4) pointer.
; In the expected output below, the i8 field is loaded as an unsigned byte
; into v0 and the i32 field is loaded from offset 4 into v1 before the call.
8615 define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
8616 ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
8618 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8619 ; GFX9-NEXT: s_mov_b32 s34, s33
8620 ; GFX9-NEXT: s_mov_b32 s33, s32
8621 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8622 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8623 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8624 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8625 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8626 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
8627 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8628 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8629 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8630 ; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35]
8631 ; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4
8632 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8633 ; GFX9-NEXT: s_getpc_b64 s[34:35]
8634 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4
8635 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12
8636 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8637 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8638 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8639 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8640 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8641 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8642 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8643 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8644 ; GFX9-NEXT: s_mov_b32 s33, s34
8645 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8646 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8648 ; GFX10-LABEL: test_call_external_void_func_struct_i8_i32:
8650 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8651 ; GFX10-NEXT: s_mov_b32 s34, s33
8652 ; GFX10-NEXT: s_mov_b32 s33, s32
8653 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8654 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
8655 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8656 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8657 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8658 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
8659 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
8660 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8661 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8662 ; GFX10-NEXT: s_clause 0x1
8663 ; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35]
8664 ; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4
8665 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8666 ; GFX10-NEXT: s_getpc_b64 s[34:35]
8667 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4
8668 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12
8669 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8670 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8671 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8672 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8673 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8674 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8675 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
8676 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8677 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8678 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8679 ; GFX10-NEXT: s_mov_b32 s33, s34
8680 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8681 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8683 ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
8685 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8686 ; GFX11-NEXT: s_mov_b32 s0, s33
8687 ; GFX11-NEXT: s_mov_b32 s33, s32
8688 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8689 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
8690 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8691 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8692 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
8693 ; GFX11-NEXT: v_mov_b32_e32 v1, 0
8694 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8695 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
8696 ; GFX11-NEXT: s_clause 0x1
8697 ; GFX11-NEXT: global_load_u8 v0, v1, s[0:1]
8698 ; GFX11-NEXT: global_load_b32 v1, v1, s[0:1] offset:4
8699 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8700 ; GFX11-NEXT: s_getpc_b64 s[0:1]
8701 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4
8702 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12
8703 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8704 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8705 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8706 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8707 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8708 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8709 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8710 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
8711 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8712 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8713 ; GFX11-NEXT: s_mov_b32 s33, s0
8714 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8715 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8717 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32:
8718 ; GFX10-SCRATCH: ; %bb.0:
8719 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8720 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8721 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8722 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8723 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
8724 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8725 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8726 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8727 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8728 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
8729 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8730 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8731 ; GFX10-SCRATCH-NEXT: s_clause 0x1
8732 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1]
8733 ; GFX10-SCRATCH-NEXT: global_load_dword v1, v2, s[0:1] offset:4
8734 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8735 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8736 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4
8737 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12
8738 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8739 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8740 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8741 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8742 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8743 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8744 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
8745 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8746 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8747 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8748 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8749 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8750 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: load the struct pointer, load the whole struct through it,
; and pass the struct value directly as the call argument.
8751 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
8752 %val = load { i8, i32 }, ptr addrspace(1) %ptr0
8753 call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val)
; Test: pass a { i8, i32 } struct to an amdgpu_gfx function through a byval
; pointer to an addrspace(5) alloca that has been initialized with i8 3 and
; i32 8.
; In the expected output below, the two fields are stored at s33 and at
; s33 offset:4, and v0 is set from s33 just before the call: shifted right by
; 6 in the gfx900 output, by 5 in the gfx1010 buffer-instruction output, and
; moved unchanged in the outputs that use scratch instructions.
8757 define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
8758 ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8760 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8761 ; GFX9-NEXT: s_mov_b32 s34, s33
8762 ; GFX9-NEXT: s_mov_b32 s33, s32
8763 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8764 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8765 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8766 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
8767 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8768 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33
8769 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
8770 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8771 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8772 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
8773 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
8774 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8775 ; GFX9-NEXT: s_getpc_b64 s[34:35]
8776 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4
8777 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12
8778 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8779 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8780 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8781 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8782 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8783 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8784 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8785 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8786 ; GFX9-NEXT: s_mov_b32 s33, s34
8787 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8788 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8790 ; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8792 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8793 ; GFX10-NEXT: s_mov_b32 s34, s33
8794 ; GFX10-NEXT: s_mov_b32 s33, s32
8795 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8796 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
8797 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8798 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8799 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8800 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
8801 ; GFX10-NEXT: v_mov_b32_e32 v1, 8
8802 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8803 ; GFX10-NEXT: s_getpc_b64 s[34:35]
8804 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4
8805 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12
8806 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8807 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33
8808 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4
8809 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
8810 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8811 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8812 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8813 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8814 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8815 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8816 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
8817 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8818 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8819 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8820 ; GFX10-NEXT: s_mov_b32 s33, s34
8821 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8822 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8824 ; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8826 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8827 ; GFX11-NEXT: s_mov_b32 s0, s33
8828 ; GFX11-NEXT: s_mov_b32 s33, s32
8829 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8830 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
8831 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8832 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8833 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
8834 ; GFX11-NEXT: s_add_i32 s32, s32, 16
8835 ; GFX11-NEXT: s_getpc_b64 s[0:1]
8836 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4
8837 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12
8838 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
8839 ; GFX11-NEXT: s_clause 0x1
8840 ; GFX11-NEXT: scratch_store_b8 off, v0, s33
8841 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4
8842 ; GFX11-NEXT: v_mov_b32_e32 v0, s33
8843 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
8844 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
8845 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8846 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
8847 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
8848 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
8849 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8850 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
8851 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8852 ; GFX11-NEXT: s_add_i32 s32, s32, -16
8853 ; GFX11-NEXT: s_mov_b32 s33, s0
8854 ; GFX11-NEXT: s_waitcnt vmcnt(0)
8855 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8857 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32:
8858 ; GFX10-SCRATCH: ; %bb.0:
8859 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8860 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
8861 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8862 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8863 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
8864 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8865 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8866 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
8867 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
8868 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8
8869 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8870 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8871 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4
8872 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12
8873 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8874 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33
8875 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4
8876 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s33
8877 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8878 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8879 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
8880 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
8881 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
8882 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
8883 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
8884 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8885 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
8886 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8887 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
8888 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8889 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: build the struct in a stack object field by field, then pass
; a pointer to it with the byval attribute.
8890 %val = alloca { i8, i32 }, align 4, addrspace(5)
8891 %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
8892 %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
8893 store i8 3, ptr addrspace(5) %gep0
8894 store i32 8, ptr addrspace(5) %gep1
8895 call amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val)
; Test: call an amdgpu_gfx function that takes an sret pointer (%out.val) and
; a byval pointer (%in.val), both to { i8, i32 } addrspace(5) allocas.
; %in.val is initialized with i8 3 and i32 8. After the call both fields of
; %out.val are loaded and written with volatile stores to an undef global
; pointer. The function's own unnamed i32 parameter is not referenced by the
; body.
; In the expected output below, the stores of 3 and 8 target s33 and
; s33 offset:4, and the loads after the call read s33 offset:8 and offset:12.
8899 define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
8900 ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
8902 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8903 ; GFX9-NEXT: s_mov_b32 s34, s33
8904 ; GFX9-NEXT: s_mov_b32 s33, s32
8905 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8906 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
8907 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8908 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
8909 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33
8910 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
8911 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
8912 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
8913 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
8914 ; GFX9-NEXT: s_addk_i32 s32, 0x800
8915 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8916 ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0
8917 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33
8918 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8919 ; GFX9-NEXT: s_getpc_b64 s[34:35]
8920 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
8921 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
8922 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
8923 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
8924 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
8925 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
8926 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
8927 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
8928 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8929 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
8930 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8931 ; GFX9-NEXT: global_store_dword v[0:1], v1, off
8932 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8933 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
8934 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
8935 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
8936 ; GFX9-NEXT: s_addk_i32 s32, 0xf800
8937 ; GFX9-NEXT: s_mov_b32 s33, s34
8938 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8939 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8941 ; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
8943 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8944 ; GFX10-NEXT: s_mov_b32 s34, s33
8945 ; GFX10-NEXT: s_mov_b32 s33, s32
8946 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8947 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
8948 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8949 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8950 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
8951 ; GFX10-NEXT: v_mov_b32_e32 v1, 8
8952 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
8953 ; GFX10-NEXT: s_addk_i32 s32, 0x400
8954 ; GFX10-NEXT: s_getpc_b64 s[34:35]
8955 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
8956 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
8957 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33
8958 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4
8959 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
8960 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8961 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33
8962 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0
8963 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8964 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
8965 ; GFX10-NEXT: s_clause 0x1
8966 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
8967 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
8968 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
8969 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
8970 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
8971 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8972 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
8973 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8974 ; GFX10-NEXT: global_store_dword v[0:1], v1, off
8975 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8976 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
8977 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
8978 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8979 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
8980 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00
8981 ; GFX10-NEXT: s_mov_b32 s33, s34
8982 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8983 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8985 ; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
8987 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8988 ; GFX11-NEXT: s_mov_b32 s0, s33
8989 ; GFX11-NEXT: s_mov_b32 s33, s32
8990 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
8991 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill
8992 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
8993 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
8994 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
8995 ; GFX11-NEXT: s_add_i32 s32, s32, 32
8996 ; GFX11-NEXT: s_getpc_b64 s[0:1]
8997 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
8998 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
8999 ; GFX11-NEXT: s_add_i32 s2, s33, 8
9000 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
9001 ; GFX11-NEXT: s_clause 0x1
9002 ; GFX11-NEXT: scratch_store_b8 off, v0, s33
9003 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4
9004 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33
9005 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
9006 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9007 ; GFX11-NEXT: s_clause 0x1
9008 ; GFX11-NEXT: scratch_load_u8 v0, off, s33 offset:8
9009 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 offset:12
9010 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
9011 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
9012 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
9013 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9014 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off dlc
9015 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
9016 ; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
9017 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
9018 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9019 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 ; 4-byte Folded Reload
9020 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9021 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0
9022 ; GFX11-NEXT: s_mov_b32 s33, s0
9023 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9024 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9026 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
9027 ; GFX10-SCRATCH: ; %bb.0:
9028 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9029 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9030 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9031 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9032 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:16 ; 4-byte Folded Spill
9033 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9034 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9035 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
9036 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
9037 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
9038 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8
9039 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
9040 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
9041 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
9042 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s33, 8
9043 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9044 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33
9045 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4
9046 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s2
9047 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33
9048 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9049 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9050 ; GFX10-SCRATCH-NEXT: s_clause 0x1
9051 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8
9052 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12
9053 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
9054 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
9055 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
9056 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9057 ; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v0, off
9058 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
9059 ; GFX10-SCRATCH-NEXT: global_store_dword v[0:1], v1, off
9060 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
9061 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9062 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:16 ; 4-byte Folded Reload
9063 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9064 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9065 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
9066 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9067 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9068 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: initialize the byval input struct, make the call with the
; sret and byval pointers, then read both fields of the sret result.
9069 %in.val = alloca { i8, i32 }, align 4, addrspace(5)
9070 %out.val = alloca { i8, i32 }, align 4, addrspace(5)
9071 %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
9072 %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
9073 store i8 3, ptr addrspace(5) %in.gep0
9074 store i32 8, ptr addrspace(5) %in.gep1
9075 call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val)
9076 %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
9077 %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
9078 %out.val0 = load i8, ptr addrspace(5) %out.gep0
9079 %out.val1 = load i32, ptr addrspace(5) %out.gep1
; The loaded result fields are written out with volatile stores.
9081 store volatile i8 %out.val0, ptr addrspace(1) undef
9082 store volatile i32 %out.val1, ptr addrspace(1) undef
; Loads a <16 x i8> from global memory and passes it by value to
; @external_void_func_v16i8. For every RUN configuration the expected code
; fetches the vector with a single 128-bit global load into v[0:3], then
; unpacks it with 8-, 16- and 24-bit right shifts plus register moves, leaving
; v0 as loaded and assigning v1 through v15 before the s_swappc_b64 call.
; The return address (s30/s31) and the caller's s33 are kept in lanes 0-2 of
; v40, which is itself spilled to and reloaded from the stack at s33.
; NOTE(review): the declaration of @external_void_func_v16i8 near the top of
; this file takes <8 x i8>, whereas the call below passes <16 x i8> -- confirm
; which type is intended.
9086 define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 {
9087 ; GFX9-LABEL: test_call_external_void_func_v16i8:
9089 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9090 ; GFX9-NEXT: s_mov_b32 s34, s33
9091 ; GFX9-NEXT: s_mov_b32 s33, s32
9092 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9093 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9094 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9095 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
9096 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
9097 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
9098 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9099 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
9100 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
9101 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
9102 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35]
9103 ; GFX9-NEXT: s_getpc_b64 s[34:35]
9104 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4
9105 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12
9106 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9107 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
9108 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0
9109 ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0
9110 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
9111 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
9112 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
9113 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
9114 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
9115 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
9116 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
9117 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
9118 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
9119 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
9120 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
9121 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
9122 ; GFX9-NEXT: v_mov_b32_e32 v1, v16
9123 ; GFX9-NEXT: v_mov_b32_e32 v2, v17
9124 ; GFX9-NEXT: v_mov_b32_e32 v3, v18
9125 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9126 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
9127 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
9128 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
9129 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9130 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9131 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9132 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9133 ; GFX9-NEXT: s_mov_b32 s33, s34
9134 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9135 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9137 ; GFX10-LABEL: test_call_external_void_func_v16i8:
9139 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9140 ; GFX10-NEXT: s_mov_b32 s34, s33
9141 ; GFX10-NEXT: s_mov_b32 s33, s32
9142 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9143 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9144 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9145 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9146 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
9147 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
9148 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
9149 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9150 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
9151 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
9152 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
9153 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35]
9154 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9155 ; GFX10-NEXT: s_getpc_b64 s[34:35]
9156 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4
9157 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12
9158 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9159 ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0
9160 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0
9161 ; GFX10-NEXT: v_lshrrev_b32_e32 v18, 24, v0
9162 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
9163 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
9164 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
9165 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2
9166 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
9167 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
9168 ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3
9169 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3
9170 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3
9171 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
9172 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
9173 ; GFX10-NEXT: v_mov_b32_e32 v12, v3
9174 ; GFX10-NEXT: v_mov_b32_e32 v1, v16
9175 ; GFX10-NEXT: v_mov_b32_e32 v2, v17
9176 ; GFX10-NEXT: v_mov_b32_e32 v3, v18
9177 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9178 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
9179 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
9180 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
9181 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9182 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9183 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9184 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9185 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9186 ; GFX10-NEXT: s_mov_b32 s33, s34
9187 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9188 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9190 ; GFX11-LABEL: test_call_external_void_func_v16i8:
9192 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9193 ; GFX11-NEXT: s_mov_b32 s0, s33
9194 ; GFX11-NEXT: s_mov_b32 s33, s32
9195 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9196 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9197 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9198 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
9199 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
9200 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
9201 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9202 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
9203 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
9204 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
9205 ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1]
9206 ; GFX11-NEXT: s_getpc_b64 s[0:1]
9207 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4
9208 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12
9209 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9210 ; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0
9211 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0
9212 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0
9213 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
9214 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
9215 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
9216 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
9217 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
9218 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
9219 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
9220 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
9221 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
9222 ; GFX11-NEXT: v_mov_b32_e32 v4, v1
9223 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
9224 ; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
9225 ; GFX11-NEXT: v_dual_mov_b32 v1, v16 :: v_dual_mov_b32 v2, v17
9226 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9227 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
9228 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
9229 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
9230 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9231 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
9232 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9233 ; GFX11-NEXT: s_add_i32 s32, s32, -16
9234 ; GFX11-NEXT: s_mov_b32 s33, s0
9235 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9236 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9238 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8:
9239 ; GFX10-SCRATCH: ; %bb.0:
9240 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9241 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9242 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9243 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9244 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9245 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9246 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9247 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
9248 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
9249 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
9250 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9251 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9252 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9253 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
9254 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
9255 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9256 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
9257 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4
9258 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12
9259 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9260 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v16, 8, v0
9261 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v17, 16, v0
9262 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v18, 24, v0
9263 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
9264 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
9265 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
9266 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2
9267 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2
9268 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2
9269 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3
9270 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3
9271 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3
9272 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
9273 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2
9274 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3
9275 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v16
9276 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17
9277 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18
9278 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9279 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
9280 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
9281 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
9282 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9283 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9284 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9285 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9286 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9287 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9288 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9289 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The global pointer is itself loaded from an undef constant-address-space
; (addrspace 4) pointer; the vector is then read through it and passed on.
9290 %ptr = load ptr addrspace(1), ptr addrspace(4) undef
9291 %val = load <16 x i8>, ptr addrspace(1) %ptr
9292 call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val)
; Forwards the <32 x i32> %val argument to @byval_align16_f64_arg together with
; a private (addrspace 5) stack slot passed as "byval(double) align 16". The
; slot itself is only allocated with "align 8", and %tmp is unused by the IR.
; This function uses the default calling convention, whereas the call is
; marked amdgpu_gfx. Although the call carries the "tail" marker, the checks
; for every RUN configuration expect an ordinary s_swappc_b64 call followed by
; an s_setpc_b64 return. Around the call, s30, s31 and s34 through s63 are
; written to lanes 0-31 of v40 and read back afterwards; v40 is spilled at
; s33 offset:24.
; Before the call, two dwords are loaded from s33 offset:16/20 and stored at
; s32 (presumably the byval copy of %alloca -- TODO confirm), and v31 is
; reloaded from s33.
9296 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
9297 ; GFX9-LABEL: tail_call_byval_align16:
9298 ; GFX9: ; %bb.0: ; %entry
9299 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9300 ; GFX9-NEXT: s_mov_b32 s6, s33
9301 ; GFX9-NEXT: s_mov_b32 s33, s32
9302 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
9303 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
9304 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
9305 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20
9306 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16
9307 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33
9308 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
9309 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
9310 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
9311 ; GFX9-NEXT: v_writelane_b32 v40, s35, 3
9312 ; GFX9-NEXT: v_writelane_b32 v40, s36, 4
9313 ; GFX9-NEXT: v_writelane_b32 v40, s37, 5
9314 ; GFX9-NEXT: v_writelane_b32 v40, s38, 6
9315 ; GFX9-NEXT: v_writelane_b32 v40, s39, 7
9316 ; GFX9-NEXT: v_writelane_b32 v40, s40, 8
9317 ; GFX9-NEXT: v_writelane_b32 v40, s41, 9
9318 ; GFX9-NEXT: v_writelane_b32 v40, s42, 10
9319 ; GFX9-NEXT: v_writelane_b32 v40, s43, 11
9320 ; GFX9-NEXT: v_writelane_b32 v40, s44, 12
9321 ; GFX9-NEXT: v_writelane_b32 v40, s45, 13
9322 ; GFX9-NEXT: v_writelane_b32 v40, s46, 14
9323 ; GFX9-NEXT: v_writelane_b32 v40, s47, 15
9324 ; GFX9-NEXT: v_writelane_b32 v40, s48, 16
9325 ; GFX9-NEXT: v_writelane_b32 v40, s49, 17
9326 ; GFX9-NEXT: v_writelane_b32 v40, s50, 18
9327 ; GFX9-NEXT: v_writelane_b32 v40, s51, 19
9328 ; GFX9-NEXT: v_writelane_b32 v40, s52, 20
9329 ; GFX9-NEXT: v_writelane_b32 v40, s53, 21
9330 ; GFX9-NEXT: v_writelane_b32 v40, s54, 22
9331 ; GFX9-NEXT: v_writelane_b32 v40, s55, 23
9332 ; GFX9-NEXT: v_writelane_b32 v40, s56, 24
9333 ; GFX9-NEXT: v_writelane_b32 v40, s57, 25
9334 ; GFX9-NEXT: v_writelane_b32 v40, s58, 26
9335 ; GFX9-NEXT: v_writelane_b32 v40, s59, 27
9336 ; GFX9-NEXT: v_writelane_b32 v40, s60, 28
9337 ; GFX9-NEXT: v_writelane_b32 v40, s61, 29
9338 ; GFX9-NEXT: s_addk_i32 s32, 0x800
9339 ; GFX9-NEXT: v_writelane_b32 v40, s62, 30
9340 ; GFX9-NEXT: v_writelane_b32 v40, s63, 31
9341 ; GFX9-NEXT: s_getpc_b64 s[4:5]
9342 ; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
9343 ; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
9344 ; GFX9-NEXT: s_waitcnt vmcnt(2)
9345 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
9346 ; GFX9-NEXT: s_waitcnt vmcnt(2)
9347 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32
9348 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
9349 ; GFX9-NEXT: v_readlane_b32 s63, v40, 31
9350 ; GFX9-NEXT: v_readlane_b32 s62, v40, 30
9351 ; GFX9-NEXT: v_readlane_b32 s61, v40, 29
9352 ; GFX9-NEXT: v_readlane_b32 s60, v40, 28
9353 ; GFX9-NEXT: v_readlane_b32 s59, v40, 27
9354 ; GFX9-NEXT: v_readlane_b32 s58, v40, 26
9355 ; GFX9-NEXT: v_readlane_b32 s57, v40, 25
9356 ; GFX9-NEXT: v_readlane_b32 s56, v40, 24
9357 ; GFX9-NEXT: v_readlane_b32 s55, v40, 23
9358 ; GFX9-NEXT: v_readlane_b32 s54, v40, 22
9359 ; GFX9-NEXT: v_readlane_b32 s53, v40, 21
9360 ; GFX9-NEXT: v_readlane_b32 s52, v40, 20
9361 ; GFX9-NEXT: v_readlane_b32 s51, v40, 19
9362 ; GFX9-NEXT: v_readlane_b32 s50, v40, 18
9363 ; GFX9-NEXT: v_readlane_b32 s49, v40, 17
9364 ; GFX9-NEXT: v_readlane_b32 s48, v40, 16
9365 ; GFX9-NEXT: v_readlane_b32 s47, v40, 15
9366 ; GFX9-NEXT: v_readlane_b32 s46, v40, 14
9367 ; GFX9-NEXT: v_readlane_b32 s45, v40, 13
9368 ; GFX9-NEXT: v_readlane_b32 s44, v40, 12
9369 ; GFX9-NEXT: v_readlane_b32 s43, v40, 11
9370 ; GFX9-NEXT: v_readlane_b32 s42, v40, 10
9371 ; GFX9-NEXT: v_readlane_b32 s41, v40, 9
9372 ; GFX9-NEXT: v_readlane_b32 s40, v40, 8
9373 ; GFX9-NEXT: v_readlane_b32 s39, v40, 7
9374 ; GFX9-NEXT: v_readlane_b32 s38, v40, 6
9375 ; GFX9-NEXT: v_readlane_b32 s37, v40, 5
9376 ; GFX9-NEXT: v_readlane_b32 s36, v40, 4
9377 ; GFX9-NEXT: v_readlane_b32 s35, v40, 3
9378 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
9379 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
9380 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
9381 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
9382 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
9383 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
9384 ; GFX9-NEXT: s_addk_i32 s32, 0xf800
9385 ; GFX9-NEXT: s_mov_b32 s33, s6
9386 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9387 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9389 ; GFX10-LABEL: tail_call_byval_align16:
9390 ; GFX10: ; %bb.0: ; %entry
9391 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9392 ; GFX10-NEXT: s_mov_b32 s6, s33
9393 ; GFX10-NEXT: s_mov_b32 s33, s32
9394 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
9395 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
9396 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9397 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
9398 ; GFX10-NEXT: s_clause 0x2
9399 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20
9400 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16
9401 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33
9402 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
9403 ; GFX10-NEXT: s_addk_i32 s32, 0x400
9404 ; GFX10-NEXT: s_getpc_b64 s[4:5]
9405 ; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
9406 ; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
9407 ; GFX10-NEXT: s_waitcnt vmcnt(2)
9408 ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
9409 ; GFX10-NEXT: s_waitcnt vmcnt(1)
9410 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
9411 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
9412 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
9413 ; GFX10-NEXT: v_writelane_b32 v40, s35, 3
9414 ; GFX10-NEXT: v_writelane_b32 v40, s36, 4
9415 ; GFX10-NEXT: v_writelane_b32 v40, s37, 5
9416 ; GFX10-NEXT: v_writelane_b32 v40, s38, 6
9417 ; GFX10-NEXT: v_writelane_b32 v40, s39, 7
9418 ; GFX10-NEXT: v_writelane_b32 v40, s40, 8
9419 ; GFX10-NEXT: v_writelane_b32 v40, s41, 9
9420 ; GFX10-NEXT: v_writelane_b32 v40, s42, 10
9421 ; GFX10-NEXT: v_writelane_b32 v40, s43, 11
9422 ; GFX10-NEXT: v_writelane_b32 v40, s44, 12
9423 ; GFX10-NEXT: v_writelane_b32 v40, s45, 13
9424 ; GFX10-NEXT: v_writelane_b32 v40, s46, 14
9425 ; GFX10-NEXT: v_writelane_b32 v40, s47, 15
9426 ; GFX10-NEXT: v_writelane_b32 v40, s48, 16
9427 ; GFX10-NEXT: v_writelane_b32 v40, s49, 17
9428 ; GFX10-NEXT: v_writelane_b32 v40, s50, 18
9429 ; GFX10-NEXT: v_writelane_b32 v40, s51, 19
9430 ; GFX10-NEXT: v_writelane_b32 v40, s52, 20
9431 ; GFX10-NEXT: v_writelane_b32 v40, s53, 21
9432 ; GFX10-NEXT: v_writelane_b32 v40, s54, 22
9433 ; GFX10-NEXT: v_writelane_b32 v40, s55, 23
9434 ; GFX10-NEXT: v_writelane_b32 v40, s56, 24
9435 ; GFX10-NEXT: v_writelane_b32 v40, s57, 25
9436 ; GFX10-NEXT: v_writelane_b32 v40, s58, 26
9437 ; GFX10-NEXT: v_writelane_b32 v40, s59, 27
9438 ; GFX10-NEXT: v_writelane_b32 v40, s60, 28
9439 ; GFX10-NEXT: v_writelane_b32 v40, s61, 29
9440 ; GFX10-NEXT: v_writelane_b32 v40, s62, 30
9441 ; GFX10-NEXT: v_writelane_b32 v40, s63, 31
9442 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
9443 ; GFX10-NEXT: v_readlane_b32 s63, v40, 31
9444 ; GFX10-NEXT: v_readlane_b32 s62, v40, 30
9445 ; GFX10-NEXT: v_readlane_b32 s61, v40, 29
9446 ; GFX10-NEXT: v_readlane_b32 s60, v40, 28
9447 ; GFX10-NEXT: v_readlane_b32 s59, v40, 27
9448 ; GFX10-NEXT: v_readlane_b32 s58, v40, 26
9449 ; GFX10-NEXT: v_readlane_b32 s57, v40, 25
9450 ; GFX10-NEXT: v_readlane_b32 s56, v40, 24
9451 ; GFX10-NEXT: v_readlane_b32 s55, v40, 23
9452 ; GFX10-NEXT: v_readlane_b32 s54, v40, 22
9453 ; GFX10-NEXT: v_readlane_b32 s53, v40, 21
9454 ; GFX10-NEXT: v_readlane_b32 s52, v40, 20
9455 ; GFX10-NEXT: v_readlane_b32 s51, v40, 19
9456 ; GFX10-NEXT: v_readlane_b32 s50, v40, 18
9457 ; GFX10-NEXT: v_readlane_b32 s49, v40, 17
9458 ; GFX10-NEXT: v_readlane_b32 s48, v40, 16
9459 ; GFX10-NEXT: v_readlane_b32 s47, v40, 15
9460 ; GFX10-NEXT: v_readlane_b32 s46, v40, 14
9461 ; GFX10-NEXT: v_readlane_b32 s45, v40, 13
9462 ; GFX10-NEXT: v_readlane_b32 s44, v40, 12
9463 ; GFX10-NEXT: v_readlane_b32 s43, v40, 11
9464 ; GFX10-NEXT: v_readlane_b32 s42, v40, 10
9465 ; GFX10-NEXT: v_readlane_b32 s41, v40, 9
9466 ; GFX10-NEXT: v_readlane_b32 s40, v40, 8
9467 ; GFX10-NEXT: v_readlane_b32 s39, v40, 7
9468 ; GFX10-NEXT: v_readlane_b32 s38, v40, 6
9469 ; GFX10-NEXT: v_readlane_b32 s37, v40, 5
9470 ; GFX10-NEXT: v_readlane_b32 s36, v40, 4
9471 ; GFX10-NEXT: v_readlane_b32 s35, v40, 3
9472 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
9473 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
9474 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
9475 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
9476 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
9477 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9478 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
9479 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00
9480 ; GFX10-NEXT: s_mov_b32 s33, s6
9481 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9482 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9484 ; GFX11-LABEL: tail_call_byval_align16:
9485 ; GFX11: ; %bb.0: ; %entry
9486 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9487 ; GFX11-NEXT: s_mov_b32 s4, s33
9488 ; GFX11-NEXT: s_mov_b32 s33, s32
9489 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1
9490 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:24 ; 4-byte Folded Spill
9491 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
9492 ; GFX11-NEXT: s_clause 0x1
9493 ; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 offset:16
9494 ; GFX11-NEXT: scratch_load_b32 v31, off, s33
9495 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
9496 ; GFX11-NEXT: s_add_i32 s32, s32, 32
9497 ; GFX11-NEXT: s_getpc_b64 s[0:1]
9498 ; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
9499 ; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
9500 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
9501 ; GFX11-NEXT: v_writelane_b32 v40, s34, 2
9502 ; GFX11-NEXT: v_writelane_b32 v40, s35, 3
9503 ; GFX11-NEXT: v_writelane_b32 v40, s36, 4
9504 ; GFX11-NEXT: v_writelane_b32 v40, s37, 5
9505 ; GFX11-NEXT: v_writelane_b32 v40, s38, 6
9506 ; GFX11-NEXT: v_writelane_b32 v40, s39, 7
9507 ; GFX11-NEXT: v_writelane_b32 v40, s40, 8
9508 ; GFX11-NEXT: v_writelane_b32 v40, s41, 9
9509 ; GFX11-NEXT: v_writelane_b32 v40, s42, 10
9510 ; GFX11-NEXT: v_writelane_b32 v40, s43, 11
9511 ; GFX11-NEXT: v_writelane_b32 v40, s44, 12
9512 ; GFX11-NEXT: v_writelane_b32 v40, s45, 13
9513 ; GFX11-NEXT: v_writelane_b32 v40, s46, 14
9514 ; GFX11-NEXT: v_writelane_b32 v40, s47, 15
9515 ; GFX11-NEXT: v_writelane_b32 v40, s48, 16
9516 ; GFX11-NEXT: v_writelane_b32 v40, s49, 17
9517 ; GFX11-NEXT: v_writelane_b32 v40, s50, 18
9518 ; GFX11-NEXT: v_writelane_b32 v40, s51, 19
9519 ; GFX11-NEXT: v_writelane_b32 v40, s52, 20
9520 ; GFX11-NEXT: v_writelane_b32 v40, s53, 21
9521 ; GFX11-NEXT: v_writelane_b32 v40, s54, 22
9522 ; GFX11-NEXT: v_writelane_b32 v40, s55, 23
9523 ; GFX11-NEXT: v_writelane_b32 v40, s56, 24
9524 ; GFX11-NEXT: v_writelane_b32 v40, s57, 25
9525 ; GFX11-NEXT: v_writelane_b32 v40, s58, 26
9526 ; GFX11-NEXT: v_writelane_b32 v40, s59, 27
9527 ; GFX11-NEXT: v_writelane_b32 v40, s60, 28
9528 ; GFX11-NEXT: v_writelane_b32 v40, s61, 29
9529 ; GFX11-NEXT: v_writelane_b32 v40, s62, 30
9530 ; GFX11-NEXT: v_writelane_b32 v40, s63, 31
9531 ; GFX11-NEXT: s_waitcnt vmcnt(1)
9532 ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32
9533 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9534 ; GFX11-NEXT: v_readlane_b32 s63, v40, 31
9535 ; GFX11-NEXT: v_readlane_b32 s62, v40, 30
9536 ; GFX11-NEXT: v_readlane_b32 s61, v40, 29
9537 ; GFX11-NEXT: v_readlane_b32 s60, v40, 28
9538 ; GFX11-NEXT: v_readlane_b32 s59, v40, 27
9539 ; GFX11-NEXT: v_readlane_b32 s58, v40, 26
9540 ; GFX11-NEXT: v_readlane_b32 s57, v40, 25
9541 ; GFX11-NEXT: v_readlane_b32 s56, v40, 24
9542 ; GFX11-NEXT: v_readlane_b32 s55, v40, 23
9543 ; GFX11-NEXT: v_readlane_b32 s54, v40, 22
9544 ; GFX11-NEXT: v_readlane_b32 s53, v40, 21
9545 ; GFX11-NEXT: v_readlane_b32 s52, v40, 20
9546 ; GFX11-NEXT: v_readlane_b32 s51, v40, 19
9547 ; GFX11-NEXT: v_readlane_b32 s50, v40, 18
9548 ; GFX11-NEXT: v_readlane_b32 s49, v40, 17
9549 ; GFX11-NEXT: v_readlane_b32 s48, v40, 16
9550 ; GFX11-NEXT: v_readlane_b32 s47, v40, 15
9551 ; GFX11-NEXT: v_readlane_b32 s46, v40, 14
9552 ; GFX11-NEXT: v_readlane_b32 s45, v40, 13
9553 ; GFX11-NEXT: v_readlane_b32 s44, v40, 12
9554 ; GFX11-NEXT: v_readlane_b32 s43, v40, 11
9555 ; GFX11-NEXT: v_readlane_b32 s42, v40, 10
9556 ; GFX11-NEXT: v_readlane_b32 s41, v40, 9
9557 ; GFX11-NEXT: v_readlane_b32 s40, v40, 8
9558 ; GFX11-NEXT: v_readlane_b32 s39, v40, 7
9559 ; GFX11-NEXT: v_readlane_b32 s38, v40, 6
9560 ; GFX11-NEXT: v_readlane_b32 s37, v40, 5
9561 ; GFX11-NEXT: v_readlane_b32 s36, v40, 4
9562 ; GFX11-NEXT: v_readlane_b32 s35, v40, 3
9563 ; GFX11-NEXT: v_readlane_b32 s34, v40, 2
9564 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
9565 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
9566 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1
9567 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:24 ; 4-byte Folded Reload
9568 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
9569 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0
9570 ; GFX11-NEXT: s_mov_b32 s33, s4
9571 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9572 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9574 ; GFX10-SCRATCH-LABEL: tail_call_byval_align16:
9575 ; GFX10-SCRATCH: ; %bb.0: ; %entry
9576 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9577 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33
9578 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9579 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
9580 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:24 ; 4-byte Folded Spill
9581 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9582 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
9583 ; GFX10-SCRATCH-NEXT: s_clause 0x1
9584 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:16
9585 ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33
9586 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9587 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
9588 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
9589 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
9590 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
9591 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9592 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2
9593 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3
9594 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4
9595 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5
9596 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6
9597 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7
9598 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8
9599 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9
9600 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10
9601 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11
9602 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12
9603 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13
9604 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14
9605 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15
9606 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16
9607 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17
9608 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18
9609 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19
9610 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20
9611 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21
9612 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22
9613 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23
9614 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24
9615 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25
9616 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26
9617 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27
9618 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28
9619 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29
9620 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30
9621 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31
9622 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
9623 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
9624 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9625 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31
9626 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30
9627 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29
9628 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28
9629 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27
9630 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26
9631 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25
9632 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24
9633 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23
9634 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22
9635 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21
9636 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20
9637 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19
9638 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18
9639 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17
9640 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16
9641 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15
9642 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14
9643 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13
9644 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12
9645 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 11
9646 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10
9647 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9
9648 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8
9649 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7
9650 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6
9651 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5
9652 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4
9653 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3
9654 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2
9655 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
9656 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
9657 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
9658 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:24 ; 4-byte Folded Reload
9659 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9660 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
9661 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
9662 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4
9663 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9664 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The slot is allocated with align 8 but handed to the callee as a byval
; argument that requests align 16.
9666 %alloca = alloca double, align 8, addrspace(5)
9667 tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
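9671 ; Arguments marked inreg are put in SGPRs.
; NOTE(review): in the i1 immediate test directly below, the checked output
; stores v0 to the stack at s32 (a byte store) rather than using an SGPR --
; confirm this is the intended behaviour for an i1 inreg argument.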
9672 define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 {
9673 ; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg:
9675 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9676 ; GFX9-NEXT: s_mov_b32 s34, s33
9677 ; GFX9-NEXT: s_mov_b32 s33, s32
9678 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9679 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9680 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9681 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
9682 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9683 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
9684 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
9685 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
9686 ; GFX9-NEXT: s_getpc_b64 s[34:35]
9687 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4
9688 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12
9689 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
9690 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9691 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
9692 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
9693 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
9694 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9695 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9696 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9697 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9698 ; GFX9-NEXT: s_mov_b32 s33, s34
9699 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9700 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9702 ; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg:
9704 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9705 ; GFX10-NEXT: s_mov_b32 s34, s33
9706 ; GFX10-NEXT: s_mov_b32 s33, s32
9707 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9708 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9709 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9710 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9711 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
9712 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
9713 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9714 ; GFX10-NEXT: s_getpc_b64 s[34:35]
9715 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4
9716 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12
9717 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
9718 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
9719 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
9720 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9721 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
9722 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
9723 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
9724 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9725 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9726 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9727 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9728 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9729 ; GFX10-NEXT: s_mov_b32 s33, s34
9730 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9731 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9733 ; GFX11-LABEL: test_call_external_void_func_i1_imm_inreg:
9735 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9736 ; GFX11-NEXT: s_mov_b32 s0, s33
9737 ; GFX11-NEXT: s_mov_b32 s33, s32
9738 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9739 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9740 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9741 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
9742 ; GFX11-NEXT: v_mov_b32_e32 v0, 1
9743 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9744 ; GFX11-NEXT: s_getpc_b64 s[0:1]
9745 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4
9746 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12
9747 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
9748 ; GFX11-NEXT: scratch_store_b8 off, v0, s32
9749 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
9750 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9751 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9752 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
9753 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
9754 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
9755 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9756 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
9757 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9758 ; GFX11-NEXT: s_add_i32 s32, s32, -16
9759 ; GFX11-NEXT: s_mov_b32 s33, s0
9760 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9761 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9763 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg:
9764 ; GFX10-SCRATCH: ; %bb.0:
9765 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9766 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9767 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9768 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9769 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9770 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9771 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9772 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
9773 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
9774 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9775 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
9776 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4
9777 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12
9778 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9779 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
9780 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9781 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9782 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
9783 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
9784 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
9785 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9786 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9787 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9788 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9789 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9790 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9791 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9792 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
9793 call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true)
; Calls @external_void_func_i8_inreg with the constant i8 123 passed as an
; `inreg` argument under the amdgpu_gfx calling convention.
; In the expected output for every checked configuration, s4 is first saved
; to lane 0 of v40, then loaded with 0x7b (123) right before s_swappc_b64,
; and finally restored from lane 0 after the call returns.
; The unnamed i32 parameter is not referenced by the visible call; presumably
; it is only there to occupy an incoming argument slot (TODO confirm).
9797 define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 {
9798 ; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg:
9800 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9801 ; GFX9-NEXT: s_mov_b32 s34, s33
9802 ; GFX9-NEXT: s_mov_b32 s33, s32
9803 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9804 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9805 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9806 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
9807 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
9808 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9809 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
9810 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
9811 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
9812 ; GFX9-NEXT: s_getpc_b64 s[34:35]
9813 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4
9814 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12
9815 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9816 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
9817 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
9818 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9819 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
9820 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9821 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9822 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9823 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9824 ; GFX9-NEXT: s_mov_b32 s33, s34
9825 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9826 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9828 ; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg:
9830 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9831 ; GFX10-NEXT: s_mov_b32 s34, s33
9832 ; GFX10-NEXT: s_mov_b32 s33, s32
9833 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9834 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9835 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9836 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9837 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
9838 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9839 ; GFX10-NEXT: s_getpc_b64 s[34:35]
9840 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4
9841 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12
9842 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
9843 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
9844 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
9845 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
9846 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9847 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
9848 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
9849 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9850 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
9851 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9852 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9853 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9854 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9855 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9856 ; GFX10-NEXT: s_mov_b32 s33, s34
9857 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9858 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9860 ; GFX11-LABEL: test_call_external_void_func_i8_imm_inreg:
9862 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9863 ; GFX11-NEXT: s_mov_b32 s0, s33
9864 ; GFX11-NEXT: s_mov_b32 s33, s32
9865 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9866 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9867 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9868 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
9869 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9870 ; GFX11-NEXT: s_getpc_b64 s[0:1]
9871 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4
9872 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12
9873 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
9874 ; GFX11-NEXT: s_movk_i32 s4, 0x7b
9875 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
9876 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
9877 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
9878 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9879 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
9880 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
9881 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
9882 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
9883 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9884 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
9885 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9886 ; GFX11-NEXT: s_add_i32 s32, s32, -16
9887 ; GFX11-NEXT: s_mov_b32 s33, s0
9888 ; GFX11-NEXT: s_waitcnt vmcnt(0)
9889 ; GFX11-NEXT: s_setpc_b64 s[30:31]
9891 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg:
9892 ; GFX10-SCRATCH: ; %bb.0:
9893 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9894 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
9895 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9896 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9897 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
9898 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9899 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9900 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
9901 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9902 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
9903 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4
9904 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12
9905 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
9906 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
9907 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
9908 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
9909 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9910 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
9911 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
9912 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
9913 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
9914 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
9915 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
9916 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9917 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
9918 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9919 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
9920 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9921 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
9922 call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123)
; Calls @external_void_func_i16_inreg with the constant i16 123 passed as an
; `inreg` argument under the amdgpu_gfx calling convention.
; In the expected output for every checked configuration, s4 is first saved
; to lane 0 of v40, then loaded with 0x7b (123) right before s_swappc_b64,
; and finally restored from lane 0 after the call returns.
9926 define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 {
9927 ; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg:
9929 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9930 ; GFX9-NEXT: s_mov_b32 s34, s33
9931 ; GFX9-NEXT: s_mov_b32 s33, s32
9932 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9933 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9934 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9935 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
9936 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
9937 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9938 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
9939 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
9940 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
9941 ; GFX9-NEXT: s_getpc_b64 s[34:35]
9942 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4
9943 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12
9944 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
9945 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
9946 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
9947 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9948 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
9949 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
9950 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9951 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
9952 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9953 ; GFX9-NEXT: s_mov_b32 s33, s34
9954 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9955 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9957 ; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg:
9959 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9960 ; GFX10-NEXT: s_mov_b32 s34, s33
9961 ; GFX10-NEXT: s_mov_b32 s33, s32
9962 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9963 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
9964 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9965 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9966 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
9967 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9968 ; GFX10-NEXT: s_getpc_b64 s[34:35]
9969 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4
9970 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12
9971 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
9972 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
9973 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
9974 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
9975 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
9976 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
9977 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
9978 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9979 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
9980 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
9981 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
9982 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9983 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
9984 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9985 ; GFX10-NEXT: s_mov_b32 s33, s34
9986 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9987 ; GFX10-NEXT: s_setpc_b64 s[30:31]
9989 ; GFX11-LABEL: test_call_external_void_func_i16_imm_inreg:
9991 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9992 ; GFX11-NEXT: s_mov_b32 s0, s33
9993 ; GFX11-NEXT: s_mov_b32 s33, s32
9994 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
9995 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
9996 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
9997 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
9998 ; GFX11-NEXT: s_add_i32 s32, s32, 16
9999 ; GFX11-NEXT: s_getpc_b64 s[0:1]
10000 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4
10001 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12
10002 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10003 ; GFX11-NEXT: s_movk_i32 s4, 0x7b
10004 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
10005 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
10006 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10007 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10008 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
10009 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
10010 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10011 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
10012 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10013 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10014 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10015 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10016 ; GFX11-NEXT: s_mov_b32 s33, s0
10017 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10018 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10020 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg:
10021 ; GFX10-SCRATCH: ; %bb.0:
10022 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10023 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10024 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10025 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10026 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10027 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10028 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10029 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
10030 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10031 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
10032 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4
10033 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12
10034 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10035 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
10036 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
10037 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
10038 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10039 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
10040 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
10041 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10042 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
10043 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10044 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10045 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10046 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10047 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10048 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10049 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10050 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10051 call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
; Calls @external_void_func_i32_inreg with the constant i32 42 passed as an
; `inreg` argument under the amdgpu_gfx calling convention.
; In the expected output for every checked configuration, s4 is first saved
; to lane 0 of v40, then set to 42 with s_mov_b32 right before s_swappc_b64,
; and finally restored from lane 0 after the call returns.
; The unnamed i32 parameter is not referenced by the visible call; presumably
; it is only there to occupy an incoming argument slot (TODO confirm).
10055 define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
10056 ; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg:
10058 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10059 ; GFX9-NEXT: s_mov_b32 s34, s33
10060 ; GFX9-NEXT: s_mov_b32 s33, s32
10061 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10062 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10063 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10064 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
10065 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10066 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10067 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
10068 ; GFX9-NEXT: s_mov_b32 s4, 42
10069 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
10070 ; GFX9-NEXT: s_getpc_b64 s[34:35]
10071 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4
10072 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12
10073 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10074 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
10075 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
10076 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10077 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
10078 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10079 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10080 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10081 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10082 ; GFX9-NEXT: s_mov_b32 s33, s34
10083 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10084 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10086 ; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg:
10088 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10089 ; GFX10-NEXT: s_mov_b32 s34, s33
10090 ; GFX10-NEXT: s_mov_b32 s33, s32
10091 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10092 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10093 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10094 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10095 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
10096 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10097 ; GFX10-NEXT: s_getpc_b64 s[34:35]
10098 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4
10099 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12
10100 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10101 ; GFX10-NEXT: s_mov_b32 s4, 42
10102 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
10103 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
10104 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10105 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
10106 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
10107 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10108 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
10109 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10110 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10111 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10112 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10113 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10114 ; GFX10-NEXT: s_mov_b32 s33, s34
10115 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10116 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10118 ; GFX11-LABEL: test_call_external_void_func_i32_imm_inreg:
10120 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10121 ; GFX11-NEXT: s_mov_b32 s0, s33
10122 ; GFX11-NEXT: s_mov_b32 s33, s32
10123 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10124 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10125 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10126 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
10127 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10128 ; GFX11-NEXT: s_getpc_b64 s[0:1]
10129 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4
10130 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12
10131 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10132 ; GFX11-NEXT: s_mov_b32 s4, 42
10133 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
10134 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
10135 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10136 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10137 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
10138 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
10139 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10140 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
10141 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10142 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10143 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10144 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10145 ; GFX11-NEXT: s_mov_b32 s33, s0
10146 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10147 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10149 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg:
10150 ; GFX10-SCRATCH: ; %bb.0:
10151 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10152 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10153 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10154 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10155 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10156 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10157 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10158 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
10159 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10160 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
10161 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4
10162 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12
10163 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10164 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42
10165 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
10166 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
10167 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10168 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
10169 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
10170 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10171 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
10172 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10173 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10174 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10175 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10176 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10177 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10178 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10179 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10180 call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
; Calls @external_void_func_i64_inreg with the constant i64 123 passed as an
; `inreg` argument under the amdgpu_gfx calling convention.
; In the expected output for every checked configuration, s4 and s5 are saved
; to lanes 0 and 1 of v40, then s4 is loaded with 0x7b (123) and s5 with 0
; before s_swappc_b64, and both are restored from those lanes after the call.
10184 define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
10185 ; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg:
10187 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10188 ; GFX9-NEXT: s_mov_b32 s34, s33
10189 ; GFX9-NEXT: s_mov_b32 s33, s32
10190 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10191 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10192 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10193 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
10194 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10195 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10196 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10197 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
10198 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
10199 ; GFX9-NEXT: s_mov_b32 s5, 0
10200 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
10201 ; GFX9-NEXT: s_getpc_b64 s[34:35]
10202 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4
10203 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12
10204 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10205 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
10206 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
10207 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10208 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10209 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
10210 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10211 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10212 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10213 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10214 ; GFX9-NEXT: s_mov_b32 s33, s34
10215 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10216 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10218 ; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg:
10220 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10221 ; GFX10-NEXT: s_mov_b32 s34, s33
10222 ; GFX10-NEXT: s_mov_b32 s33, s32
10223 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10224 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10225 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10226 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10227 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
10228 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10229 ; GFX10-NEXT: s_getpc_b64 s[34:35]
10230 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4
10231 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12
10232 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10233 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
10234 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10235 ; GFX10-NEXT: s_mov_b32 s5, 0
10236 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
10237 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
10238 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10239 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
10240 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
10241 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10242 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10243 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
10244 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10245 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10246 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10247 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10248 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10249 ; GFX10-NEXT: s_mov_b32 s33, s34
10250 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10251 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10253 ; GFX11-LABEL: test_call_external_void_func_i64_imm_inreg:
10255 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10256 ; GFX11-NEXT: s_mov_b32 s0, s33
10257 ; GFX11-NEXT: s_mov_b32 s33, s32
10258 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10259 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10260 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10261 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
10262 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10263 ; GFX11-NEXT: s_getpc_b64 s[0:1]
10264 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4
10265 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12
10266 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10267 ; GFX11-NEXT: s_movk_i32 s4, 0x7b
10268 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10269 ; GFX11-NEXT: s_mov_b32 s5, 0
10270 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
10271 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
10272 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10273 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10274 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
10275 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
10276 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10277 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10278 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
10279 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10280 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10281 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10282 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10283 ; GFX11-NEXT: s_mov_b32 s33, s0
10284 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10285 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10287 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg:
10288 ; GFX10-SCRATCH: ; %bb.0:
10289 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10290 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10291 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10292 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10293 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10294 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10295 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10296 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
10297 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10298 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
10299 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4
10300 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12
10301 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10302 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
10303 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10304 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0
10305 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
10306 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
10307 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10308 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
10309 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
10310 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10311 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10312 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
10313 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10314 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10315 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10316 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10317 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10318 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10319 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10320 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10321 call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
10325 define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
10326 ; GFX9-LABEL: test_call_external_void_func_v2i64_inreg:
10328 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10329 ; GFX9-NEXT: s_mov_b32 s34, s33
10330 ; GFX9-NEXT: s_mov_b32 s33, s32
10331 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10332 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10333 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10334 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
10335 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10336 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10337 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10338 ; GFX9-NEXT: s_mov_b64 s[34:35], 0
10339 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10340 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10341 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10342 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
10343 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
10344 ; GFX9-NEXT: s_getpc_b64 s[34:35]
10345 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
10346 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
10347 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10348 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
10349 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
10350 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10351 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10352 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10353 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10354 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
10355 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10356 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10357 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10358 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10359 ; GFX9-NEXT: s_mov_b32 s33, s34
10360 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10361 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10363 ; GFX10-LABEL: test_call_external_void_func_v2i64_inreg:
10365 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10366 ; GFX10-NEXT: s_mov_b32 s34, s33
10367 ; GFX10-NEXT: s_mov_b32 s33, s32
10368 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10369 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10370 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10371 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10372 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
10373 ; GFX10-NEXT: s_mov_b64 s[34:35], 0
10374 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10375 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10376 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10377 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10378 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10379 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10380 ; GFX10-NEXT: s_getpc_b64 s[34:35]
10381 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
10382 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
10383 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
10384 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
10385 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10386 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
10387 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
10388 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10389 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10390 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10391 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10392 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
10393 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10394 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10395 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10396 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10397 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10398 ; GFX10-NEXT: s_mov_b32 s33, s34
10399 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10400 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10402 ; GFX11-LABEL: test_call_external_void_func_v2i64_inreg:
10404 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10405 ; GFX11-NEXT: s_mov_b32 s0, s33
10406 ; GFX11-NEXT: s_mov_b32 s33, s32
10407 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10408 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10409 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10410 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
10411 ; GFX11-NEXT: s_mov_b64 s[0:1], 0
10412 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10413 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10414 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10415 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10416 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10417 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
10418 ; GFX11-NEXT: s_getpc_b64 s[0:1]
10419 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
10420 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
10421 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
10422 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
10423 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10424 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10425 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
10426 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
10427 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10428 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10429 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10430 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10431 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
10432 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10433 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10434 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10435 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10436 ; GFX11-NEXT: s_mov_b32 s33, s0
10437 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10438 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10440 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg:
10441 ; GFX10-SCRATCH: ; %bb.0:
10442 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10443 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10444 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10445 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10446 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10447 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10448 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10449 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
10450 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
10451 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10452 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10453 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10454 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10455 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10456 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
10457 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
10458 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
10459 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
10460 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
10461 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
10462 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10463 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
10464 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
10465 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
10466 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
10467 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10468 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10469 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
10470 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10471 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10472 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10473 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10474 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10475 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10476 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10477 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10478 %val = load <2 x i64>, ptr addrspace(4) null
10479 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
; Calls external_void_func_v2i64_inreg with the constant vector
; <2 x i64> <8589934593, 17179869187> as an inreg argument.
; 8589934593 is 0x2_00000001 and 17179869187 is 0x4_00000003, so the expected
; code materializes the dwords 1, 2, 3, 4 in s4..s7 with s_mov_b32.
; In every expected sequence the incoming s4-s7 and s30/s31 are written to
; lanes of v40 before the call and read back after it returns.
10483 define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
10484 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10486 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10487 ; GFX9-NEXT: s_mov_b32 s34, s33
10488 ; GFX9-NEXT: s_mov_b32 s33, s32
10489 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10490 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10491 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10492 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
10493 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10494 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10495 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10496 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10497 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10498 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
10499 ; GFX9-NEXT: s_mov_b32 s4, 1
10500 ; GFX9-NEXT: s_mov_b32 s5, 2
10501 ; GFX9-NEXT: s_mov_b32 s6, 3
10502 ; GFX9-NEXT: s_mov_b32 s7, 4
10503 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
10504 ; GFX9-NEXT: s_getpc_b64 s[34:35]
10505 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
10506 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
10507 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10508 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
10509 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
10510 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10511 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10512 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10513 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10514 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
10515 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10516 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10517 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10518 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10519 ; GFX9-NEXT: s_mov_b32 s33, s34
10520 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10521 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10523 ; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10525 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10526 ; GFX10-NEXT: s_mov_b32 s34, s33
10527 ; GFX10-NEXT: s_mov_b32 s33, s32
10528 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10529 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10530 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10531 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10532 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
10533 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10534 ; GFX10-NEXT: s_getpc_b64 s[34:35]
10535 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
10536 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
10537 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10538 ; GFX10-NEXT: s_mov_b32 s4, 1
10539 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10540 ; GFX10-NEXT: s_mov_b32 s5, 2
10541 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10542 ; GFX10-NEXT: s_mov_b32 s6, 3
10543 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10544 ; GFX10-NEXT: s_mov_b32 s7, 4
10545 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
10546 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
10547 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10548 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
10549 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
10550 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10551 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10552 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10553 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10554 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
10555 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10556 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10557 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10558 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10559 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10560 ; GFX10-NEXT: s_mov_b32 s33, s34
10561 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10562 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10564 ; GFX11-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10566 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10567 ; GFX11-NEXT: s_mov_b32 s0, s33
10568 ; GFX11-NEXT: s_mov_b32 s33, s32
10569 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10570 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10571 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10572 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
10573 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10574 ; GFX11-NEXT: s_getpc_b64 s[0:1]
10575 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
10576 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
10577 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10578 ; GFX11-NEXT: s_mov_b32 s4, 1
10579 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10580 ; GFX11-NEXT: s_mov_b32 s5, 2
10581 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10582 ; GFX11-NEXT: s_mov_b32 s6, 3
10583 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10584 ; GFX11-NEXT: s_mov_b32 s7, 4
10585 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
10586 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
10587 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10588 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10589 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
10590 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
10591 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10592 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10593 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10594 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10595 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
10596 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10597 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10598 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10599 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10600 ; GFX11-NEXT: s_mov_b32 s33, s0
10601 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10602 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10604 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg:
10605 ; GFX10-SCRATCH: ; %bb.0:
10606 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10607 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10608 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10609 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10610 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10611 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10612 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10613 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
10614 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10615 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
10616 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
10617 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
10618 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10619 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
10620 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10621 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
10622 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10623 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
10624 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10625 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
10626 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
10627 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
10628 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10629 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
10630 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
10631 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
10632 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
10633 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10634 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10635 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
10636 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10637 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10638 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10639 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10640 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10641 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10642 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10643 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The argument is an immediate vector, so the expected sequences above contain
; no scalar load, only s_mov_b32 of the four dwords.
10644 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>)
; Calls external_void_func_v3i64_inreg with a <3 x i64> inreg argument built
; from a <2 x i64> loaded from the null constant-address-space pointer plus one
; constant element. The expected code loads the first four dwords into s[4:7]
; with a single scalar load and materializes the constant 8589934593
; (0x2_00000001) as the dwords 1 and 2 in s8/s9.
; In every expected sequence the incoming s4-s9 and s30/s31 are written to
; lanes of v40 before the call and read back after it returns.
10648 define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
10649 ; GFX9-LABEL: test_call_external_void_func_v3i64_inreg:
10651 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10652 ; GFX9-NEXT: s_mov_b32 s34, s33
10653 ; GFX9-NEXT: s_mov_b32 s33, s32
10654 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10655 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10656 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10657 ; GFX9-NEXT: v_writelane_b32 v40, s34, 8
10658 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10659 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10660 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10661 ; GFX9-NEXT: s_mov_b64 s[34:35], 0
10662 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10663 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10664 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
10665 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
10666 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10667 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6
10668 ; GFX9-NEXT: s_mov_b32 s8, 1
10669 ; GFX9-NEXT: s_mov_b32 s9, 2
10670 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7
10671 ; GFX9-NEXT: s_getpc_b64 s[34:35]
10672 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
10673 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
10674 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10675 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
10676 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6
10677 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
10678 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
10679 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10680 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10681 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10682 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10683 ; GFX9-NEXT: v_readlane_b32 s34, v40, 8
10684 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10685 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10686 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10687 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10688 ; GFX9-NEXT: s_mov_b32 s33, s34
10689 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10690 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10692 ; GFX10-LABEL: test_call_external_void_func_v3i64_inreg:
10694 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10695 ; GFX10-NEXT: s_mov_b32 s34, s33
10696 ; GFX10-NEXT: s_mov_b32 s33, s32
10697 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10698 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10699 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10700 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10701 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8
10702 ; GFX10-NEXT: s_mov_b64 s[34:35], 0
10703 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10704 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10705 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10706 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10707 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10708 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10709 ; GFX10-NEXT: s_getpc_b64 s[34:35]
10710 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
10711 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
10712 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
10713 ; GFX10-NEXT: s_mov_b32 s8, 1
10714 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
10715 ; GFX10-NEXT: s_mov_b32 s9, 2
10716 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
10717 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
10718 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10719 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
10720 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6
10721 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
10722 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
10723 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10724 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10725 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10726 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10727 ; GFX10-NEXT: v_readlane_b32 s34, v40, 8
10728 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10729 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10730 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10731 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10732 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10733 ; GFX10-NEXT: s_mov_b32 s33, s34
10734 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10735 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10737 ; GFX11-LABEL: test_call_external_void_func_v3i64_inreg:
10739 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10740 ; GFX11-NEXT: s_mov_b32 s0, s33
10741 ; GFX11-NEXT: s_mov_b32 s33, s32
10742 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10743 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10744 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10745 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8
10746 ; GFX11-NEXT: s_mov_b64 s[0:1], 0
10747 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10748 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10749 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10750 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10751 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10752 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
10753 ; GFX11-NEXT: s_getpc_b64 s[0:1]
10754 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4
10755 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12
10756 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
10757 ; GFX11-NEXT: s_mov_b32 s8, 1
10758 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
10759 ; GFX11-NEXT: s_mov_b32 s9, 2
10760 ; GFX11-NEXT: v_writelane_b32 v40, s30, 6
10761 ; GFX11-NEXT: v_writelane_b32 v40, s31, 7
10762 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10763 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10764 ; GFX11-NEXT: v_readlane_b32 s31, v40, 7
10765 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6
10766 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
10767 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
10768 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10769 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10770 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10771 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10772 ; GFX11-NEXT: v_readlane_b32 s0, v40, 8
10773 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10774 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10775 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10776 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10777 ; GFX11-NEXT: s_mov_b32 s33, s0
10778 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10779 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10781 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg:
10782 ; GFX10-SCRATCH: ; %bb.0:
10783 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10784 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10785 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10786 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10787 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10788 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10789 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10790 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8
10791 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
10792 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10793 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10794 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10795 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10796 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10797 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
10798 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
10799 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4
10800 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12
10801 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
10802 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
10803 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
10804 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
10805 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
10806 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
10807 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
10808 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
10809 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
10810 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
10811 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
10812 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
10813 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
10814 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
10815 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
10816 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 8
10817 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10818 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
10819 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10820 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10821 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
10822 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
10823 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
10824 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
10825 %load = load <2 x i64>, ptr addrspace(4) null
; Mask <0, 1, 2>: elements 0 and 1 come from %load, element 2 is element 0 of
; the constant operand (8589934593). The undef element of the constant operand
; is never selected.
10826 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
10828 call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
; Calls external_void_func_v4i64_inreg with a <4 x i64> inreg argument formed
; by concatenating a <2 x i64> loaded from the null constant-address-space
; pointer with the constant <8589934593, 17179869187>. The expected code loads
; the first four dwords into s[4:7] with a single scalar load and materializes
; the constants (0x2_00000001 and 0x4_00000003) as the dwords 1, 2, 3, 4 in
; s8..s11.
; In every expected sequence the incoming s4-s11 and s30/s31 are written to
; lanes of v40 before the call and read back after it returns.
10832 define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
10833 ; GFX9-LABEL: test_call_external_void_func_v4i64_inreg:
10835 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10836 ; GFX9-NEXT: s_mov_b32 s34, s33
10837 ; GFX9-NEXT: s_mov_b32 s33, s32
10838 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10839 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10840 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10841 ; GFX9-NEXT: v_writelane_b32 v40, s34, 10
10842 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
10843 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
10844 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
10845 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
10846 ; GFX9-NEXT: s_mov_b64 s[34:35], 0
10847 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
10848 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10849 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
10850 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
10851 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
10852 ; GFX9-NEXT: s_addk_i32 s32, 0x400
10853 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8
10854 ; GFX9-NEXT: s_mov_b32 s8, 1
10855 ; GFX9-NEXT: s_mov_b32 s9, 2
10856 ; GFX9-NEXT: s_mov_b32 s10, 3
10857 ; GFX9-NEXT: s_mov_b32 s11, 4
10858 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
10859 ; GFX9-NEXT: s_getpc_b64 s[34:35]
10860 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
10861 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
10862 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
10863 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
10864 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
10865 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
10866 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
10867 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
10868 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
10869 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
10870 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
10871 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
10872 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
10873 ; GFX9-NEXT: v_readlane_b32 s34, v40, 10
10874 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
10875 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10876 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
10877 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
10878 ; GFX9-NEXT: s_mov_b32 s33, s34
10879 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10880 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10882 ; GFX10-LABEL: test_call_external_void_func_v4i64_inreg:
10884 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10885 ; GFX10-NEXT: s_mov_b32 s34, s33
10886 ; GFX10-NEXT: s_mov_b32 s33, s32
10887 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10888 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
10889 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10890 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10891 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10
10892 ; GFX10-NEXT: s_mov_b64 s[34:35], 0
10893 ; GFX10-NEXT: s_addk_i32 s32, 0x200
10894 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
10895 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
10896 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
10897 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
10898 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
10899 ; GFX10-NEXT: s_getpc_b64 s[34:35]
10900 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
10901 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
10902 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
10903 ; GFX10-NEXT: s_mov_b32 s8, 1
10904 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
10905 ; GFX10-NEXT: s_mov_b32 s9, 2
10906 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
10907 ; GFX10-NEXT: s_mov_b32 s10, 3
10908 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
10909 ; GFX10-NEXT: s_mov_b32 s11, 4
10910 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
10911 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
10912 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
10913 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
10914 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
10915 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
10916 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
10917 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
10918 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
10919 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
10920 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
10921 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
10922 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
10923 ; GFX10-NEXT: v_readlane_b32 s34, v40, 10
10924 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
10925 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
10926 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
10927 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
10928 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
10929 ; GFX10-NEXT: s_mov_b32 s33, s34
10930 ; GFX10-NEXT: s_waitcnt vmcnt(0)
10931 ; GFX10-NEXT: s_setpc_b64 s[30:31]
10933 ; GFX11-LABEL: test_call_external_void_func_v4i64_inreg:
10935 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10936 ; GFX11-NEXT: s_mov_b32 s0, s33
10937 ; GFX11-NEXT: s_mov_b32 s33, s32
10938 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10939 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
10940 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10941 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10
10942 ; GFX11-NEXT: s_mov_b64 s[0:1], 0
10943 ; GFX11-NEXT: s_add_i32 s32, s32, 16
10944 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
10945 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
10946 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
10947 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
10948 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
10949 ; GFX11-NEXT: s_getpc_b64 s[0:1]
10950 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4
10951 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12
10952 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
10953 ; GFX11-NEXT: s_mov_b32 s8, 1
10954 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
10955 ; GFX11-NEXT: s_mov_b32 s9, 2
10956 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
10957 ; GFX11-NEXT: s_mov_b32 s10, 3
10958 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
10959 ; GFX11-NEXT: s_mov_b32 s11, 4
10960 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8
10961 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9
10962 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
10963 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
10964 ; GFX11-NEXT: v_readlane_b32 s31, v40, 9
10965 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8
10966 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
10967 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
10968 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
10969 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
10970 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
10971 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
10972 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
10973 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
10974 ; GFX11-NEXT: v_readlane_b32 s0, v40, 10
10975 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
10976 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
10977 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
10978 ; GFX11-NEXT: s_add_i32 s32, s32, -16
10979 ; GFX11-NEXT: s_mov_b32 s33, s0
10980 ; GFX11-NEXT: s_waitcnt vmcnt(0)
10981 ; GFX11-NEXT: s_setpc_b64 s[30:31]
10983 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg:
10984 ; GFX10-SCRATCH: ; %bb.0:
10985 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10986 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
10987 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
10988 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
10989 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
10990 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
10991 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
10992 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10
10993 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
10994 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
10995 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
10996 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
10997 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
10998 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
10999 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
11000 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
11001 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4
11002 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12
11003 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
11004 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
11005 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
11006 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
11007 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
11008 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3
11009 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
11010 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4
11011 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
11012 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
11013 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11014 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
11015 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
11016 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
11017 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
11018 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
11019 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
11020 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
11021 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
11022 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11023 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11024 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10
11025 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11026 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11027 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11028 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11029 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11030 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11031 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11032 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11033 %load = load <2 x i64>, ptr addrspace(4) null
; Mask <0, 1, 2, 3> concatenates the operands: elements 0 and 1 come from
; %load, elements 2 and 3 are the two constant elements.
11034 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11035 call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
; Calls @external_void_func_f16_inreg with the constant half 4.0 passed as an
; inreg argument. In every run configuration the expected code places the
; argument in s4 as 0x4400 (the IEEE half-precision bit pattern of 4.0), and
; keeps the incoming s4, s30, s31 and the previous value of s33 in lanes 0-3
; of v40 across the call, restoring them afterwards.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script rather than editing by hand.
11039 define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
11040 ; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg:
11042 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11043 ; GFX9-NEXT: s_mov_b32 s34, s33
11044 ; GFX9-NEXT: s_mov_b32 s33, s32
11045 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11046 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11047 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11048 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
11049 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11050 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11051 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
11052 ; GFX9-NEXT: s_movk_i32 s4, 0x4400
11053 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
11054 ; GFX9-NEXT: s_getpc_b64 s[34:35]
11055 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
11056 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
11057 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11058 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
11059 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
11060 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11061 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
11062 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11063 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11064 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11065 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11066 ; GFX9-NEXT: s_mov_b32 s33, s34
11067 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11068 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11070 ; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg:
11072 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11073 ; GFX10-NEXT: s_mov_b32 s34, s33
11074 ; GFX10-NEXT: s_mov_b32 s33, s32
11075 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11076 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11077 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11078 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11079 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
11080 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11081 ; GFX10-NEXT: s_getpc_b64 s[34:35]
11082 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
11083 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
11084 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11085 ; GFX10-NEXT: s_movk_i32 s4, 0x4400
11086 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
11087 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
11088 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11089 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
11090 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
11091 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11092 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
11093 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11094 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11095 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11096 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11097 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11098 ; GFX10-NEXT: s_mov_b32 s33, s34
11099 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11100 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11102 ; GFX11-LABEL: test_call_external_void_func_f16_imm_inreg:
11104 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11105 ; GFX11-NEXT: s_mov_b32 s0, s33
11106 ; GFX11-NEXT: s_mov_b32 s33, s32
11107 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11108 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11109 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11110 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
11111 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11112 ; GFX11-NEXT: s_getpc_b64 s[0:1]
11113 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4
11114 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12
11115 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11116 ; GFX11-NEXT: s_movk_i32 s4, 0x4400
11117 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
11118 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
11119 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11120 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11121 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
11122 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
11123 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11124 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
11125 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11126 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11127 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11128 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11129 ; GFX11-NEXT: s_mov_b32 s33, s0
11130 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11131 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11133 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg:
11134 ; GFX10-SCRATCH: ; %bb.0:
11135 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11136 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11137 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11138 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11139 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11140 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11141 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11142 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
11143 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11144 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
11145 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4
11146 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12
11147 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11148 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400
11149 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
11150 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
11151 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11152 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
11153 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
11154 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11155 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
11156 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11157 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11158 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11159 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11160 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11161 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11162 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11163 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11164 call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
; Calls @external_void_func_f32_inreg with the constant float 4.0 passed as an
; inreg argument. In every run configuration the expected code moves the
; literal 4.0 into s4 with a single s_mov_b32, and keeps the incoming s4, s30,
; s31 and the previous value of s33 in lanes 0-3 of v40 across the call,
; restoring them afterwards.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script rather than editing by hand.
11168 define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
11169 ; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg:
11171 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11172 ; GFX9-NEXT: s_mov_b32 s34, s33
11173 ; GFX9-NEXT: s_mov_b32 s33, s32
11174 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11175 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11176 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11177 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
11178 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11179 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11180 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
11181 ; GFX9-NEXT: s_mov_b32 s4, 4.0
11182 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
11183 ; GFX9-NEXT: s_getpc_b64 s[34:35]
11184 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
11185 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
11186 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11187 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
11188 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
11189 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11190 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
11191 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11192 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11193 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11194 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11195 ; GFX9-NEXT: s_mov_b32 s33, s34
11196 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11197 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11199 ; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg:
11201 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11202 ; GFX10-NEXT: s_mov_b32 s34, s33
11203 ; GFX10-NEXT: s_mov_b32 s33, s32
11204 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11205 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11206 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11207 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11208 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
11209 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11210 ; GFX10-NEXT: s_getpc_b64 s[34:35]
11211 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
11212 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
11213 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11214 ; GFX10-NEXT: s_mov_b32 s4, 4.0
11215 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
11216 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
11217 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11218 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
11219 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
11220 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11221 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
11222 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11223 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11224 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11225 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11226 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11227 ; GFX10-NEXT: s_mov_b32 s33, s34
11228 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11229 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11231 ; GFX11-LABEL: test_call_external_void_func_f32_imm_inreg:
11233 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11234 ; GFX11-NEXT: s_mov_b32 s0, s33
11235 ; GFX11-NEXT: s_mov_b32 s33, s32
11236 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11237 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11238 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11239 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
11240 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11241 ; GFX11-NEXT: s_getpc_b64 s[0:1]
11242 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4
11243 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12
11244 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11245 ; GFX11-NEXT: s_mov_b32 s4, 4.0
11246 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
11247 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
11248 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11249 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11250 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
11251 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
11252 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11253 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
11254 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11255 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11256 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11257 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11258 ; GFX11-NEXT: s_mov_b32 s33, s0
11259 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11260 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11262 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg:
11263 ; GFX10-SCRATCH: ; %bb.0:
11264 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11265 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11266 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11267 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11268 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11269 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11270 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11271 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
11272 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11273 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
11274 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4
11275 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12
11276 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11277 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0
11278 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
11279 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
11280 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11281 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
11282 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
11283 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11284 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
11285 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11286 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11287 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11288 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11289 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11290 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11291 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11292 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11293 call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
; Calls @external_void_func_v2f32_inreg with the constant vector
; <float 1.0, float 2.0> passed as an inreg argument. In every run
; configuration the expected code places the two elements in s4 and s5, and
; keeps the incoming s4, s5, s30, s31 and the previous value of s33 in lanes
; 0-4 of v40 across the call, restoring them afterwards.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script rather than editing by hand.
11297 define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
11298 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11300 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11301 ; GFX9-NEXT: s_mov_b32 s34, s33
11302 ; GFX9-NEXT: s_mov_b32 s33, s32
11303 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11304 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11305 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11306 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
11307 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11308 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11309 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11310 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
11311 ; GFX9-NEXT: s_mov_b32 s4, 1.0
11312 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11313 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
11314 ; GFX9-NEXT: s_getpc_b64 s[34:35]
11315 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
11316 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
11317 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11318 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
11319 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
11320 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11321 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11322 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
11323 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11324 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11325 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11326 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11327 ; GFX9-NEXT: s_mov_b32 s33, s34
11328 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11329 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11331 ; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11333 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11334 ; GFX10-NEXT: s_mov_b32 s34, s33
11335 ; GFX10-NEXT: s_mov_b32 s33, s32
11336 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11337 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11338 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11339 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11340 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
11341 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11342 ; GFX10-NEXT: s_getpc_b64 s[34:35]
11343 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
11344 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
11345 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11346 ; GFX10-NEXT: s_mov_b32 s4, 1.0
11347 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11348 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11349 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
11350 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
11351 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11352 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
11353 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
11354 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11355 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11356 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
11357 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11358 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11359 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11360 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11361 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11362 ; GFX10-NEXT: s_mov_b32 s33, s34
11363 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11364 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11366 ; GFX11-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11368 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11369 ; GFX11-NEXT: s_mov_b32 s0, s33
11370 ; GFX11-NEXT: s_mov_b32 s33, s32
11371 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11372 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11373 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11374 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
11375 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11376 ; GFX11-NEXT: s_getpc_b64 s[0:1]
11377 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4
11378 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12
11379 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11380 ; GFX11-NEXT: s_mov_b32 s4, 1.0
11381 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11382 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11383 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
11384 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
11385 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11386 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11387 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
11388 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
11389 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11390 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11391 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
11392 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11393 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11394 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11395 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11396 ; GFX11-NEXT: s_mov_b32 s33, s0
11397 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11398 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11400 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg:
11401 ; GFX10-SCRATCH: ; %bb.0:
11402 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11403 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11404 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11405 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11406 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11407 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11408 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11409 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
11410 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11411 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
11412 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4
11413 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12
11414 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11415 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
11416 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11417 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11418 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
11419 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
11420 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11421 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
11422 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
11423 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11424 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11425 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
11426 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11427 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11428 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11429 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11430 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11431 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11432 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11433 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11434 call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>)
; Calls @external_void_func_v3f32_inreg with the constant vector
; <float 1.0, float 2.0, float 4.0> passed as an inreg argument. In every run
; configuration the expected code places the three elements in s4, s5 and s6,
; and keeps the incoming s4-s6, s30, s31 and the previous value of s33 in
; lanes 0-5 of v40 across the call, restoring them afterwards.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script rather than editing by hand.
11438 define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
11439 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11441 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11442 ; GFX9-NEXT: s_mov_b32 s34, s33
11443 ; GFX9-NEXT: s_mov_b32 s33, s32
11444 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11445 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11446 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11447 ; GFX9-NEXT: v_writelane_b32 v40, s34, 5
11448 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11449 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11450 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
11451 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11452 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3
11453 ; GFX9-NEXT: s_mov_b32 s4, 1.0
11454 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11455 ; GFX9-NEXT: s_mov_b32 s6, 4.0
11456 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
11457 ; GFX9-NEXT: s_getpc_b64 s[34:35]
11458 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
11459 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
11460 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11461 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
11462 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3
11463 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
11464 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11465 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11466 ; GFX9-NEXT: v_readlane_b32 s34, v40, 5
11467 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11468 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11469 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11470 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11471 ; GFX9-NEXT: s_mov_b32 s33, s34
11472 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11473 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11475 ; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11477 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11478 ; GFX10-NEXT: s_mov_b32 s34, s33
11479 ; GFX10-NEXT: s_mov_b32 s33, s32
11480 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11481 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11482 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11483 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11484 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5
11485 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11486 ; GFX10-NEXT: s_getpc_b64 s[34:35]
11487 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
11488 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
11489 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11490 ; GFX10-NEXT: s_mov_b32 s4, 1.0
11491 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11492 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11493 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
11494 ; GFX10-NEXT: s_mov_b32 s6, 4.0
11495 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
11496 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
11497 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11498 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
11499 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3
11500 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
11501 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11502 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11503 ; GFX10-NEXT: v_readlane_b32 s34, v40, 5
11504 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11505 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11506 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11507 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11508 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11509 ; GFX10-NEXT: s_mov_b32 s33, s34
11510 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11511 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11513 ; GFX11-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11515 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11516 ; GFX11-NEXT: s_mov_b32 s0, s33
11517 ; GFX11-NEXT: s_mov_b32 s33, s32
11518 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11519 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11520 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11521 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5
11522 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11523 ; GFX11-NEXT: s_getpc_b64 s[0:1]
11524 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4
11525 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12
11526 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11527 ; GFX11-NEXT: s_mov_b32 s4, 1.0
11528 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11529 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11530 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
11531 ; GFX11-NEXT: s_mov_b32 s6, 4.0
11532 ; GFX11-NEXT: v_writelane_b32 v40, s30, 3
11533 ; GFX11-NEXT: v_writelane_b32 v40, s31, 4
11534 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11535 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11536 ; GFX11-NEXT: v_readlane_b32 s31, v40, 4
11537 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3
11538 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
11539 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11540 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11541 ; GFX11-NEXT: v_readlane_b32 s0, v40, 5
11542 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11543 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11544 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11545 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11546 ; GFX11-NEXT: s_mov_b32 s33, s0
11547 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11548 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11550 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg:
11551 ; GFX10-SCRATCH: ; %bb.0:
11552 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11553 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11554 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11555 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11556 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11557 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11558 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11559 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5
11560 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11561 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
11562 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4
11563 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12
11564 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11565 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
11566 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11567 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11568 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
11569 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
11570 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
11571 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
11572 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11573 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
11574 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
11575 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
11576 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11577 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11578 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 5
11579 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11580 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11581 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11582 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11583 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11584 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11585 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11586 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11587 call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>)
; Calls @external_void_func_v5f32_inreg with the constant vector
; <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5> passed as an inreg
; argument. In every run configuration the expected code places the five
; elements in s4 through s8, and keeps the incoming s4-s8, s30, s31 and the
; previous value of s33 in lanes 0-7 of v40 across the call, restoring them
; afterwards.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script rather than editing by hand.
11591 define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 {
11592 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11594 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11595 ; GFX9-NEXT: s_mov_b32 s34, s33
11596 ; GFX9-NEXT: s_mov_b32 s33, s32
11597 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11598 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11599 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11600 ; GFX9-NEXT: v_writelane_b32 v40, s34, 7
11601 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11602 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11603 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
11604 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
11605 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
11606 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11607 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5
11608 ; GFX9-NEXT: s_mov_b32 s4, 1.0
11609 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11610 ; GFX9-NEXT: s_mov_b32 s6, 4.0
11611 ; GFX9-NEXT: s_mov_b32 s7, -1.0
11612 ; GFX9-NEXT: s_mov_b32 s8, 0.5
11613 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
11614 ; GFX9-NEXT: s_getpc_b64 s[34:35]
11615 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
11616 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
11617 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11618 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
11619 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5
11620 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
11621 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
11622 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
11623 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11624 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11625 ; GFX9-NEXT: v_readlane_b32 s34, v40, 7
11626 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11627 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11628 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11629 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11630 ; GFX9-NEXT: s_mov_b32 s33, s34
11631 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11632 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11634 ; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11636 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11637 ; GFX10-NEXT: s_mov_b32 s34, s33
11638 ; GFX10-NEXT: s_mov_b32 s33, s32
11639 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11640 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11641 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11642 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11643 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7
11644 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11645 ; GFX10-NEXT: s_getpc_b64 s[34:35]
11646 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
11647 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
11648 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11649 ; GFX10-NEXT: s_mov_b32 s4, 1.0
11650 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11651 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11652 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
11653 ; GFX10-NEXT: s_mov_b32 s6, 4.0
11654 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
11655 ; GFX10-NEXT: s_mov_b32 s7, -1.0
11656 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
11657 ; GFX10-NEXT: s_mov_b32 s8, 0.5
11658 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
11659 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
11660 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11661 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
11662 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5
11663 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
11664 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
11665 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
11666 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11667 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11668 ; GFX10-NEXT: v_readlane_b32 s34, v40, 7
11669 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11670 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11671 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11672 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11673 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11674 ; GFX10-NEXT: s_mov_b32 s33, s34
11675 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11676 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11678 ; GFX11-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11680 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11681 ; GFX11-NEXT: s_mov_b32 s0, s33
11682 ; GFX11-NEXT: s_mov_b32 s33, s32
11683 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11684 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11685 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11686 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7
11687 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11688 ; GFX11-NEXT: s_getpc_b64 s[0:1]
11689 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4
11690 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12
11691 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11692 ; GFX11-NEXT: s_mov_b32 s4, 1.0
11693 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11694 ; GFX11-NEXT: s_mov_b32 s5, 2.0
11695 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
11696 ; GFX11-NEXT: s_mov_b32 s6, 4.0
11697 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
11698 ; GFX11-NEXT: s_mov_b32 s7, -1.0
11699 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
11700 ; GFX11-NEXT: s_mov_b32 s8, 0.5
11701 ; GFX11-NEXT: v_writelane_b32 v40, s30, 5
11702 ; GFX11-NEXT: v_writelane_b32 v40, s31, 6
11703 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11704 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11705 ; GFX11-NEXT: v_readlane_b32 s31, v40, 6
11706 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5
11707 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
11708 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
11709 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
11710 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11711 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11712 ; GFX11-NEXT: v_readlane_b32 s0, v40, 7
11713 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11714 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11715 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11716 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11717 ; GFX11-NEXT: s_mov_b32 s33, s0
11718 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11719 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11721 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg:
11722 ; GFX10-SCRATCH: ; %bb.0:
11723 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11724 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11725 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11726 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11727 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11728 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11729 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11730 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7
11731 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11732 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
11733 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4
11734 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12
11735 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11736 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
11737 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11738 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
11739 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
11740 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
11741 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
11742 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0
11743 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
11744 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5
11745 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
11746 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
11747 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11748 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
11749 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
11750 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
11751 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
11752 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
11753 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11754 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11755 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 7
11756 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11757 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11758 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11759 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11760 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11761 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11762 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11763 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11764 call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Calls external_void_func_f64_inreg with the constant double 4.0 passed as an
; `inreg` argument. In every run configuration the expected code places the
; value in s4/s5 (s4 = 0, s5 = 0x40100000, i.e. the low and high dwords of the
; 64-bit pattern of 4.0). Around the call it saves and restores s4, s5, s30,
; s31 and the incoming s33 through lanes 0-4 of v40.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
11768 define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 {
11769 ; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg:
11771 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11772 ; GFX9-NEXT: s_mov_b32 s34, s33
11773 ; GFX9-NEXT: s_mov_b32 s33, s32
11774 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11775 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11776 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11777 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
11778 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11779 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11780 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11781 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
11782 ; GFX9-NEXT: s_mov_b32 s4, 0
11783 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000
11784 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
11785 ; GFX9-NEXT: s_getpc_b64 s[34:35]
11786 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
11787 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
11788 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11789 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
11790 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
11791 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11792 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11793 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
11794 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11795 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11796 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11797 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11798 ; GFX9-NEXT: s_mov_b32 s33, s34
11799 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11800 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11802 ; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg:
11804 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11805 ; GFX10-NEXT: s_mov_b32 s34, s33
11806 ; GFX10-NEXT: s_mov_b32 s33, s32
11807 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11808 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11809 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11810 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11811 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
11812 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11813 ; GFX10-NEXT: s_getpc_b64 s[34:35]
11814 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
11815 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
11816 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11817 ; GFX10-NEXT: s_mov_b32 s4, 0
11818 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11819 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000
11820 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
11821 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
11822 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11823 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
11824 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
11825 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11826 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11827 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
11828 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11829 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11830 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11831 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11832 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11833 ; GFX10-NEXT: s_mov_b32 s33, s34
11834 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11835 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11837 ; GFX11-LABEL: test_call_external_void_func_f64_imm_inreg:
11839 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11840 ; GFX11-NEXT: s_mov_b32 s0, s33
11841 ; GFX11-NEXT: s_mov_b32 s33, s32
11842 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11843 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11844 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11845 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
11846 ; GFX11-NEXT: s_add_i32 s32, s32, 16
11847 ; GFX11-NEXT: s_getpc_b64 s[0:1]
11848 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4
11849 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12
11850 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
11851 ; GFX11-NEXT: s_mov_b32 s4, 0
11852 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
11853 ; GFX11-NEXT: s_mov_b32 s5, 0x40100000
11854 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
11855 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
11856 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
11857 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
11858 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
11859 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
11860 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
11861 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
11862 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
11863 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11864 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
11865 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11866 ; GFX11-NEXT: s_add_i32 s32, s32, -16
11867 ; GFX11-NEXT: s_mov_b32 s33, s0
11868 ; GFX11-NEXT: s_waitcnt vmcnt(0)
11869 ; GFX11-NEXT: s_setpc_b64 s[30:31]
11871 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg:
11872 ; GFX10-SCRATCH: ; %bb.0:
11873 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11874 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
11875 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
11876 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11877 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
11878 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11879 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11880 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
11881 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
11882 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
11883 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4
11884 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12
11885 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
11886 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
11887 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
11888 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000
11889 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
11890 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
11891 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
11892 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
11893 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
11894 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
11895 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
11896 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
11897 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
11898 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
11899 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
11900 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
11901 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
11902 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
11903 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
11904 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
11905 call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0)
; Calls external_void_func_v2f64_inreg with the constant vector
; <double 2.0, double 4.0> passed as an `inreg` argument. The expected code
; fills s4-s7 one dword at a time: s4 = 0 and s5 = 2.0 for the first element,
; s6 = 0 and s7 = 0x40100000 for the second. The `2.0` operand is presumably
; the 32-bit pattern 0x40000000 printed as an inline constant, which is the
; high dword of double 2.0. Around the call, s4-s7, s30, s31 and the incoming
; s33 are saved and restored through lanes 0-6 of v40.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
11909 define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 {
11910 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg:
11912 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11913 ; GFX9-NEXT: s_mov_b32 s34, s33
11914 ; GFX9-NEXT: s_mov_b32 s33, s32
11915 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11916 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11917 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11918 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
11919 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
11920 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
11921 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
11922 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
11923 ; GFX9-NEXT: s_addk_i32 s32, 0x400
11924 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
11925 ; GFX9-NEXT: s_mov_b32 s4, 0
11926 ; GFX9-NEXT: s_mov_b32 s5, 2.0
11927 ; GFX9-NEXT: s_mov_b32 s6, 0
11928 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
11929 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
11930 ; GFX9-NEXT: s_getpc_b64 s[34:35]
11931 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
11932 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
11933 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
11934 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
11935 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
11936 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
11937 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
11938 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
11939 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
11940 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
11941 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
11942 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11943 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
11944 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
11945 ; GFX9-NEXT: s_mov_b32 s33, s34
11946 ; GFX9-NEXT: s_waitcnt vmcnt(0)
11947 ; GFX9-NEXT: s_setpc_b64 s[30:31]
11949 ; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg:
11951 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11952 ; GFX10-NEXT: s_mov_b32 s34, s33
11953 ; GFX10-NEXT: s_mov_b32 s33, s32
11954 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11955 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
11956 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11957 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11958 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
11959 ; GFX10-NEXT: s_addk_i32 s32, 0x200
11960 ; GFX10-NEXT: s_getpc_b64 s[34:35]
11961 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
11962 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
11963 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
11964 ; GFX10-NEXT: s_mov_b32 s4, 0
11965 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
11966 ; GFX10-NEXT: s_mov_b32 s5, 2.0
11967 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
11968 ; GFX10-NEXT: s_mov_b32 s6, 0
11969 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
11970 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
11971 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
11972 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
11973 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
11974 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
11975 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
11976 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
11977 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
11978 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
11979 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
11980 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
11981 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
11982 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
11983 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
11984 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
11985 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
11986 ; GFX10-NEXT: s_mov_b32 s33, s34
11987 ; GFX10-NEXT: s_waitcnt vmcnt(0)
11988 ; GFX10-NEXT: s_setpc_b64 s[30:31]
11990 ; GFX11-LABEL: test_call_external_void_func_v2f64_imm_inreg:
11992 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11993 ; GFX11-NEXT: s_mov_b32 s0, s33
11994 ; GFX11-NEXT: s_mov_b32 s33, s32
11995 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
11996 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
11997 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
11998 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
11999 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12000 ; GFX11-NEXT: s_getpc_b64 s[0:1]
12001 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4
12002 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12
12003 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12004 ; GFX11-NEXT: s_mov_b32 s4, 0
12005 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12006 ; GFX11-NEXT: s_mov_b32 s5, 2.0
12007 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
12008 ; GFX11-NEXT: s_mov_b32 s6, 0
12009 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
12010 ; GFX11-NEXT: s_mov_b32 s7, 0x40100000
12011 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
12012 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
12013 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12014 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12015 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
12016 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
12017 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
12018 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
12019 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12020 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12021 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
12022 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12023 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12024 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12025 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12026 ; GFX11-NEXT: s_mov_b32 s33, s0
12027 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12028 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12030 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg:
12031 ; GFX10-SCRATCH: ; %bb.0:
12032 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12033 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12034 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12035 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12036 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12037 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12038 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12039 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
12040 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12041 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
12042 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4
12043 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12
12044 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12045 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
12046 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12047 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
12048 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
12049 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
12050 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
12051 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
12052 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
12053 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
12054 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12055 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
12056 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
12057 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
12058 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
12059 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12060 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12061 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
12062 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12063 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12064 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12065 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12066 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12067 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12068 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12069 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
12070 call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg <double 2.0, double 4.0>)
; Calls external_void_func_v3f64_inreg with the constant vector
; <double 2.0, double 4.0, double 8.0> passed as an `inreg` argument. The
; expected code fills s4-s9 one dword at a time: s4/s5 = 0 / 2.0,
; s6/s7 = 0 / 0x40100000 and s8/s9 = 0 / 0x40200000. The `2.0` operand is
; presumably the 32-bit pattern 0x40000000 printed as an inline constant,
; which is the high dword of double 2.0. Around the call, s4-s9, s30, s31 and
; the incoming s33 are saved and restored through lanes 0-8 of v40.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
12074 define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 {
12075 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg:
12077 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12078 ; GFX9-NEXT: s_mov_b32 s34, s33
12079 ; GFX9-NEXT: s_mov_b32 s33, s32
12080 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12081 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12082 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12083 ; GFX9-NEXT: v_writelane_b32 v40, s34, 8
12084 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12085 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12086 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
12087 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
12088 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
12089 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
12090 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12091 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6
12092 ; GFX9-NEXT: s_mov_b32 s4, 0
12093 ; GFX9-NEXT: s_mov_b32 s5, 2.0
12094 ; GFX9-NEXT: s_mov_b32 s6, 0
12095 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
12096 ; GFX9-NEXT: s_mov_b32 s8, 0
12097 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000
12098 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7
12099 ; GFX9-NEXT: s_getpc_b64 s[34:35]
12100 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
12101 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
12102 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12103 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
12104 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6
12105 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
12106 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
12107 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
12108 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
12109 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12110 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12111 ; GFX9-NEXT: v_readlane_b32 s34, v40, 8
12112 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12113 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12114 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12115 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12116 ; GFX9-NEXT: s_mov_b32 s33, s34
12117 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12118 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12120 ; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg:
12122 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12123 ; GFX10-NEXT: s_mov_b32 s34, s33
12124 ; GFX10-NEXT: s_mov_b32 s33, s32
12125 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12126 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12127 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12128 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12129 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8
12130 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12131 ; GFX10-NEXT: s_getpc_b64 s[34:35]
12132 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
12133 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
12134 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12135 ; GFX10-NEXT: s_mov_b32 s4, 0
12136 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12137 ; GFX10-NEXT: s_mov_b32 s5, 2.0
12138 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
12139 ; GFX10-NEXT: s_mov_b32 s6, 0
12140 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
12141 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
12142 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
12143 ; GFX10-NEXT: s_mov_b32 s8, 0
12144 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
12145 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000
12146 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
12147 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
12148 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12149 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
12150 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6
12151 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
12152 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
12153 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
12154 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
12155 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
12156 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12157 ; GFX10-NEXT: v_readlane_b32 s34, v40, 8
12158 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12159 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12160 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12161 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12162 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12163 ; GFX10-NEXT: s_mov_b32 s33, s34
12164 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12165 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12167 ; GFX11-LABEL: test_call_external_void_func_v3f64_imm_inreg:
12169 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12170 ; GFX11-NEXT: s_mov_b32 s0, s33
12171 ; GFX11-NEXT: s_mov_b32 s33, s32
12172 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12173 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12174 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12175 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8
12176 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12177 ; GFX11-NEXT: s_getpc_b64 s[0:1]
12178 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4
12179 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12
12180 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12181 ; GFX11-NEXT: s_mov_b32 s4, 0
12182 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
12183 ; GFX11-NEXT: s_mov_b32 s5, 2.0
12184 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
12185 ; GFX11-NEXT: s_mov_b32 s6, 0
12186 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
12187 ; GFX11-NEXT: s_mov_b32 s7, 0x40100000
12188 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
12189 ; GFX11-NEXT: s_mov_b32 s8, 0
12190 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
12191 ; GFX11-NEXT: s_mov_b32 s9, 0x40200000
12192 ; GFX11-NEXT: v_writelane_b32 v40, s30, 6
12193 ; GFX11-NEXT: v_writelane_b32 v40, s31, 7
12194 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12195 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12196 ; GFX11-NEXT: v_readlane_b32 s31, v40, 7
12197 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6
12198 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
12199 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
12200 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
12201 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
12202 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
12203 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12204 ; GFX11-NEXT: v_readlane_b32 s0, v40, 8
12205 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12206 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12207 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12208 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12209 ; GFX11-NEXT: s_mov_b32 s33, s0
12210 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12211 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12213 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg:
12214 ; GFX10-SCRATCH: ; %bb.0:
12215 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12216 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12217 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12218 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12219 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12220 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12221 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12222 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8
12223 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12224 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
12225 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4
12226 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12
12227 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12228 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
12229 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
12230 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
12231 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
12232 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
12233 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
12234 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
12235 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
12236 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0
12237 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
12238 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000
12239 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
12240 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
12241 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12242 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
12243 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
12244 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
12245 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
12246 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
12247 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
12248 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
12249 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12250 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 8
12251 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12252 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12253 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12254 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12255 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12256 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12257 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12258 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
12259 call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg <double 2.0, double 4.0, double 8.0>)
; Calls external_void_func_v2i16_inreg with a <2 x i16> value passed as an
; `inreg` argument. Unlike the *_imm_inreg tests, the value is loaded rather
; than constant: the expected code issues a single 32-bit scalar load into s4
; and saves and restores s4, s30, s31 and the incoming s33 through lanes 0-3
; of v40.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
12263 define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 {
12264 ; GFX9-LABEL: test_call_external_void_func_v2i16_inreg:
12266 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12267 ; GFX9-NEXT: s_mov_b32 s34, s33
12268 ; GFX9-NEXT: s_mov_b32 s33, s32
12269 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12270 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12271 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12272 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
12273 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12274 ; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
12275 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12276 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
12277 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
12278 ; GFX9-NEXT: s_getpc_b64 s[34:35]
12279 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
12280 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
12281 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12282 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
12283 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
12284 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12285 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
12286 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12287 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12288 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12289 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12290 ; GFX9-NEXT: s_mov_b32 s33, s34
12291 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12292 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12294 ; GFX10-LABEL: test_call_external_void_func_v2i16_inreg:
12296 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12297 ; GFX10-NEXT: s_mov_b32 s34, s33
12298 ; GFX10-NEXT: s_mov_b32 s33, s32
12299 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12300 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12301 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12302 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12303 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
12304 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12305 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12306 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
12307 ; GFX10-NEXT: s_getpc_b64 s[34:35]
12308 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
12309 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
12310 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
12311 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
12312 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
12313 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
12314 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
12315 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
12316 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
12317 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12318 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12319 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12320 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12321 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
12322 ; GFX10-NEXT: s_mov_b32 s33, s34
12323 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12324 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12326 ; GFX11-LABEL: test_call_external_void_func_v2i16_inreg:
12328 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12329 ; GFX11-NEXT: s_mov_b32 s0, s33
12330 ; GFX11-NEXT: s_mov_b32 s33, s32
12331 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12332 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
12333 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12334 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
12335 ; GFX11-NEXT: s_add_i32 s32, s32, 16
12336 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
12337 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0
12338 ; GFX11-NEXT: s_getpc_b64 s[0:1]
12339 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4
12340 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12
12341 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
12342 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
12343 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
12344 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
12345 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
12346 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
12347 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
12348 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
12349 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
12350 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
12351 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
12352 ; GFX11-NEXT: s_add_i32 s32, s32, -16
12353 ; GFX11-NEXT: s_mov_b32 s33, s0
12354 ; GFX11-NEXT: s_waitcnt vmcnt(0)
12355 ; GFX11-NEXT: s_setpc_b64 s[30:31]
12357 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg:
12358 ; GFX10-SCRATCH: ; %bb.0:
12359 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12360 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
12361 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
12362 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12363 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
12364 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12365 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12366 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
12367 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
12368 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
12369 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
12370 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
12371 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4
12372 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12
12373 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
12374 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
12375 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
12376 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
12377 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
12378 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
12379 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
12380 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
12381 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
12382 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
12383 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
12384 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
12385 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
12386 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
12387 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the argument is loaded through an `undef` addrspace(4) pointer,
; so the base register pair of the expected scalar load is whatever the
; compiler happened to pick. Replacing `undef` would presumably require
; regenerating the checks - confirm before changing.
12388 %val = load <2 x i16>, ptr addrspace(4) undef
12389 call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val)
; Calls @external_void_func_v3i16_inreg with a <3 x i16> inreg argument that is
; loaded from an undef addrspace(4) pointer. For every RUN configuration the
; checks expect the value to be loaded into s[4:5], and the incoming s4, s5,
; s30, s31 and the previous s33 value to be kept in lanes 0-4 of v40 across
; the call and read back before returning.
define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 {
; GFX9-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s34, 4
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: v_writelane_b32 v40, s31, 3
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s30, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v40, 4
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s34, 4
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v40, 4
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 4
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: v_writelane_b32 v40, s5, 1
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 2
; GFX11-NEXT: v_writelane_b32 v40, s31, 3
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 3
; GFX11-NEXT: v_readlane_b32 s30, v40, 2
; GFX11-NEXT: v_readlane_b32 s5, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v40, 4
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x i16>, ptr addrspace(4) undef
call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val)
ret void
}

; Calls @external_void_func_v3f16_inreg with a <3 x half> inreg argument that
; is loaded from an undef addrspace(4) pointer. For every RUN configuration the
; checks expect the value to be loaded into s[4:5], and the incoming s4, s5,
; s30, s31 and the previous s33 value to be kept in lanes 0-4 of v40 across
; the call and read back before returning.
define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 {
; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s34, 4
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: v_writelane_b32 v40, s31, 3
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s30, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v40, 4
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s34, 4
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v40, 4
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 4
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: v_writelane_b32 v40, s5, 1
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 2
; GFX11-NEXT: v_writelane_b32 v40, s31, 3
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 3
; GFX11-NEXT: v_readlane_b32 s30, v40, 2
; GFX11-NEXT: v_readlane_b32 s5, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v40, 4
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x half>, ptr addrspace(4) undef
call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val)
ret void
}

; Calls @external_void_func_v3i16_inreg with the constant inreg argument
; <i16 1, i16 2, i16 3>. For every RUN configuration the checks expect the
; constant to be materialized as s4 = 0x20001 and s5 = 3, and the incoming s4,
; s5, s30, s31 and the previous s33 value to be kept in lanes 0-4 of v40
; across the call and read back before returning.
define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 {
; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s34, 4
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 0x20001
; GFX9-NEXT: s_mov_b32 s5, 3
; GFX9-NEXT: v_writelane_b32 v40, s31, 3
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s30, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v40, 4
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s34, 4
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 0x20001
; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 3
; GFX10-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v40, 4
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 4
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_mov_b32 s4, 0x20001
; GFX11-NEXT: v_writelane_b32 v40, s5, 1
; GFX11-NEXT: s_mov_b32 s5, 3
; GFX11-NEXT: v_writelane_b32 v40, s30, 2
; GFX11-NEXT: v_writelane_b32 v40, s31, 3
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 3
; GFX11-NEXT: v_readlane_b32 s30, v40, 2
; GFX11-NEXT: v_readlane_b32 s5, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v40, 4
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg <i16 1, i16 2, i16 3>)
ret void
}

; Calls @external_void_func_v3f16_inreg with the constant inreg argument
; <half 1.0, half 2.0, half 4.0>. For every RUN configuration the checks expect
; the constant to be materialized as s4 = 0x40003c00 and s5 = 0x4400, and the
; incoming s4, s5, s30, s31 and the previous s33 value to be kept in lanes 0-4
; of v40 across the call and read back before returning.
define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 {
; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s34, 4
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 0x40003c00
; GFX9-NEXT: s_movk_i32 s5, 0x4400
; GFX9-NEXT: v_writelane_b32 v40, s31, 3
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s30, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v40, 4
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s34, 4
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 0x40003c00
; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_movk_i32 s5, 0x4400
; GFX10-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v40, 4
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 4
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_mov_b32 s4, 0x40003c00
; GFX11-NEXT: v_writelane_b32 v40, s5, 1
; GFX11-NEXT: s_movk_i32 s5, 0x4400
; GFX11-NEXT: v_writelane_b32 v40, s30, 2
; GFX11-NEXT: v_writelane_b32 v40, s31, 3
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 3
; GFX11-NEXT: v_readlane_b32 s30, v40, 2
; GFX11-NEXT: v_readlane_b32 s5, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v40, 4
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg <half 1.0, half 2.0, half 4.0>)
ret void
}

12951 define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 {
12952 ; GFX9-LABEL: test_call_external_void_func_v4i16_inreg:
12954 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12955 ; GFX9-NEXT: s_mov_b32 s34, s33
12956 ; GFX9-NEXT: s_mov_b32 s33, s32
12957 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12958 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12959 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12960 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
12961 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
12962 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
12963 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12964 ; GFX9-NEXT: s_addk_i32 s32, 0x400
12965 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
12966 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
12967 ; GFX9-NEXT: s_getpc_b64 s[34:35]
12968 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
12969 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
12970 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
12971 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
12972 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
12973 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
12974 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
12975 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
12976 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
12977 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
12978 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
12979 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
12980 ; GFX9-NEXT: s_mov_b32 s33, s34
12981 ; GFX9-NEXT: s_waitcnt vmcnt(0)
12982 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12984 ; GFX10-LABEL: test_call_external_void_func_v4i16_inreg:
12986 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12987 ; GFX10-NEXT: s_mov_b32 s34, s33
12988 ; GFX10-NEXT: s_mov_b32 s33, s32
12989 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
12990 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
12991 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
12992 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
12993 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
12994 ; GFX10-NEXT: s_addk_i32 s32, 0x200
12995 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
12996 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
12997 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
12998 ; GFX10-NEXT: s_getpc_b64 s[34:35]
12999 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
13000 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
13001 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
13002 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
13003 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13004 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
13005 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
13006 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13007 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13008 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
13009 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13010 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13011 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13012 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13013 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13014 ; GFX10-NEXT: s_mov_b32 s33, s34
13015 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13016 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13018 ; GFX11-LABEL: test_call_external_void_func_v4i16_inreg:
13020 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13021 ; GFX11-NEXT: s_mov_b32 s0, s33
13022 ; GFX11-NEXT: s_mov_b32 s33, s32
13023 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13024 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13025 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13026 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
13027 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13028 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13029 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13030 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
13031 ; GFX11-NEXT: s_getpc_b64 s[0:1]
13032 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
13033 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
13034 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
13035 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
13036 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13037 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13038 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
13039 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
13040 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13041 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13042 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
13043 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13044 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13045 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13046 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13047 ; GFX11-NEXT: s_mov_b32 s33, s0
13048 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13049 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13051 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg:
13052 ; GFX10-SCRATCH: ; %bb.0:
13053 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13054 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13055 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13056 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13057 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13058 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13059 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13060 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
13061 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13062 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13063 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13064 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
13065 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
13066 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
13067 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
13068 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
13069 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
13070 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13071 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
13072 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
13073 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13074 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13075 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
13076 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13077 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13078 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13079 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13080 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13081 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13082 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13083 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13084 %val = load <4 x i16>, ptr addrspace(4) undef
13085 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val)
; Calls external_void_func_v4i16_inreg with the constant vector
; <4 x i16> <1, 2, 3, 4> passed as an inreg argument. The expected code
; materializes the packed halves 0x20001 and 0x40003 in s4 and s5 with
; s_mov_b32, after saving the incoming s4/s5 and s30/s31 into lanes of v40,
; and restores them from those lanes after the call.
13089 define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 {
13090 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg:
13092 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13093 ; GFX9-NEXT: s_mov_b32 s34, s33
13094 ; GFX9-NEXT: s_mov_b32 s33, s32
13095 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13096 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13097 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13098 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
13099 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13100 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13101 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13102 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
13103 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
13104 ; GFX9-NEXT: s_mov_b32 s5, 0x40003
13105 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
13106 ; GFX9-NEXT: s_getpc_b64 s[34:35]
13107 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
13108 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
13109 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13110 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
13111 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
13112 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13113 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13114 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
13115 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13116 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13117 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13118 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13119 ; GFX9-NEXT: s_mov_b32 s33, s34
13120 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13121 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13123 ; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg:
13125 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13126 ; GFX10-NEXT: s_mov_b32 s34, s33
13127 ; GFX10-NEXT: s_mov_b32 s33, s32
13128 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13129 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13130 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13131 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13132 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
13133 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13134 ; GFX10-NEXT: s_getpc_b64 s[34:35]
13135 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
13136 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
13137 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13138 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
13139 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13140 ; GFX10-NEXT: s_mov_b32 s5, 0x40003
13141 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
13142 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
13143 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13144 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
13145 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
13146 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13147 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13148 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
13149 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13150 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13151 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13152 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13153 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13154 ; GFX10-NEXT: s_mov_b32 s33, s34
13155 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13156 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13158 ; GFX11-LABEL: test_call_external_void_func_v4i16_imm_inreg:
13160 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13161 ; GFX11-NEXT: s_mov_b32 s0, s33
13162 ; GFX11-NEXT: s_mov_b32 s33, s32
13163 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13164 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13165 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13166 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
13167 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13168 ; GFX11-NEXT: s_getpc_b64 s[0:1]
13169 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
13170 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
13171 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13172 ; GFX11-NEXT: s_mov_b32 s4, 0x20001
13173 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13174 ; GFX11-NEXT: s_mov_b32 s5, 0x40003
13175 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
13176 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
13177 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13178 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13179 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
13180 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
13181 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13182 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13183 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
13184 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13185 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13186 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13187 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13188 ; GFX11-NEXT: s_mov_b32 s33, s0
13189 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13190 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13192 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg:
13193 ; GFX10-SCRATCH: ; %bb.0:
13194 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13195 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13196 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13197 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13198 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13199 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13200 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13201 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
13202 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13203 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
13204 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
13205 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
13206 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13207 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
13208 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13209 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003
13210 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
13211 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
13212 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13213 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
13214 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
13215 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13216 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13217 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
13218 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13219 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13220 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13221 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13222 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13223 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13224 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13225 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13226 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg <i16 1, i16 2, i16 3, i16 4>)
; Loads a <2 x half> from `ptr addrspace(4) undef` and passes it as an inreg
; argument to external_void_func_v2f16_inreg. The expected code saves the
; incoming s4 and s30/s31 into lanes of v40, loads the value into s4 with a
; single-dword scalar load, and restores the saved registers after the call.
13230 define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 {
13231 ; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
13233 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13234 ; GFX9-NEXT: s_mov_b32 s34, s33
13235 ; GFX9-NEXT: s_mov_b32 s33, s32
13236 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13237 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13238 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13239 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
13240 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13241 ; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
13242 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13243 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
13244 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
13245 ; GFX9-NEXT: s_getpc_b64 s[34:35]
13246 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
13247 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
13248 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13249 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
13250 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
13251 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13252 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
13253 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13254 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13255 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13256 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13257 ; GFX9-NEXT: s_mov_b32 s33, s34
13258 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13259 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13261 ; GFX10-LABEL: test_call_external_void_func_v2f16_inreg:
13263 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13264 ; GFX10-NEXT: s_mov_b32 s34, s33
13265 ; GFX10-NEXT: s_mov_b32 s33, s32
13266 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13267 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13268 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13269 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13270 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
13271 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13272 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13273 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
13274 ; GFX10-NEXT: s_getpc_b64 s[34:35]
13275 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
13276 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
13277 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
13278 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
13279 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13280 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
13281 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
13282 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13283 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
13284 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13285 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13286 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13287 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13288 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13289 ; GFX10-NEXT: s_mov_b32 s33, s34
13290 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13291 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13293 ; GFX11-LABEL: test_call_external_void_func_v2f16_inreg:
13295 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13296 ; GFX11-NEXT: s_mov_b32 s0, s33
13297 ; GFX11-NEXT: s_mov_b32 s33, s32
13298 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13299 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13300 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13301 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
13302 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13303 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13304 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0
13305 ; GFX11-NEXT: s_getpc_b64 s[0:1]
13306 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4
13307 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12
13308 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
13309 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
13310 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13311 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13312 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
13313 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
13314 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13315 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
13316 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13317 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13318 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13319 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13320 ; GFX11-NEXT: s_mov_b32 s33, s0
13321 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13322 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13324 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg:
13325 ; GFX10-SCRATCH: ; %bb.0:
13326 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13327 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13328 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13329 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13330 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13331 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13332 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13333 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3
13334 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13335 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13336 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
13337 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
13338 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4
13339 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12
13340 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
13341 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
13342 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13343 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
13344 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
13345 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13346 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3
13347 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13348 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13349 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13350 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13351 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13352 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13353 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13354 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13355 %val = load <2 x half>, ptr addrspace(4) undef
13356 call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val)
; Loads a <2 x i32> from `ptr addrspace(4) undef` and passes it as an inreg
; argument to external_void_func_v2i32_inreg. The expected code saves the
; incoming s4/s5 and s30/s31 into lanes of v40, loads the value into s[4:5]
; with one 64-bit scalar load, and restores the saved registers after the call.
13360 define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 {
13361 ; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
13363 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13364 ; GFX9-NEXT: s_mov_b32 s34, s33
13365 ; GFX9-NEXT: s_mov_b32 s33, s32
13366 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13367 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13368 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13369 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
13370 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13371 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13372 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
13373 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13374 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
13375 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
13376 ; GFX9-NEXT: s_getpc_b64 s[34:35]
13377 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
13378 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
13379 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13380 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
13381 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
13382 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13383 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13384 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
13385 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13386 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13387 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13388 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13389 ; GFX9-NEXT: s_mov_b32 s33, s34
13390 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13391 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13393 ; GFX10-LABEL: test_call_external_void_func_v2i32_inreg:
13395 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13396 ; GFX10-NEXT: s_mov_b32 s34, s33
13397 ; GFX10-NEXT: s_mov_b32 s33, s32
13398 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13399 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13400 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13401 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13402 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
13403 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13404 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13405 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13406 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
13407 ; GFX10-NEXT: s_getpc_b64 s[34:35]
13408 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
13409 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
13410 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
13411 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
13412 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13413 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
13414 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
13415 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13416 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13417 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
13418 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13419 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13420 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13421 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13422 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13423 ; GFX10-NEXT: s_mov_b32 s33, s34
13424 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13425 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13427 ; GFX11-LABEL: test_call_external_void_func_v2i32_inreg:
13429 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13430 ; GFX11-NEXT: s_mov_b32 s0, s33
13431 ; GFX11-NEXT: s_mov_b32 s33, s32
13432 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13433 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13434 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13435 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
13436 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13437 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13438 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13439 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
13440 ; GFX11-NEXT: s_getpc_b64 s[0:1]
13441 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
13442 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
13443 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
13444 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
13445 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13446 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13447 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
13448 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
13449 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13450 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13451 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
13452 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13453 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13454 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13455 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13456 ; GFX11-NEXT: s_mov_b32 s33, s0
13457 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13458 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13460 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg:
13461 ; GFX10-SCRATCH: ; %bb.0:
13462 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13463 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13464 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13465 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13466 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13467 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13468 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13469 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
13470 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13471 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13472 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13473 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
13474 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
13475 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
13476 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
13477 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
13478 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
13479 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13480 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
13481 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
13482 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13483 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13484 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
13485 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13486 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13487 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13488 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13489 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13490 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13491 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13492 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13493 %val = load <2 x i32>, ptr addrspace(4) undef
13494 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val)
; Calls external_void_func_v2i32_inreg with the constant vector
; <2 x i32> <1, 2> passed as an inreg argument. The expected code saves the
; incoming s4/s5 and s30/s31 into lanes of v40, materializes 1 and 2 in s4
; and s5 with s_mov_b32, and restores the saved registers after the call.
13498 define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 {
13499 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13501 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13502 ; GFX9-NEXT: s_mov_b32 s34, s33
13503 ; GFX9-NEXT: s_mov_b32 s33, s32
13504 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13505 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13506 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13507 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
13508 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13509 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13510 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13511 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
13512 ; GFX9-NEXT: s_mov_b32 s4, 1
13513 ; GFX9-NEXT: s_mov_b32 s5, 2
13514 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
13515 ; GFX9-NEXT: s_getpc_b64 s[34:35]
13516 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
13517 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
13518 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13519 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
13520 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
13521 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13522 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13523 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
13524 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13525 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13526 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13527 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13528 ; GFX9-NEXT: s_mov_b32 s33, s34
13529 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13530 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13532 ; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13534 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13535 ; GFX10-NEXT: s_mov_b32 s34, s33
13536 ; GFX10-NEXT: s_mov_b32 s33, s32
13537 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13538 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13539 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13540 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13541 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
13542 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13543 ; GFX10-NEXT: s_getpc_b64 s[34:35]
13544 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
13545 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
13546 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13547 ; GFX10-NEXT: s_mov_b32 s4, 1
13548 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13549 ; GFX10-NEXT: s_mov_b32 s5, 2
13550 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
13551 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
13552 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13553 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
13554 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
13555 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13556 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13557 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
13558 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13559 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13560 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13561 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13562 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13563 ; GFX10-NEXT: s_mov_b32 s33, s34
13564 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13565 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13567 ; GFX11-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13569 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13570 ; GFX11-NEXT: s_mov_b32 s0, s33
13571 ; GFX11-NEXT: s_mov_b32 s33, s32
13572 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13573 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13574 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13575 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
13576 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13577 ; GFX11-NEXT: s_getpc_b64 s[0:1]
13578 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
13579 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
13580 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13581 ; GFX11-NEXT: s_mov_b32 s4, 1
13582 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13583 ; GFX11-NEXT: s_mov_b32 s5, 2
13584 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
13585 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
13586 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13587 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13588 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
13589 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
13590 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13591 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13592 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
13593 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13594 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13595 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13596 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13597 ; GFX11-NEXT: s_mov_b32 s33, s0
13598 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13599 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13601 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg:
13602 ; GFX10-SCRATCH: ; %bb.0:
13603 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13604 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13605 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13606 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13607 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13608 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13609 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13610 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4
13611 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13612 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
13613 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
13614 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
13615 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13616 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
13617 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13618 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
13619 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
13620 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
13621 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13622 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
13623 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
13624 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13625 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13626 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4
13627 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13628 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13629 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13630 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13631 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13632 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13633 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13634 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13635 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg <i32 1, i32 2>)
13639 define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 {
13640 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13642 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13643 ; GFX9-NEXT: s_mov_b32 s34, s33
13644 ; GFX9-NEXT: s_mov_b32 s33, s32
13645 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13646 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13647 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13648 ; GFX9-NEXT: v_writelane_b32 v40, s34, 5
13649 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13650 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13651 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13652 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13653 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3
13654 ; GFX9-NEXT: s_mov_b32 s4, 3
13655 ; GFX9-NEXT: s_mov_b32 s5, 4
13656 ; GFX9-NEXT: s_mov_b32 s6, 5
13657 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
13658 ; GFX9-NEXT: s_getpc_b64 s[34:35]
13659 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
13660 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
13661 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13662 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
13663 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3
13664 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13665 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13666 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13667 ; GFX9-NEXT: v_readlane_b32 s34, v40, 5
13668 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13669 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13670 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13671 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13672 ; GFX9-NEXT: s_mov_b32 s33, s34
13673 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13674 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13676 ; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13678 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13679 ; GFX10-NEXT: s_mov_b32 s34, s33
13680 ; GFX10-NEXT: s_mov_b32 s33, s32
13681 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13682 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13683 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13684 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13685 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5
13686 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13687 ; GFX10-NEXT: s_getpc_b64 s[34:35]
13688 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
13689 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
13690 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13691 ; GFX10-NEXT: s_mov_b32 s4, 3
13692 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13693 ; GFX10-NEXT: s_mov_b32 s5, 4
13694 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
13695 ; GFX10-NEXT: s_mov_b32 s6, 5
13696 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
13697 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
13698 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13699 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
13700 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3
13701 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
13702 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13703 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13704 ; GFX10-NEXT: v_readlane_b32 s34, v40, 5
13705 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13706 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13707 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13708 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13709 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13710 ; GFX10-NEXT: s_mov_b32 s33, s34
13711 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13712 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13714 ; GFX11-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13716 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13717 ; GFX11-NEXT: s_mov_b32 s0, s33
13718 ; GFX11-NEXT: s_mov_b32 s33, s32
13719 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13720 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13721 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13722 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5
13723 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13724 ; GFX11-NEXT: s_getpc_b64 s[0:1]
13725 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4
13726 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12
13727 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13728 ; GFX11-NEXT: s_mov_b32 s4, 3
13729 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13730 ; GFX11-NEXT: s_mov_b32 s5, 4
13731 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
13732 ; GFX11-NEXT: s_mov_b32 s6, 5
13733 ; GFX11-NEXT: v_writelane_b32 v40, s30, 3
13734 ; GFX11-NEXT: v_writelane_b32 v40, s31, 4
13735 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13736 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13737 ; GFX11-NEXT: v_readlane_b32 s31, v40, 4
13738 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3
13739 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
13740 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13741 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13742 ; GFX11-NEXT: v_readlane_b32 s0, v40, 5
13743 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13744 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13745 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13746 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13747 ; GFX11-NEXT: s_mov_b32 s33, s0
13748 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13749 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13751 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg:
13752 ; GFX10-SCRATCH: ; %bb.0:
13753 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13754 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13755 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13756 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13757 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13758 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13759 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13760 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5
13761 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13762 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
13763 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4
13764 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12
13765 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13766 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
13767 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13768 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
13769 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
13770 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
13771 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
13772 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
13773 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13774 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
13775 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
13776 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
13777 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13778 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13779 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 5
13780 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13781 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13782 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13783 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13784 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13785 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13786 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13787 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13788 call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>)
; Calls @external_void_func_v3i32_i32_inreg with two constant inreg arguments:
; the vector <3, 4, 5> and the scalar 6. The expected code saves the incoming
; s4-s7 into lanes 0-3 of v40, materializes the constants into s4-s7 with
; s_mov_b32, and reads the saved values back from v40 after the call.
; The unnamed i32 parameter of this function is not referenced by the call.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them rather than editing them by hand.
13792 define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 {
13793 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13795 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13796 ; GFX9-NEXT: s_mov_b32 s34, s33
13797 ; GFX9-NEXT: s_mov_b32 s33, s32
13798 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13799 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13800 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13801 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
13802 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13803 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13804 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13805 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
13806 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13807 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
13808 ; GFX9-NEXT: s_mov_b32 s4, 3
13809 ; GFX9-NEXT: s_mov_b32 s5, 4
13810 ; GFX9-NEXT: s_mov_b32 s6, 5
13811 ; GFX9-NEXT: s_mov_b32 s7, 6
13812 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
13813 ; GFX9-NEXT: s_getpc_b64 s[34:35]
13814 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
13815 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
13816 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13817 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
13818 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
13819 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
13820 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13821 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13822 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13823 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
13824 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13825 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13826 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13827 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13828 ; GFX9-NEXT: s_mov_b32 s33, s34
13829 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13830 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13832 ; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13834 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13835 ; GFX10-NEXT: s_mov_b32 s34, s33
13836 ; GFX10-NEXT: s_mov_b32 s33, s32
13837 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13838 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13839 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13840 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13841 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
13842 ; GFX10-NEXT: s_addk_i32 s32, 0x200
13843 ; GFX10-NEXT: s_getpc_b64 s[34:35]
13844 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
13845 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
13846 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
13847 ; GFX10-NEXT: s_mov_b32 s4, 3
13848 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
13849 ; GFX10-NEXT: s_mov_b32 s5, 4
13850 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
13851 ; GFX10-NEXT: s_mov_b32 s6, 5
13852 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
13853 ; GFX10-NEXT: s_mov_b32 s7, 6
13854 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
13855 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
13856 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
13857 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
13858 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
13859 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
13860 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
13861 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
13862 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
13863 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
13864 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
13865 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13866 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
13867 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
13868 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
13869 ; GFX10-NEXT: s_mov_b32 s33, s34
13870 ; GFX10-NEXT: s_waitcnt vmcnt(0)
13871 ; GFX10-NEXT: s_setpc_b64 s[30:31]
13873 ; GFX11-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13875 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13876 ; GFX11-NEXT: s_mov_b32 s0, s33
13877 ; GFX11-NEXT: s_mov_b32 s33, s32
13878 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13879 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
13880 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13881 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
13882 ; GFX11-NEXT: s_add_i32 s32, s32, 16
13883 ; GFX11-NEXT: s_getpc_b64 s[0:1]
13884 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4
13885 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12
13886 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
13887 ; GFX11-NEXT: s_mov_b32 s4, 3
13888 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
13889 ; GFX11-NEXT: s_mov_b32 s5, 4
13890 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
13891 ; GFX11-NEXT: s_mov_b32 s6, 5
13892 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
13893 ; GFX11-NEXT: s_mov_b32 s7, 6
13894 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
13895 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
13896 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
13897 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
13898 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
13899 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
13900 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
13901 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
13902 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
13903 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
13904 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
13905 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
13906 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
13907 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
13908 ; GFX11-NEXT: s_add_i32 s32, s32, -16
13909 ; GFX11-NEXT: s_mov_b32 s33, s0
13910 ; GFX11-NEXT: s_waitcnt vmcnt(0)
13911 ; GFX11-NEXT: s_setpc_b64 s[30:31]
13913 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg:
13914 ; GFX10-SCRATCH: ; %bb.0:
13915 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13916 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
13917 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
13918 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13919 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
13920 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13921 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13922 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
13923 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
13924 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
13925 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4
13926 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12
13927 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
13928 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
13929 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
13930 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
13931 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
13932 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
13933 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
13934 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6
13935 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
13936 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
13937 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
13938 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
13939 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
13940 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
13941 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
13942 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
13943 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
13944 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
13945 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
13946 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
13947 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
13948 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
13949 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
13950 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
13951 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
13952 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
13953 call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>, i32 inreg 6)
; Loads a <4 x i32> from an undef constant-address-space (addrspace(4)) pointer
; and passes it as a single inreg argument to @external_void_func_v4i32_inreg.
; The expected code saves the incoming s4-s7 into lanes 0-3 of v40, then loads
; the argument into s[4:7] with one scalar load (s_load_dwordx4, or s_load_b128
; in the gfx1100 run), and reads the saved values back from v40 after the call.
; NOTE(review): because the pointer is undef, the base register pair of the
; expected scalar load is presumably just whatever the compiler happened to
; pick - confirm before relying on it.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them rather than editing them by hand.
13957 define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 {
13958 ; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
13960 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13961 ; GFX9-NEXT: s_mov_b32 s34, s33
13962 ; GFX9-NEXT: s_mov_b32 s33, s32
13963 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13964 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
13965 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13966 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
13967 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
13968 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
13969 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
13970 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
13971 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
13972 ; GFX9-NEXT: s_addk_i32 s32, 0x400
13973 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
13974 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
13975 ; GFX9-NEXT: s_getpc_b64 s[34:35]
13976 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
13977 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
13978 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
13979 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
13980 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
13981 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
13982 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
13983 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
13984 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
13985 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
13986 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
13987 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
13988 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
13989 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
13990 ; GFX9-NEXT: s_mov_b32 s33, s34
13991 ; GFX9-NEXT: s_waitcnt vmcnt(0)
13992 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13994 ; GFX10-LABEL: test_call_external_void_func_v4i32_inreg:
13996 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13997 ; GFX10-NEXT: s_mov_b32 s34, s33
13998 ; GFX10-NEXT: s_mov_b32 s33, s32
13999 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14000 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14001 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14002 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14003 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
14004 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14005 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14006 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14007 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14008 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14009 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
14010 ; GFX10-NEXT: s_getpc_b64 s[34:35]
14011 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
14012 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
14013 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
14014 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
14015 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14016 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
14017 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
14018 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14019 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14020 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14021 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14022 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
14023 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14024 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14025 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14026 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14027 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14028 ; GFX10-NEXT: s_mov_b32 s33, s34
14029 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14030 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14032 ; GFX11-LABEL: test_call_external_void_func_v4i32_inreg:
14034 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14035 ; GFX11-NEXT: s_mov_b32 s0, s33
14036 ; GFX11-NEXT: s_mov_b32 s33, s32
14037 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14038 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14039 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14040 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
14041 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14042 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14043 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14044 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14045 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14046 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
14047 ; GFX11-NEXT: s_getpc_b64 s[0:1]
14048 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
14049 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
14050 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
14051 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
14052 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14053 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14054 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
14055 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
14056 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14057 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14058 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14059 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14060 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
14061 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14062 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14063 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14064 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14065 ; GFX11-NEXT: s_mov_b32 s33, s0
14066 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14067 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14069 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg:
14070 ; GFX10-SCRATCH: ; %bb.0:
14071 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14072 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14073 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14074 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14075 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14076 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14077 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14078 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
14079 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14080 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14081 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14082 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14083 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14084 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
14085 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
14086 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
14087 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
14088 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
14089 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
14090 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14091 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
14092 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
14093 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14094 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14095 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14096 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14097 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
14098 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14099 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14100 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14101 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14102 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14103 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14104 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14105 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
14106 %val = load <4 x i32>, ptr addrspace(4) undef
14107 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val)
; Calls @external_void_func_v4i32_inreg with the constant inreg vector
; <1, 2, 3, 4>. The expected code saves the incoming s4-s7 into lanes 0-3 of
; v40, materializes the four constants into s4-s7 with s_mov_b32 (no memory
; load is expected), and reads the saved values back from v40 after the call.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them rather than editing them by hand.
14111 define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 {
14112 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg:
14114 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14115 ; GFX9-NEXT: s_mov_b32 s34, s33
14116 ; GFX9-NEXT: s_mov_b32 s33, s32
14117 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14118 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14119 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14120 ; GFX9-NEXT: v_writelane_b32 v40, s34, 6
14121 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14122 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14123 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14124 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14125 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14126 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4
14127 ; GFX9-NEXT: s_mov_b32 s4, 1
14128 ; GFX9-NEXT: s_mov_b32 s5, 2
14129 ; GFX9-NEXT: s_mov_b32 s6, 3
14130 ; GFX9-NEXT: s_mov_b32 s7, 4
14131 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
14132 ; GFX9-NEXT: s_getpc_b64 s[34:35]
14133 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
14134 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
14135 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14136 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
14137 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
14138 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14139 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14140 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14141 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14142 ; GFX9-NEXT: v_readlane_b32 s34, v40, 6
14143 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14144 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14145 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14146 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14147 ; GFX9-NEXT: s_mov_b32 s33, s34
14148 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14149 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14151 ; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg:
14153 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14154 ; GFX10-NEXT: s_mov_b32 s34, s33
14155 ; GFX10-NEXT: s_mov_b32 s33, s32
14156 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14157 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14158 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14159 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14160 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6
14161 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14162 ; GFX10-NEXT: s_getpc_b64 s[34:35]
14163 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
14164 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
14165 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14166 ; GFX10-NEXT: s_mov_b32 s4, 1
14167 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14168 ; GFX10-NEXT: s_mov_b32 s5, 2
14169 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14170 ; GFX10-NEXT: s_mov_b32 s6, 3
14171 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14172 ; GFX10-NEXT: s_mov_b32 s7, 4
14173 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
14174 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
14175 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14176 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
14177 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
14178 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14179 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14180 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14181 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14182 ; GFX10-NEXT: v_readlane_b32 s34, v40, 6
14183 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14184 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14185 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14186 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14187 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14188 ; GFX10-NEXT: s_mov_b32 s33, s34
14189 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14190 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14192 ; GFX11-LABEL: test_call_external_void_func_v4i32_imm_inreg:
14194 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14195 ; GFX11-NEXT: s_mov_b32 s0, s33
14196 ; GFX11-NEXT: s_mov_b32 s33, s32
14197 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14198 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14199 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14200 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6
14201 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14202 ; GFX11-NEXT: s_getpc_b64 s[0:1]
14203 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
14204 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
14205 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14206 ; GFX11-NEXT: s_mov_b32 s4, 1
14207 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14208 ; GFX11-NEXT: s_mov_b32 s5, 2
14209 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14210 ; GFX11-NEXT: s_mov_b32 s6, 3
14211 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14212 ; GFX11-NEXT: s_mov_b32 s7, 4
14213 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4
14214 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5
14215 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14216 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14217 ; GFX11-NEXT: v_readlane_b32 s31, v40, 5
14218 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4
14219 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14220 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14221 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14222 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14223 ; GFX11-NEXT: v_readlane_b32 s0, v40, 6
14224 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14225 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14226 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14227 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14228 ; GFX11-NEXT: s_mov_b32 s33, s0
14229 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14230 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14232 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg:
14233 ; GFX10-SCRATCH: ; %bb.0:
14234 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14235 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14236 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14237 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14238 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14239 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14240 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14241 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6
14242 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14243 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
14244 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
14245 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
14246 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14247 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
14248 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14249 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
14250 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14251 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
14252 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14253 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
14254 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
14255 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
14256 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14257 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
14258 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
14259 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14260 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14261 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14262 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14263 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6
14264 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14265 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14266 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14267 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14268 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14269 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14270 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14271 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
14272 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>)
14276 define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 {
14277 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg:
14279 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14280 ; GFX9-NEXT: s_mov_b32 s34, s33
14281 ; GFX9-NEXT: s_mov_b32 s33, s32
14282 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14283 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14284 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14285 ; GFX9-NEXT: v_writelane_b32 v40, s34, 7
14286 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14287 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14288 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14289 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14290 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14291 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14292 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5
14293 ; GFX9-NEXT: s_mov_b32 s4, 1
14294 ; GFX9-NEXT: s_mov_b32 s5, 2
14295 ; GFX9-NEXT: s_mov_b32 s6, 3
14296 ; GFX9-NEXT: s_mov_b32 s7, 4
14297 ; GFX9-NEXT: s_mov_b32 s8, 5
14298 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
14299 ; GFX9-NEXT: s_getpc_b64 s[34:35]
14300 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
14301 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
14302 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14303 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
14304 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5
14305 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14306 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14307 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14308 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14309 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14310 ; GFX9-NEXT: v_readlane_b32 s34, v40, 7
14311 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14312 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14313 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14314 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14315 ; GFX9-NEXT: s_mov_b32 s33, s34
14316 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14317 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14319 ; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg:
14321 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14322 ; GFX10-NEXT: s_mov_b32 s34, s33
14323 ; GFX10-NEXT: s_mov_b32 s33, s32
14324 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14325 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14326 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14327 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14328 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7
14329 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14330 ; GFX10-NEXT: s_getpc_b64 s[34:35]
14331 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
14332 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
14333 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14334 ; GFX10-NEXT: s_mov_b32 s4, 1
14335 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14336 ; GFX10-NEXT: s_mov_b32 s5, 2
14337 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14338 ; GFX10-NEXT: s_mov_b32 s6, 3
14339 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14340 ; GFX10-NEXT: s_mov_b32 s7, 4
14341 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14342 ; GFX10-NEXT: s_mov_b32 s8, 5
14343 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
14344 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
14345 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14346 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
14347 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5
14348 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14349 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14350 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14351 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14352 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14353 ; GFX10-NEXT: v_readlane_b32 s34, v40, 7
14354 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14355 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14356 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14357 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14358 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14359 ; GFX10-NEXT: s_mov_b32 s33, s34
14360 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14361 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14363 ; GFX11-LABEL: test_call_external_void_func_v5i32_imm_inreg:
14365 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14366 ; GFX11-NEXT: s_mov_b32 s0, s33
14367 ; GFX11-NEXT: s_mov_b32 s33, s32
14368 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14369 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14370 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14371 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7
14372 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14373 ; GFX11-NEXT: s_getpc_b64 s[0:1]
14374 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4
14375 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12
14376 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14377 ; GFX11-NEXT: s_mov_b32 s4, 1
14378 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14379 ; GFX11-NEXT: s_mov_b32 s5, 2
14380 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14381 ; GFX11-NEXT: s_mov_b32 s6, 3
14382 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14383 ; GFX11-NEXT: s_mov_b32 s7, 4
14384 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14385 ; GFX11-NEXT: s_mov_b32 s8, 5
14386 ; GFX11-NEXT: v_writelane_b32 v40, s30, 5
14387 ; GFX11-NEXT: v_writelane_b32 v40, s31, 6
14388 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14389 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14390 ; GFX11-NEXT: v_readlane_b32 s31, v40, 6
14391 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5
14392 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
14393 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14394 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14395 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14396 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14397 ; GFX11-NEXT: v_readlane_b32 s0, v40, 7
14398 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14399 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14400 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14401 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14402 ; GFX11-NEXT: s_mov_b32 s33, s0
14403 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14404 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14406 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg:
14407 ; GFX10-SCRATCH: ; %bb.0:
14408 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14409 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14410 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14411 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14412 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14413 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14414 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14415 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7
14416 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14417 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
14418 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4
14419 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12
14420 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14421 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
14422 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14423 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
14424 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14425 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
14426 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14427 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
14428 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
14429 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
14430 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
14431 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
14432 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14433 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
14434 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
14435 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
14436 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14437 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14438 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14439 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14440 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 7
14441 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14442 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14443 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14444 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14445 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14446 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14447 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14448 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
14449 call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Caller-side codegen test: an <8 x i32> value loaded from memory is passed as
; an `inreg` argument to @external_void_func_v8i32_inreg. The autogenerated
; checks below show the eight dwords being loaded into s[4:11] for the call,
; with the previous contents of s4-s11, s30/s31 and the old s33 written to
; lanes 0-10 of v40 before the call and read back afterwards.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
14453 define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 {
14454 ; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
14456 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14457 ; GFX9-NEXT: s_mov_b32 s34, s33
14458 ; GFX9-NEXT: s_mov_b32 s33, s32
14459 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14460 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14461 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14462 ; GFX9-NEXT: v_writelane_b32 v40, s34, 10
14463 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14464 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14465 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14466 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14467 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14468 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14469 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
14470 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
14471 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
14472 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
14473 ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
14474 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14475 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8
14476 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
14477 ; GFX9-NEXT: s_getpc_b64 s[34:35]
14478 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
14479 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
14480 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14481 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
14482 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
14483 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
14484 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
14485 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
14486 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14487 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14488 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14489 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14490 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14491 ; GFX9-NEXT: v_readlane_b32 s34, v40, 10
14492 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14493 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14494 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14495 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14496 ; GFX9-NEXT: s_mov_b32 s33, s34
14497 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14498 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14500 ; GFX10-LABEL: test_call_external_void_func_v8i32_inreg:
14502 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14503 ; GFX10-NEXT: s_mov_b32 s34, s33
14504 ; GFX10-NEXT: s_mov_b32 s33, s32
14505 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14506 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14507 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14508 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14509 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10
14510 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14511 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14512 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14513 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14514 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14515 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14516 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14517 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
14518 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
14519 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
14520 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
14521 ; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
14522 ; GFX10-NEXT: s_getpc_b64 s[34:35]
14523 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
14524 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
14525 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
14526 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
14527 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14528 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
14529 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
14530 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
14531 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
14532 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
14533 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14534 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14535 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14536 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14537 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14538 ; GFX10-NEXT: v_readlane_b32 s34, v40, 10
14539 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14540 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14541 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14542 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14543 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14544 ; GFX10-NEXT: s_mov_b32 s33, s34
14545 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14546 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14548 ; GFX11-LABEL: test_call_external_void_func_v8i32_inreg:
14550 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14551 ; GFX11-NEXT: s_mov_b32 s0, s33
14552 ; GFX11-NEXT: s_mov_b32 s33, s32
14553 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14554 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14555 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14556 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10
14557 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
14558 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14559 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14560 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14561 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14562 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14563 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14564 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
14565 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
14566 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
14567 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
14568 ; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x0
14569 ; GFX11-NEXT: s_getpc_b64 s[0:1]
14570 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
14571 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
14572 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8
14573 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9
14574 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14575 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14576 ; GFX11-NEXT: v_readlane_b32 s31, v40, 9
14577 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8
14578 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
14579 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
14580 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
14581 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
14582 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14583 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14584 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14585 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14586 ; GFX11-NEXT: v_readlane_b32 s0, v40, 10
14587 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14588 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14589 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14590 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14591 ; GFX11-NEXT: s_mov_b32 s33, s0
14592 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14593 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14595 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg:
14596 ; GFX10-SCRATCH: ; %bb.0:
14597 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14598 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14599 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14600 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14601 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14602 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14603 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14604 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10
14605 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
14606 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14607 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14608 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14609 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14610 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14611 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
14612 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
14613 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
14614 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
14615 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
14616 ; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
14617 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
14618 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
14619 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
14620 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
14621 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
14622 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14623 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
14624 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
14625 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
14626 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
14627 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
14628 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
14629 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14630 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14631 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14632 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14633 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10
14634 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14635 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14636 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14637 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14638 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14639 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14640 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14641 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; Two chained loads from address space 4: first the pointer, then the vector.
; NOTE(review): the first pointer operand is `undef`; presumably only the
; load/call sequence matters here, not the address - confirm before changing
; it, and regenerate the checks if the operand is ever replaced.
14642 %ptr = load ptr addrspace(4), ptr addrspace(4) undef
14643 %val = load <8 x i32>, ptr addrspace(4) %ptr
14644 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val)
; Caller-side codegen test: a constant <8 x i32> vector (elements 1 through 8)
; is passed as an `inreg` argument to @external_void_func_v8i32_inreg. The
; autogenerated checks below show each element being materialized with
; s_mov_b32 into s4-s11, with the previous contents of s4-s11, s30/s31 and the
; old s33 written to lanes 0-10 of v40 before the call and read back afterwards.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
14648 define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 {
14649 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14651 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14652 ; GFX9-NEXT: s_mov_b32 s34, s33
14653 ; GFX9-NEXT: s_mov_b32 s33, s32
14654 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14655 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14656 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14657 ; GFX9-NEXT: v_writelane_b32 v40, s34, 10
14658 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14659 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14660 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14661 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14662 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14663 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
14664 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
14665 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
14666 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14667 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8
14668 ; GFX9-NEXT: s_mov_b32 s4, 1
14669 ; GFX9-NEXT: s_mov_b32 s5, 2
14670 ; GFX9-NEXT: s_mov_b32 s6, 3
14671 ; GFX9-NEXT: s_mov_b32 s7, 4
14672 ; GFX9-NEXT: s_mov_b32 s8, 5
14673 ; GFX9-NEXT: s_mov_b32 s9, 6
14674 ; GFX9-NEXT: s_mov_b32 s10, 7
14675 ; GFX9-NEXT: s_mov_b32 s11, 8
14676 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
14677 ; GFX9-NEXT: s_getpc_b64 s[34:35]
14678 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
14679 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
14680 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14681 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
14682 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
14683 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
14684 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
14685 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
14686 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14687 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14688 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14689 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14690 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14691 ; GFX9-NEXT: v_readlane_b32 s34, v40, 10
14692 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14693 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14694 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14695 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14696 ; GFX9-NEXT: s_mov_b32 s33, s34
14697 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14698 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14700 ; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14702 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14703 ; GFX10-NEXT: s_mov_b32 s34, s33
14704 ; GFX10-NEXT: s_mov_b32 s33, s32
14705 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14706 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14707 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14708 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14709 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10
14710 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14711 ; GFX10-NEXT: s_getpc_b64 s[34:35]
14712 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
14713 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
14714 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14715 ; GFX10-NEXT: s_mov_b32 s4, 1
14716 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14717 ; GFX10-NEXT: s_mov_b32 s5, 2
14718 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14719 ; GFX10-NEXT: s_mov_b32 s6, 3
14720 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14721 ; GFX10-NEXT: s_mov_b32 s7, 4
14722 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14723 ; GFX10-NEXT: s_mov_b32 s8, 5
14724 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
14725 ; GFX10-NEXT: s_mov_b32 s9, 6
14726 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
14727 ; GFX10-NEXT: s_mov_b32 s10, 7
14728 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
14729 ; GFX10-NEXT: s_mov_b32 s11, 8
14730 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
14731 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
14732 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14733 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
14734 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
14735 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
14736 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
14737 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
14738 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14739 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14740 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14741 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14742 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14743 ; GFX10-NEXT: v_readlane_b32 s34, v40, 10
14744 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14745 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14746 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14747 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14748 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14749 ; GFX10-NEXT: s_mov_b32 s33, s34
14750 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14751 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14753 ; GFX11-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14755 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14756 ; GFX11-NEXT: s_mov_b32 s0, s33
14757 ; GFX11-NEXT: s_mov_b32 s33, s32
14758 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14759 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14760 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14761 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10
14762 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14763 ; GFX11-NEXT: s_getpc_b64 s[0:1]
14764 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
14765 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
14766 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
14767 ; GFX11-NEXT: s_mov_b32 s4, 1
14768 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
14769 ; GFX11-NEXT: s_mov_b32 s5, 2
14770 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
14771 ; GFX11-NEXT: s_mov_b32 s6, 3
14772 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
14773 ; GFX11-NEXT: s_mov_b32 s7, 4
14774 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
14775 ; GFX11-NEXT: s_mov_b32 s8, 5
14776 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
14777 ; GFX11-NEXT: s_mov_b32 s9, 6
14778 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
14779 ; GFX11-NEXT: s_mov_b32 s10, 7
14780 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
14781 ; GFX11-NEXT: s_mov_b32 s11, 8
14782 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8
14783 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9
14784 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
14785 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
14786 ; GFX11-NEXT: v_readlane_b32 s31, v40, 9
14787 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8
14788 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
14789 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
14790 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
14791 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
14792 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
14793 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
14794 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
14795 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
14796 ; GFX11-NEXT: v_readlane_b32 s0, v40, 10
14797 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14798 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
14799 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14800 ; GFX11-NEXT: s_add_i32 s32, s32, -16
14801 ; GFX11-NEXT: s_mov_b32 s33, s0
14802 ; GFX11-NEXT: s_waitcnt vmcnt(0)
14803 ; GFX11-NEXT: s_setpc_b64 s[30:31]
14805 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg:
14806 ; GFX10-SCRATCH: ; %bb.0:
14807 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14808 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
14809 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
14810 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14811 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
14812 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14813 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14814 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10
14815 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
14816 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
14817 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
14818 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
14819 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
14820 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
14821 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
14822 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
14823 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
14824 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
14825 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
14826 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
14827 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
14828 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
14829 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
14830 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6
14831 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
14832 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7
14833 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
14834 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8
14835 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
14836 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
14837 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
14838 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
14839 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
14840 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
14841 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
14842 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
14843 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
14844 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
14845 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
14846 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
14847 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
14848 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10
14849 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
14850 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
14851 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
14852 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
14853 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
14854 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
14855 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
14856 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; The argument is an all-constant vector, so no load is needed before the call.
14857 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
14861 define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 {
14862 ; GFX9-LABEL: test_call_external_void_func_v16i32_inreg:
14864 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14865 ; GFX9-NEXT: s_mov_b32 s34, s33
14866 ; GFX9-NEXT: s_mov_b32 s33, s32
14867 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14868 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14869 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14870 ; GFX9-NEXT: v_writelane_b32 v40, s34, 18
14871 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
14872 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
14873 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
14874 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
14875 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
14876 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
14877 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
14878 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
14879 ; GFX9-NEXT: v_writelane_b32 v40, s12, 8
14880 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9
14881 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10
14882 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14883 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
14884 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
14885 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
14886 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
14887 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
14888 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
14889 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
14890 ; GFX9-NEXT: s_addk_i32 s32, 0x400
14891 ; GFX9-NEXT: v_writelane_b32 v40, s30, 16
14892 ; GFX9-NEXT: v_writelane_b32 v40, s31, 17
14893 ; GFX9-NEXT: s_getpc_b64 s[34:35]
14894 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4
14895 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12
14896 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
14897 ; GFX9-NEXT: v_readlane_b32 s31, v40, 17
14898 ; GFX9-NEXT: v_readlane_b32 s30, v40, 16
14899 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15
14900 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14
14901 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13
14902 ; GFX9-NEXT: v_readlane_b32 s16, v40, 12
14903 ; GFX9-NEXT: v_readlane_b32 s15, v40, 11
14904 ; GFX9-NEXT: v_readlane_b32 s14, v40, 10
14905 ; GFX9-NEXT: v_readlane_b32 s13, v40, 9
14906 ; GFX9-NEXT: v_readlane_b32 s12, v40, 8
14907 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
14908 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
14909 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
14910 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
14911 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
14912 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
14913 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
14914 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
14915 ; GFX9-NEXT: v_readlane_b32 s34, v40, 18
14916 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
14917 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14918 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
14919 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
14920 ; GFX9-NEXT: s_mov_b32 s33, s34
14921 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14922 ; GFX9-NEXT: s_setpc_b64 s[30:31]
14924 ; GFX10-LABEL: test_call_external_void_func_v16i32_inreg:
14926 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14927 ; GFX10-NEXT: s_mov_b32 s34, s33
14928 ; GFX10-NEXT: s_mov_b32 s33, s32
14929 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14930 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
14931 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14932 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14933 ; GFX10-NEXT: v_writelane_b32 v40, s34, 18
14934 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
14935 ; GFX10-NEXT: s_addk_i32 s32, 0x200
14936 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
14937 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
14938 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
14939 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
14940 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
14941 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
14942 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
14943 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
14944 ; GFX10-NEXT: v_writelane_b32 v40, s12, 8
14945 ; GFX10-NEXT: v_writelane_b32 v40, s13, 9
14946 ; GFX10-NEXT: v_writelane_b32 v40, s14, 10
14947 ; GFX10-NEXT: v_writelane_b32 v40, s15, 11
14948 ; GFX10-NEXT: v_writelane_b32 v40, s16, 12
14949 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
14950 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
14951 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
14952 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
14953 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
14954 ; GFX10-NEXT: s_getpc_b64 s[34:35]
14955 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4
14956 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12
14957 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16
14958 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17
14959 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
14960 ; GFX10-NEXT: v_readlane_b32 s31, v40, 17
14961 ; GFX10-NEXT: v_readlane_b32 s30, v40, 16
14962 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15
14963 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14
14964 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13
14965 ; GFX10-NEXT: v_readlane_b32 s16, v40, 12
14966 ; GFX10-NEXT: v_readlane_b32 s15, v40, 11
14967 ; GFX10-NEXT: v_readlane_b32 s14, v40, 10
14968 ; GFX10-NEXT: v_readlane_b32 s13, v40, 9
14969 ; GFX10-NEXT: v_readlane_b32 s12, v40, 8
14970 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
14971 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
14972 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
14973 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
14974 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
14975 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
14976 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
14977 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
14978 ; GFX10-NEXT: v_readlane_b32 s34, v40, 18
14979 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
14980 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
14981 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
14982 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
14983 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
14984 ; GFX10-NEXT: s_mov_b32 s33, s34
14985 ; GFX10-NEXT: s_waitcnt vmcnt(0)
14986 ; GFX10-NEXT: s_setpc_b64 s[30:31]
14988 ; GFX11-LABEL: test_call_external_void_func_v16i32_inreg:
14990 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14991 ; GFX11-NEXT: s_mov_b32 s0, s33
14992 ; GFX11-NEXT: s_mov_b32 s33, s32
14993 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
14994 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
14995 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
14996 ; GFX11-NEXT: v_writelane_b32 v40, s0, 18
14997 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
14998 ; GFX11-NEXT: s_add_i32 s32, s32, 16
14999 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
15000 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
15001 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
15002 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
15003 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
15004 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
15005 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
15006 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
15007 ; GFX11-NEXT: v_writelane_b32 v40, s12, 8
15008 ; GFX11-NEXT: v_writelane_b32 v40, s13, 9
15009 ; GFX11-NEXT: v_writelane_b32 v40, s14, 10
15010 ; GFX11-NEXT: v_writelane_b32 v40, s15, 11
15011 ; GFX11-NEXT: v_writelane_b32 v40, s16, 12
15012 ; GFX11-NEXT: v_writelane_b32 v40, s17, 13
15013 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14
15014 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15
15015 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
15016 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0
15017 ; GFX11-NEXT: s_getpc_b64 s[0:1]
15018 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4
15019 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12
15020 ; GFX11-NEXT: v_writelane_b32 v40, s30, 16
15021 ; GFX11-NEXT: v_writelane_b32 v40, s31, 17
15022 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
15023 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
15024 ; GFX11-NEXT: v_readlane_b32 s31, v40, 17
15025 ; GFX11-NEXT: v_readlane_b32 s30, v40, 16
15026 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15
15027 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14
15028 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13
15029 ; GFX11-NEXT: v_readlane_b32 s16, v40, 12
15030 ; GFX11-NEXT: v_readlane_b32 s15, v40, 11
15031 ; GFX11-NEXT: v_readlane_b32 s14, v40, 10
15032 ; GFX11-NEXT: v_readlane_b32 s13, v40, 9
15033 ; GFX11-NEXT: v_readlane_b32 s12, v40, 8
15034 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
15035 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
15036 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
15037 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
15038 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
15039 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
15040 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
15041 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
15042 ; GFX11-NEXT: v_readlane_b32 s0, v40, 18
15043 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15044 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
15045 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15046 ; GFX11-NEXT: s_add_i32 s32, s32, -16
15047 ; GFX11-NEXT: s_mov_b32 s33, s0
15048 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15049 ; GFX11-NEXT: s_setpc_b64 s[30:31]
15051 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg:
15052 ; GFX10-SCRATCH: ; %bb.0:
15053 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15054 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
15055 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
15056 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15057 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
15058 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15059 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15060 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 18
15061 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
15062 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
15063 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
15064 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
15065 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
15066 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
15067 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
15068 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
15069 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
15070 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
15071 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
15072 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
15073 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
15074 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
15075 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
15076 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
15077 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
15078 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
15079 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15080 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
15081 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
15082 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4
15083 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12
15084 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16
15085 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17
15086 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
15087 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17
15088 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16
15089 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
15090 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
15091 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
15092 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
15093 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
15094 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
15095 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
15096 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
15097 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
15098 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
15099 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
15100 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
15101 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
15102 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
15103 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
15104 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
15105 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 18
15106 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15107 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
15108 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15109 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15110 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
15111 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
15112 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15113 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
15114 %ptr = load ptr addrspace(4), ptr addrspace(4) undef
15115 %val = load <16 x i32>, ptr addrspace(4) %ptr
15116 call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val)
; Test: call external_void_func_v32i32_inreg (amdgpu_gfx calling convention)
; with a single <32 x i32> argument marked inreg.
;
; What the checked assembly below shows, for all four RUN configurations:
;   * elements 0-15 are loaded directly into s[4:19];
;   * elements 16-31 are loaded into s[36:51]; s36-s45 are then copied into
;     s20-s29, and s46-s51 are moved into v0-v5 and stored to the stack at
;     s32 byte offsets 0 through 20;
;   * v40 is spilled around the call and used via v_writelane/v_readlane to
;     hold s4-s31 (lanes 0-27) and the previous s33 value (lane 28).
; NOTE(review): the declaration of the callee is not visible in this part of
; the file; its signature is assumed to match the call at the end of this
; function - confirm.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
15120 define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 {
15121 ; GFX9-LABEL: test_call_external_void_func_v32i32_inreg:
15123 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15124 ; GFX9-NEXT: s_mov_b32 s34, s33
15125 ; GFX9-NEXT: s_mov_b32 s33, s32
15126 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15127 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15128 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15129 ; GFX9-NEXT: v_writelane_b32 v40, s34, 28
15130 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
15131 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
15132 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
15133 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
15134 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
15135 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
15136 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
15137 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
15138 ; GFX9-NEXT: v_writelane_b32 v40, s12, 8
15139 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9
15140 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10
15141 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
15142 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
15143 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
15144 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
15145 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
15146 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
15147 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16
15148 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17
15149 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18
15150 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19
15151 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20
15152 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
15153 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
15154 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
15155 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21
15156 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22
15157 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23
15158 ; GFX9-NEXT: s_addk_i32 s32, 0x400
15159 ; GFX9-NEXT: v_writelane_b32 v40, s28, 24
15160 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
15161 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
15162 ; GFX9-NEXT: v_writelane_b32 v40, s29, 25
15163 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
15164 ; GFX9-NEXT: v_mov_b32_e32 v2, s48
15165 ; GFX9-NEXT: v_mov_b32_e32 v3, s49
15166 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
15167 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
15168 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
15169 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
15170 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
15171 ; GFX9-NEXT: v_writelane_b32 v40, s30, 26
15172 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
15173 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
15174 ; GFX9-NEXT: s_mov_b32 s20, s36
15175 ; GFX9-NEXT: s_mov_b32 s21, s37
15176 ; GFX9-NEXT: s_mov_b32 s22, s38
15177 ; GFX9-NEXT: s_mov_b32 s23, s39
15178 ; GFX9-NEXT: s_mov_b32 s24, s40
15179 ; GFX9-NEXT: s_mov_b32 s25, s41
15180 ; GFX9-NEXT: s_mov_b32 s26, s42
15181 ; GFX9-NEXT: s_mov_b32 s27, s43
15182 ; GFX9-NEXT: s_mov_b32 s28, s44
15183 ; GFX9-NEXT: s_mov_b32 s29, s45
15184 ; GFX9-NEXT: v_writelane_b32 v40, s31, 27
15185 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
15186 ; GFX9-NEXT: s_getpc_b64 s[34:35]
15187 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4
15188 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12
15189 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
15190 ; GFX9-NEXT: v_readlane_b32 s31, v40, 27
15191 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26
15192 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25
15193 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24
15194 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23
15195 ; GFX9-NEXT: v_readlane_b32 s26, v40, 22
15196 ; GFX9-NEXT: v_readlane_b32 s25, v40, 21
15197 ; GFX9-NEXT: v_readlane_b32 s24, v40, 20
15198 ; GFX9-NEXT: v_readlane_b32 s23, v40, 19
15199 ; GFX9-NEXT: v_readlane_b32 s22, v40, 18
15200 ; GFX9-NEXT: v_readlane_b32 s21, v40, 17
15201 ; GFX9-NEXT: v_readlane_b32 s20, v40, 16
15202 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15
15203 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14
15204 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13
15205 ; GFX9-NEXT: v_readlane_b32 s16, v40, 12
15206 ; GFX9-NEXT: v_readlane_b32 s15, v40, 11
15207 ; GFX9-NEXT: v_readlane_b32 s14, v40, 10
15208 ; GFX9-NEXT: v_readlane_b32 s13, v40, 9
15209 ; GFX9-NEXT: v_readlane_b32 s12, v40, 8
15210 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
15211 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
15212 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
15213 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
15214 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
15215 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
15216 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
15217 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
15218 ; GFX9-NEXT: v_readlane_b32 s34, v40, 28
15219 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15220 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15221 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15222 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
15223 ; GFX9-NEXT: s_mov_b32 s33, s34
15224 ; GFX9-NEXT: s_waitcnt vmcnt(0)
15225 ; GFX9-NEXT: s_setpc_b64 s[30:31]
15227 ; GFX10-LABEL: test_call_external_void_func_v32i32_inreg:
15229 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15230 ; GFX10-NEXT: s_mov_b32 s34, s33
15231 ; GFX10-NEXT: s_mov_b32 s33, s32
15232 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15233 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15234 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15235 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15236 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28
15237 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
15238 ; GFX10-NEXT: s_addk_i32 s32, 0x200
15239 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
15240 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
15241 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
15242 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
15243 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
15244 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
15245 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
15246 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
15247 ; GFX10-NEXT: v_writelane_b32 v40, s12, 8
15248 ; GFX10-NEXT: v_writelane_b32 v40, s13, 9
15249 ; GFX10-NEXT: v_writelane_b32 v40, s14, 10
15250 ; GFX10-NEXT: v_writelane_b32 v40, s15, 11
15251 ; GFX10-NEXT: v_writelane_b32 v40, s16, 12
15252 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
15253 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
15254 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
15255 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
15256 ; GFX10-NEXT: s_clause 0x1
15257 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
15258 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
15259 ; GFX10-NEXT: s_getpc_b64 s[34:35]
15260 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4
15261 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12
15262 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16
15263 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17
15264 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18
15265 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
15266 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
15267 ; GFX10-NEXT: v_writelane_b32 v40, s23, 19
15268 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
15269 ; GFX10-NEXT: v_mov_b32_e32 v2, s48
15270 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
15271 ; GFX10-NEXT: s_mov_b32 s20, s36
15272 ; GFX10-NEXT: v_writelane_b32 v40, s24, 20
15273 ; GFX10-NEXT: s_mov_b32 s21, s37
15274 ; GFX10-NEXT: s_mov_b32 s22, s38
15275 ; GFX10-NEXT: s_mov_b32 s23, s39
15276 ; GFX10-NEXT: s_mov_b32 s24, s40
15277 ; GFX10-NEXT: v_writelane_b32 v40, s25, 21
15278 ; GFX10-NEXT: s_mov_b32 s25, s41
15279 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
15280 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
15281 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
15282 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
15283 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
15284 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
15285 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
15286 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
15287 ; GFX10-NEXT: v_writelane_b32 v40, s26, 22
15288 ; GFX10-NEXT: s_mov_b32 s26, s42
15289 ; GFX10-NEXT: v_writelane_b32 v40, s27, 23
15290 ; GFX10-NEXT: s_mov_b32 s27, s43
15291 ; GFX10-NEXT: v_writelane_b32 v40, s28, 24
15292 ; GFX10-NEXT: s_mov_b32 s28, s44
15293 ; GFX10-NEXT: v_writelane_b32 v40, s29, 25
15294 ; GFX10-NEXT: s_mov_b32 s29, s45
15295 ; GFX10-NEXT: v_writelane_b32 v40, s30, 26
15296 ; GFX10-NEXT: v_writelane_b32 v40, s31, 27
15297 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
15298 ; GFX10-NEXT: v_readlane_b32 s31, v40, 27
15299 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26
15300 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25
15301 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24
15302 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23
15303 ; GFX10-NEXT: v_readlane_b32 s26, v40, 22
15304 ; GFX10-NEXT: v_readlane_b32 s25, v40, 21
15305 ; GFX10-NEXT: v_readlane_b32 s24, v40, 20
15306 ; GFX10-NEXT: v_readlane_b32 s23, v40, 19
15307 ; GFX10-NEXT: v_readlane_b32 s22, v40, 18
15308 ; GFX10-NEXT: v_readlane_b32 s21, v40, 17
15309 ; GFX10-NEXT: v_readlane_b32 s20, v40, 16
15310 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15
15311 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14
15312 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13
15313 ; GFX10-NEXT: v_readlane_b32 s16, v40, 12
15314 ; GFX10-NEXT: v_readlane_b32 s15, v40, 11
15315 ; GFX10-NEXT: v_readlane_b32 s14, v40, 10
15316 ; GFX10-NEXT: v_readlane_b32 s13, v40, 9
15317 ; GFX10-NEXT: v_readlane_b32 s12, v40, 8
15318 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
15319 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
15320 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
15321 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
15322 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
15323 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
15324 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
15325 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
15326 ; GFX10-NEXT: v_readlane_b32 s34, v40, 28
15327 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15328 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15329 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15330 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15331 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
15332 ; GFX10-NEXT: s_mov_b32 s33, s34
15333 ; GFX10-NEXT: s_waitcnt vmcnt(0)
15334 ; GFX10-NEXT: s_setpc_b64 s[30:31]
15336 ; GFX11-LABEL: test_call_external_void_func_v32i32_inreg:
15338 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15339 ; GFX11-NEXT: s_mov_b32 s0, s33
15340 ; GFX11-NEXT: s_mov_b32 s33, s32
15341 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15342 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
15343 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15344 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28
15345 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
15346 ; GFX11-NEXT: s_add_i32 s32, s32, 16
15347 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15348 ; GFX11-NEXT: s_add_i32 s2, s32, 16
15349 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
15350 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
15351 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
15352 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
15353 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
15354 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
15355 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
15356 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
15357 ; GFX11-NEXT: v_writelane_b32 v40, s12, 8
15358 ; GFX11-NEXT: v_writelane_b32 v40, s13, 9
15359 ; GFX11-NEXT: v_writelane_b32 v40, s14, 10
15360 ; GFX11-NEXT: v_writelane_b32 v40, s15, 11
15361 ; GFX11-NEXT: v_writelane_b32 v40, s16, 12
15362 ; GFX11-NEXT: v_writelane_b32 v40, s17, 13
15363 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14
15364 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15
15365 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
15366 ; GFX11-NEXT: s_clause 0x1
15367 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40
15368 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0
15369 ; GFX11-NEXT: s_getpc_b64 s[0:1]
15370 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4
15371 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12
15372 ; GFX11-NEXT: v_writelane_b32 v40, s20, 16
15373 ; GFX11-NEXT: v_writelane_b32 v40, s21, 17
15374 ; GFX11-NEXT: v_writelane_b32 v40, s22, 18
15375 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
15376 ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v5, s51
15377 ; GFX11-NEXT: v_writelane_b32 v40, s23, 19
15378 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v1, s47
15379 ; GFX11-NEXT: v_dual_mov_b32 v2, s48 :: v_dual_mov_b32 v3, s49
15380 ; GFX11-NEXT: v_writelane_b32 v40, s24, 20
15381 ; GFX11-NEXT: s_mov_b32 s20, s36
15382 ; GFX11-NEXT: s_mov_b32 s21, s37
15383 ; GFX11-NEXT: s_mov_b32 s22, s38
15384 ; GFX11-NEXT: s_mov_b32 s23, s39
15385 ; GFX11-NEXT: v_writelane_b32 v40, s25, 21
15386 ; GFX11-NEXT: s_mov_b32 s24, s40
15387 ; GFX11-NEXT: s_mov_b32 s25, s41
15388 ; GFX11-NEXT: scratch_store_b64 off, v[4:5], s2
15389 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
15390 ; GFX11-NEXT: v_writelane_b32 v40, s26, 22
15391 ; GFX11-NEXT: s_mov_b32 s26, s42
15392 ; GFX11-NEXT: v_writelane_b32 v40, s27, 23
15393 ; GFX11-NEXT: s_mov_b32 s27, s43
15394 ; GFX11-NEXT: v_writelane_b32 v40, s28, 24
15395 ; GFX11-NEXT: s_mov_b32 s28, s44
15396 ; GFX11-NEXT: v_writelane_b32 v40, s29, 25
15397 ; GFX11-NEXT: s_mov_b32 s29, s45
15398 ; GFX11-NEXT: v_writelane_b32 v40, s30, 26
15399 ; GFX11-NEXT: v_writelane_b32 v40, s31, 27
15400 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
15401 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
15402 ; GFX11-NEXT: v_readlane_b32 s31, v40, 27
15403 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26
15404 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25
15405 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24
15406 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23
15407 ; GFX11-NEXT: v_readlane_b32 s26, v40, 22
15408 ; GFX11-NEXT: v_readlane_b32 s25, v40, 21
15409 ; GFX11-NEXT: v_readlane_b32 s24, v40, 20
15410 ; GFX11-NEXT: v_readlane_b32 s23, v40, 19
15411 ; GFX11-NEXT: v_readlane_b32 s22, v40, 18
15412 ; GFX11-NEXT: v_readlane_b32 s21, v40, 17
15413 ; GFX11-NEXT: v_readlane_b32 s20, v40, 16
15414 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15
15415 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14
15416 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13
15417 ; GFX11-NEXT: v_readlane_b32 s16, v40, 12
15418 ; GFX11-NEXT: v_readlane_b32 s15, v40, 11
15419 ; GFX11-NEXT: v_readlane_b32 s14, v40, 10
15420 ; GFX11-NEXT: v_readlane_b32 s13, v40, 9
15421 ; GFX11-NEXT: v_readlane_b32 s12, v40, 8
15422 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
15423 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
15424 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
15425 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
15426 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
15427 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
15428 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
15429 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
15430 ; GFX11-NEXT: v_readlane_b32 s0, v40, 28
15431 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15432 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
15433 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15434 ; GFX11-NEXT: s_add_i32 s32, s32, -16
15435 ; GFX11-NEXT: s_mov_b32 s33, s0
15436 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15437 ; GFX11-NEXT: s_setpc_b64 s[30:31]
15439 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg:
15440 ; GFX10-SCRATCH: ; %bb.0:
15441 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15442 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
15443 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
15444 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15445 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
15446 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15447 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15448 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28
15449 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
15450 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
15451 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 16
15452 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
15453 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
15454 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
15455 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
15456 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
15457 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
15458 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
15459 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
15460 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
15461 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
15462 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
15463 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
15464 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
15465 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
15466 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
15467 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
15468 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15469 ; GFX10-SCRATCH-NEXT: s_clause 0x1
15470 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
15471 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
15472 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
15473 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4
15474 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12
15475 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16
15476 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17
15477 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18
15478 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15479 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
15480 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19
15481 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
15482 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
15483 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
15484 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
15485 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20
15486 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
15487 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
15488 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
15489 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
15490 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
15491 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
15492 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
15493 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
15494 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s2
15495 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
15496 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
15497 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
15498 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23
15499 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
15500 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24
15501 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
15502 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25
15503 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
15504 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26
15505 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27
15506 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
15507 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27
15508 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26
15509 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25
15510 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24
15511 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23
15512 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22
15513 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21
15514 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20
15515 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19
15516 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18
15517 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17
15518 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16
15519 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
15520 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
15521 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
15522 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
15523 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
15524 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
15525 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
15526 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
15527 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
15528 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
15529 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
15530 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
15531 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
15532 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
15533 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
15534 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
15535 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 28
15536 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15537 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
15538 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15539 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15540 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
15541 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
15542 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15543 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
; IR under test: load a pointer from an undef addrspace(4) address, load the
; <32 x i32> value through it, and pass that value as the inreg argument.
15544 %ptr = load ptr addrspace(4), ptr addrspace(4) undef
15545 %val = load <32 x i32>, ptr addrspace(4) %ptr
15546 call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val)
15550 define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
15551 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15553 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15554 ; GFX9-NEXT: s_mov_b32 s34, s33
15555 ; GFX9-NEXT: s_mov_b32 s33, s32
15556 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15557 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15558 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15559 ; GFX9-NEXT: v_writelane_b32 v40, s34, 28
15560 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
15561 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
15562 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
15563 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
15564 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
15565 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
15566 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
15567 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
15568 ; GFX9-NEXT: v_writelane_b32 v40, s12, 8
15569 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9
15570 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10
15571 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
15572 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
15573 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
15574 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
15575 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
15576 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
15577 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16
15578 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17
15579 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18
15580 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19
15581 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
15582 ; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0
15583 ; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35
15584 ; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35
15585 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
15586 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
15587 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20
15588 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21
15589 ; GFX9-NEXT: s_addk_i32 s32, 0x400
15590 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22
15591 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
15592 ; GFX9-NEXT: v_mov_b32_e32 v0, s52
15593 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23
15594 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
15595 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
15596 ; GFX9-NEXT: v_writelane_b32 v40, s28, 24
15597 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
15598 ; GFX9-NEXT: v_mov_b32_e32 v2, s48
15599 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
15600 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
15601 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
15602 ; GFX9-NEXT: v_mov_b32_e32 v0, s49
15603 ; GFX9-NEXT: v_writelane_b32 v40, s29, 25
15604 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
15605 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
15606 ; GFX9-NEXT: v_writelane_b32 v40, s30, 26
15607 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
15608 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
15609 ; GFX9-NEXT: s_mov_b32 s20, s36
15610 ; GFX9-NEXT: s_mov_b32 s21, s37
15611 ; GFX9-NEXT: s_mov_b32 s22, s38
15612 ; GFX9-NEXT: s_mov_b32 s23, s39
15613 ; GFX9-NEXT: s_mov_b32 s24, s40
15614 ; GFX9-NEXT: s_mov_b32 s25, s41
15615 ; GFX9-NEXT: s_mov_b32 s26, s42
15616 ; GFX9-NEXT: s_mov_b32 s27, s43
15617 ; GFX9-NEXT: s_mov_b32 s28, s44
15618 ; GFX9-NEXT: s_mov_b32 s29, s45
15619 ; GFX9-NEXT: v_writelane_b32 v40, s31, 27
15620 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
15621 ; GFX9-NEXT: s_getpc_b64 s[34:35]
15622 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4
15623 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12
15624 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
15625 ; GFX9-NEXT: v_readlane_b32 s31, v40, 27
15626 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26
15627 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25
15628 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24
15629 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23
15630 ; GFX9-NEXT: v_readlane_b32 s26, v40, 22
15631 ; GFX9-NEXT: v_readlane_b32 s25, v40, 21
15632 ; GFX9-NEXT: v_readlane_b32 s24, v40, 20
15633 ; GFX9-NEXT: v_readlane_b32 s23, v40, 19
15634 ; GFX9-NEXT: v_readlane_b32 s22, v40, 18
15635 ; GFX9-NEXT: v_readlane_b32 s21, v40, 17
15636 ; GFX9-NEXT: v_readlane_b32 s20, v40, 16
15637 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15
15638 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14
15639 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13
15640 ; GFX9-NEXT: v_readlane_b32 s16, v40, 12
15641 ; GFX9-NEXT: v_readlane_b32 s15, v40, 11
15642 ; GFX9-NEXT: v_readlane_b32 s14, v40, 10
15643 ; GFX9-NEXT: v_readlane_b32 s13, v40, 9
15644 ; GFX9-NEXT: v_readlane_b32 s12, v40, 8
15645 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
15646 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
15647 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
15648 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
15649 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
15650 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
15651 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
15652 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
15653 ; GFX9-NEXT: v_readlane_b32 s34, v40, 28
15654 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15655 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15656 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
15657 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
15658 ; GFX9-NEXT: s_mov_b32 s33, s34
15659 ; GFX9-NEXT: s_waitcnt vmcnt(0)
15660 ; GFX9-NEXT: s_setpc_b64 s[30:31]
15662 ; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15664 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15665 ; GFX10-NEXT: s_mov_b32 s34, s33
15666 ; GFX10-NEXT: s_mov_b32 s33, s32
15667 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15668 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
15669 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15670 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15671 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28
15672 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
15673 ; GFX10-NEXT: s_addk_i32 s32, 0x200
15674 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
15675 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
15676 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
15677 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
15678 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
15679 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
15680 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
15681 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
15682 ; GFX10-NEXT: v_writelane_b32 v40, s12, 8
15683 ; GFX10-NEXT: v_writelane_b32 v40, s13, 9
15684 ; GFX10-NEXT: v_writelane_b32 v40, s14, 10
15685 ; GFX10-NEXT: v_writelane_b32 v40, s15, 11
15686 ; GFX10-NEXT: v_writelane_b32 v40, s16, 12
15687 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
15688 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
15689 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
15690 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
15691 ; GFX10-NEXT: s_clause 0x2
15692 ; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0
15693 ; GFX10-NEXT: ; meta instruction
15694 ; GFX10-NEXT: ; meta instruction
15695 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
15696 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
15697 ; GFX10-NEXT: s_getpc_b64 s[34:35]
15698 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4
15699 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12
15700 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16
15701 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17
15702 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18
15703 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
15704 ; GFX10-NEXT: v_mov_b32_e32 v0, s52
15705 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
15706 ; GFX10-NEXT: v_writelane_b32 v40, s23, 19
15707 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
15708 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
15709 ; GFX10-NEXT: v_mov_b32_e32 v2, s48
15710 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
15711 ; GFX10-NEXT: v_writelane_b32 v40, s24, 20
15712 ; GFX10-NEXT: s_mov_b32 s20, s36
15713 ; GFX10-NEXT: s_mov_b32 s21, s37
15714 ; GFX10-NEXT: s_mov_b32 s22, s38
15715 ; GFX10-NEXT: s_mov_b32 s23, s39
15716 ; GFX10-NEXT: v_writelane_b32 v40, s25, 21
15717 ; GFX10-NEXT: s_mov_b32 s24, s40
15718 ; GFX10-NEXT: s_mov_b32 s25, s41
15719 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
15720 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
15721 ; GFX10-NEXT: v_writelane_b32 v40, s26, 22
15722 ; GFX10-NEXT: s_mov_b32 s26, s42
15723 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
15724 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
15725 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
15726 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
15727 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
15728 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
15729 ; GFX10-NEXT: v_writelane_b32 v40, s27, 23
15730 ; GFX10-NEXT: s_mov_b32 s27, s43
15731 ; GFX10-NEXT: v_writelane_b32 v40, s28, 24
15732 ; GFX10-NEXT: s_mov_b32 s28, s44
15733 ; GFX10-NEXT: v_writelane_b32 v40, s29, 25
15734 ; GFX10-NEXT: s_mov_b32 s29, s45
15735 ; GFX10-NEXT: v_writelane_b32 v40, s30, 26
15736 ; GFX10-NEXT: v_writelane_b32 v40, s31, 27
15737 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
15738 ; GFX10-NEXT: v_readlane_b32 s31, v40, 27
15739 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26
15740 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25
15741 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24
15742 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23
15743 ; GFX10-NEXT: v_readlane_b32 s26, v40, 22
15744 ; GFX10-NEXT: v_readlane_b32 s25, v40, 21
15745 ; GFX10-NEXT: v_readlane_b32 s24, v40, 20
15746 ; GFX10-NEXT: v_readlane_b32 s23, v40, 19
15747 ; GFX10-NEXT: v_readlane_b32 s22, v40, 18
15748 ; GFX10-NEXT: v_readlane_b32 s21, v40, 17
15749 ; GFX10-NEXT: v_readlane_b32 s20, v40, 16
15750 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15
15751 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14
15752 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13
15753 ; GFX10-NEXT: v_readlane_b32 s16, v40, 12
15754 ; GFX10-NEXT: v_readlane_b32 s15, v40, 11
15755 ; GFX10-NEXT: v_readlane_b32 s14, v40, 10
15756 ; GFX10-NEXT: v_readlane_b32 s13, v40, 9
15757 ; GFX10-NEXT: v_readlane_b32 s12, v40, 8
15758 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
15759 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
15760 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
15761 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
15762 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
15763 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
15764 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
15765 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
15766 ; GFX10-NEXT: v_readlane_b32 s34, v40, 28
15767 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
15768 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
15769 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
15770 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
15771 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
15772 ; GFX10-NEXT: s_mov_b32 s33, s34
15773 ; GFX10-NEXT: s_waitcnt vmcnt(0)
15774 ; GFX10-NEXT: s_setpc_b64 s[30:31]
15776 ; GFX11-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15778 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15779 ; GFX11-NEXT: s_mov_b32 s0, s33
15780 ; GFX11-NEXT: s_mov_b32 s33, s32
15781 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15782 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
15783 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15784 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28
15785 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
15786 ; GFX11-NEXT: s_add_i32 s32, s32, 16
15787 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
15788 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
15789 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2
15790 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3
15791 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4
15792 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5
15793 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6
15794 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7
15795 ; GFX11-NEXT: v_writelane_b32 v40, s12, 8
15796 ; GFX11-NEXT: v_writelane_b32 v40, s13, 9
15797 ; GFX11-NEXT: v_writelane_b32 v40, s14, 10
15798 ; GFX11-NEXT: v_writelane_b32 v40, s15, 11
15799 ; GFX11-NEXT: v_writelane_b32 v40, s16, 12
15800 ; GFX11-NEXT: v_writelane_b32 v40, s17, 13
15801 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14
15802 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15
15803 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
15804 ; GFX11-NEXT: s_clause 0x2
15805 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0
15806 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40
15807 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0
15808 ; GFX11-NEXT: s_getpc_b64 s[0:1]
15809 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4
15810 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12
15811 ; GFX11-NEXT: s_add_i32 s3, s32, 16
15812 ; GFX11-NEXT: v_writelane_b32 v40, s20, 16
15813 ; GFX11-NEXT: v_writelane_b32 v40, s21, 17
15814 ; GFX11-NEXT: v_writelane_b32 v40, s22, 18
15815 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
15816 ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v5, s51
15817 ; GFX11-NEXT: v_writelane_b32 v40, s23, 19
15818 ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v1, s47
15819 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v3, s49
15820 ; GFX11-NEXT: v_writelane_b32 v40, s24, 20
15821 ; GFX11-NEXT: v_mov_b32_e32 v2, s48
15822 ; GFX11-NEXT: s_add_i32 s2, s32, 24
15823 ; GFX11-NEXT: s_mov_b32 s20, s36
15824 ; GFX11-NEXT: s_mov_b32 s21, s37
15825 ; GFX11-NEXT: v_writelane_b32 v40, s25, 21
15826 ; GFX11-NEXT: s_mov_b32 s22, s38
15827 ; GFX11-NEXT: s_mov_b32 s23, s39
15828 ; GFX11-NEXT: s_mov_b32 s24, s40
15829 ; GFX11-NEXT: s_mov_b32 s25, s41
15830 ; GFX11-NEXT: v_writelane_b32 v40, s26, 22
15831 ; GFX11-NEXT: s_mov_b32 s26, s42
15832 ; GFX11-NEXT: scratch_store_b32 off, v6, s2
15833 ; GFX11-NEXT: scratch_store_b64 off, v[4:5], s3
15834 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
15835 ; GFX11-NEXT: v_writelane_b32 v40, s27, 23
15836 ; GFX11-NEXT: s_mov_b32 s27, s43
15837 ; GFX11-NEXT: v_writelane_b32 v40, s28, 24
15838 ; GFX11-NEXT: s_mov_b32 s28, s44
15839 ; GFX11-NEXT: v_writelane_b32 v40, s29, 25
15840 ; GFX11-NEXT: s_mov_b32 s29, s45
15841 ; GFX11-NEXT: v_writelane_b32 v40, s30, 26
15842 ; GFX11-NEXT: v_writelane_b32 v40, s31, 27
15843 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
15844 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
15845 ; GFX11-NEXT: v_readlane_b32 s31, v40, 27
15846 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26
15847 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25
15848 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24
15849 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23
15850 ; GFX11-NEXT: v_readlane_b32 s26, v40, 22
15851 ; GFX11-NEXT: v_readlane_b32 s25, v40, 21
15852 ; GFX11-NEXT: v_readlane_b32 s24, v40, 20
15853 ; GFX11-NEXT: v_readlane_b32 s23, v40, 19
15854 ; GFX11-NEXT: v_readlane_b32 s22, v40, 18
15855 ; GFX11-NEXT: v_readlane_b32 s21, v40, 17
15856 ; GFX11-NEXT: v_readlane_b32 s20, v40, 16
15857 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15
15858 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14
15859 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13
15860 ; GFX11-NEXT: v_readlane_b32 s16, v40, 12
15861 ; GFX11-NEXT: v_readlane_b32 s15, v40, 11
15862 ; GFX11-NEXT: v_readlane_b32 s14, v40, 10
15863 ; GFX11-NEXT: v_readlane_b32 s13, v40, 9
15864 ; GFX11-NEXT: v_readlane_b32 s12, v40, 8
15865 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7
15866 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6
15867 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5
15868 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4
15869 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3
15870 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2
15871 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
15872 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
15873 ; GFX11-NEXT: v_readlane_b32 s0, v40, 28
15874 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
15875 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
15876 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
15877 ; GFX11-NEXT: s_add_i32 s32, s32, -16
15878 ; GFX11-NEXT: s_mov_b32 s33, s0
15879 ; GFX11-NEXT: s_waitcnt vmcnt(0)
15880 ; GFX11-NEXT: s_setpc_b64 s[30:31]
15882 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg:
15883 ; GFX10-SCRATCH: ; %bb.0:
15884 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15885 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
15886 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
15887 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15888 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
15889 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15890 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15891 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28
15892 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
15893 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
15894 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
15895 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
15896 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
15897 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
15898 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
15899 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
15900 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
15901 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
15902 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
15903 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
15904 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
15905 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
15906 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
15907 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
15908 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
15909 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
15910 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15911 ; GFX10-SCRATCH-NEXT: s_clause 0x2
15912 ; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
15913 ; GFX10-SCRATCH-NEXT: ; meta instruction
15914 ; GFX10-SCRATCH-NEXT: ; meta instruction
15915 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
15916 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
15917 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
15918 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4
15919 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12
15920 ; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16
15921 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16
15922 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17
15923 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18
15924 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
15925 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2
15926 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 24
15927 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
15928 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19
15929 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
15930 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
15931 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
15932 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
15933 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20
15934 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
15935 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
15936 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
15937 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
15938 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
15939 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
15940 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
15941 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
15942 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s2
15943 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s3
15944 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
15945 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
15946 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
15947 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23
15948 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
15949 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24
15950 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
15951 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25
15952 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
15953 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26
15954 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27
15955 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
15956 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27
15957 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26
15958 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25
15959 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24
15960 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23
15961 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22
15962 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21
15963 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20
15964 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19
15965 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18
15966 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17
15967 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16
15968 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
15969 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
15970 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
15971 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
15972 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
15973 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
15974 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
15975 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
15976 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
15977 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
15978 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
15979 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
15980 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
15981 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
15982 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
15983 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
15984 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 28
15985 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
15986 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
15987 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
15988 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
15989 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
15990 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
15991 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
15992 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
15993 %ptr0 = load ptr addrspace(4), ptr addrspace(4) undef
15994 %val0 = load <32 x i32>, ptr addrspace(4) %ptr0
15995 %val1 = load i32, ptr addrspace(4) undef
15996 call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1)
; Forwards a <32 x i32> value and a double unchanged to @stack_passed_f64_arg
; using the amdgpu_gfx calling convention.
;
; What the expected code below shows for the double argument:
;   * it is read from the incoming stack area at s33 (offsets 0 and 4) into
;     v32/v33 and written to the outgoing argument area at s32 (offsets 0 and
;     4) before the s_swappc_b64 call;
;   * the buffer-addressed targets (gfx900, gfx1010) use two dword
;     loads/stores, while the flat-scratch targets (gfx1100, and gfx1010 with
;     +enable-flat-scratch) use a single 64-bit scratch load/store;
;   * v40 is spilled at s33 offset 8, i.e. directly after the 8 bytes of the
;     incoming double, and holds the saved s30/s31 and the old s33 value in
;     lanes 0-2.
; NOTE(review): presumably the <32 x i32> fills the argument VGPRs, which is
; why the double ends up on the stack - inferred from the test name and the
; v32/v33 loads, confirm against the calling-convention definition.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
16000 define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
16001 ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
16002 ; GFX9: ; %bb.0: ; %entry
16003 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16004 ; GFX9-NEXT: s_mov_b32 s34, s33
16005 ; GFX9-NEXT: s_mov_b32 s33, s32
16006 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16007 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
16008 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16009 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33
16010 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
16011 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
16012 ; GFX9-NEXT: s_addk_i32 s32, 0x400
16013 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
16014 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
16015 ; GFX9-NEXT: s_getpc_b64 s[34:35]
16016 ; GFX9-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4
16017 ; GFX9-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12
16018 ; GFX9-NEXT: s_waitcnt vmcnt(1)
16019 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
16020 ; GFX9-NEXT: s_waitcnt vmcnt(1)
16021 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
16022 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
16023 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
16024 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
16025 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
16026 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16027 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
16028 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16029 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
16030 ; GFX9-NEXT: s_mov_b32 s33, s34
16031 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16032 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16034 ; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64:
16035 ; GFX10: ; %bb.0: ; %entry
16036 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16037 ; GFX10-NEXT: s_mov_b32 s34, s33
16038 ; GFX10-NEXT: s_mov_b32 s33, s32
16039 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16040 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
16041 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16042 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16043 ; GFX10-NEXT: s_clause 0x1
16044 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33
16045 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
16046 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
16047 ; GFX10-NEXT: s_addk_i32 s32, 0x200
16048 ; GFX10-NEXT: s_getpc_b64 s[34:35]
16049 ; GFX10-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4
16050 ; GFX10-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12
16051 ; GFX10-NEXT: s_waitcnt vmcnt(1)
16052 ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32
16053 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16054 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
16055 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
16056 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
16057 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
16058 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
16059 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
16060 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
16061 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16062 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
16063 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16064 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16065 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
16066 ; GFX10-NEXT: s_mov_b32 s33, s34
16067 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16068 ; GFX10-NEXT: s_setpc_b64 s[30:31]
16070 ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
16071 ; GFX11: ; %bb.0: ; %entry
16072 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16073 ; GFX11-NEXT: s_mov_b32 s0, s33
16074 ; GFX11-NEXT: s_mov_b32 s33, s32
16075 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16076 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill
16077 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16078 ; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33
16079 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
16080 ; GFX11-NEXT: s_add_i32 s32, s32, 16
16081 ; GFX11-NEXT: s_getpc_b64 s[0:1]
16082 ; GFX11-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
16083 ; GFX11-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
16084 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
16085 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
16086 ; GFX11-NEXT: s_waitcnt vmcnt(0)
16087 ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32
16088 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
16089 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
16090 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
16091 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
16092 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16093 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload
16094 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16095 ; GFX11-NEXT: s_add_i32 s32, s32, -16
16096 ; GFX11-NEXT: s_mov_b32 s33, s0
16097 ; GFX11-NEXT: s_waitcnt vmcnt(0)
16098 ; GFX11-NEXT: s_setpc_b64 s[30:31]
16100 ; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64:
16101 ; GFX10-SCRATCH: ; %bb.0: ; %entry
16102 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16103 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
16104 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
16105 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16106 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill
16107 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16108 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16109 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33
16110 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
16111 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
16112 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
16113 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
16114 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
16115 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
16116 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
16117 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
16118 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
16119 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
16120 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
16121 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
16122 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
16123 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16124 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload
16125 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16126 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16127 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
16128 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
16129 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
16130 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
16132 call amdgpu_gfx void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
16136 define amdgpu_gfx void @stack_12xv3i32() #0 {
16137 ; GFX9-LABEL: stack_12xv3i32:
16138 ; GFX9: ; %bb.0: ; %entry
16139 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16140 ; GFX9-NEXT: s_mov_b32 s34, s33
16141 ; GFX9-NEXT: s_mov_b32 s33, s32
16142 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16143 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16144 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16145 ; GFX9-NEXT: s_addk_i32 s32, 0x400
16146 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
16147 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
16148 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
16149 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
16150 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
16151 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
16152 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
16153 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
16154 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
16155 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16156 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
16157 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
16158 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
16159 ; GFX9-NEXT: v_mov_b32_e32 v3, 1
16160 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
16161 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
16162 ; GFX9-NEXT: v_mov_b32_e32 v6, 2
16163 ; GFX9-NEXT: v_mov_b32_e32 v7, 2
16164 ; GFX9-NEXT: v_mov_b32_e32 v8, 2
16165 ; GFX9-NEXT: v_mov_b32_e32 v9, 3
16166 ; GFX9-NEXT: v_mov_b32_e32 v10, 3
16167 ; GFX9-NEXT: v_mov_b32_e32 v11, 3
16168 ; GFX9-NEXT: v_mov_b32_e32 v12, 4
16169 ; GFX9-NEXT: v_mov_b32_e32 v13, 4
16170 ; GFX9-NEXT: v_mov_b32_e32 v14, 4
16171 ; GFX9-NEXT: v_mov_b32_e32 v15, 5
16172 ; GFX9-NEXT: v_mov_b32_e32 v16, 5
16173 ; GFX9-NEXT: v_mov_b32_e32 v17, 5
16174 ; GFX9-NEXT: v_mov_b32_e32 v18, 6
16175 ; GFX9-NEXT: v_mov_b32_e32 v19, 6
16176 ; GFX9-NEXT: v_mov_b32_e32 v20, 6
16177 ; GFX9-NEXT: v_mov_b32_e32 v21, 7
16178 ; GFX9-NEXT: v_mov_b32_e32 v22, 7
16179 ; GFX9-NEXT: v_mov_b32_e32 v23, 7
16180 ; GFX9-NEXT: v_mov_b32_e32 v24, 8
16181 ; GFX9-NEXT: v_mov_b32_e32 v25, 8
16182 ; GFX9-NEXT: v_mov_b32_e32 v26, 8
16183 ; GFX9-NEXT: v_mov_b32_e32 v27, 9
16184 ; GFX9-NEXT: v_mov_b32_e32 v28, 9
16185 ; GFX9-NEXT: v_mov_b32_e32 v29, 9
16186 ; GFX9-NEXT: v_mov_b32_e32 v30, 10
16187 ; GFX9-NEXT: v_mov_b32_e32 v31, 11
16188 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
16189 ; GFX9-NEXT: s_getpc_b64 s[34:35]
16190 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4
16191 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12
16192 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
16193 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
16194 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
16195 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
16196 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16197 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16198 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16199 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
16200 ; GFX9-NEXT: s_mov_b32 s33, s34
16201 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16202 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16204 ; GFX10-LABEL: stack_12xv3i32:
16205 ; GFX10: ; %bb.0: ; %entry
16206 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16207 ; GFX10-NEXT: s_mov_b32 s34, s33
16208 ; GFX10-NEXT: s_mov_b32 s33, s32
16209 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16210 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16211 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16212 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16213 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
16214 ; GFX10-NEXT: v_mov_b32_e32 v0, 12
16215 ; GFX10-NEXT: v_mov_b32_e32 v1, 13
16216 ; GFX10-NEXT: v_mov_b32_e32 v2, 14
16217 ; GFX10-NEXT: s_addk_i32 s32, 0x200
16218 ; GFX10-NEXT: v_mov_b32_e32 v3, 15
16219 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
16220 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
16221 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
16222 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
16223 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
16224 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
16225 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
16226 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
16227 ; GFX10-NEXT: v_mov_b32_e32 v3, 1
16228 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
16229 ; GFX10-NEXT: v_mov_b32_e32 v5, 1
16230 ; GFX10-NEXT: v_mov_b32_e32 v6, 2
16231 ; GFX10-NEXT: v_mov_b32_e32 v7, 2
16232 ; GFX10-NEXT: v_mov_b32_e32 v8, 2
16233 ; GFX10-NEXT: v_mov_b32_e32 v9, 3
16234 ; GFX10-NEXT: v_mov_b32_e32 v10, 3
16235 ; GFX10-NEXT: v_mov_b32_e32 v11, 3
16236 ; GFX10-NEXT: v_mov_b32_e32 v12, 4
16237 ; GFX10-NEXT: v_mov_b32_e32 v13, 4
16238 ; GFX10-NEXT: v_mov_b32_e32 v14, 4
16239 ; GFX10-NEXT: v_mov_b32_e32 v15, 5
16240 ; GFX10-NEXT: v_mov_b32_e32 v16, 5
16241 ; GFX10-NEXT: v_mov_b32_e32 v17, 5
16242 ; GFX10-NEXT: v_mov_b32_e32 v18, 6
16243 ; GFX10-NEXT: v_mov_b32_e32 v19, 6
16244 ; GFX10-NEXT: v_mov_b32_e32 v20, 6
16245 ; GFX10-NEXT: v_mov_b32_e32 v21, 7
16246 ; GFX10-NEXT: v_mov_b32_e32 v22, 7
16247 ; GFX10-NEXT: v_mov_b32_e32 v23, 7
16248 ; GFX10-NEXT: v_mov_b32_e32 v24, 8
16249 ; GFX10-NEXT: v_mov_b32_e32 v25, 8
16250 ; GFX10-NEXT: v_mov_b32_e32 v26, 8
16251 ; GFX10-NEXT: v_mov_b32_e32 v27, 9
16252 ; GFX10-NEXT: v_mov_b32_e32 v28, 9
16253 ; GFX10-NEXT: v_mov_b32_e32 v29, 9
16254 ; GFX10-NEXT: v_mov_b32_e32 v30, 10
16255 ; GFX10-NEXT: v_mov_b32_e32 v31, 11
16256 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
16257 ; GFX10-NEXT: s_getpc_b64 s[34:35]
16258 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4
16259 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12
16260 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
16261 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
16262 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
16263 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
16264 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16265 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16266 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16267 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16268 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
16269 ; GFX10-NEXT: s_mov_b32 s33, s34
16270 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16271 ; GFX10-NEXT: s_setpc_b64 s[30:31]
16273 ; GFX11-LABEL: stack_12xv3i32:
16274 ; GFX11: ; %bb.0: ; %entry
16275 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16276 ; GFX11-NEXT: s_mov_b32 s0, s33
16277 ; GFX11-NEXT: s_mov_b32 s33, s32
16278 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16279 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
16280 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16281 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
16282 ; GFX11-NEXT: v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13
16283 ; GFX11-NEXT: v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15
16284 ; GFX11-NEXT: s_add_i32 s32, s32, 16
16285 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
16286 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 1
16287 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
16288 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
16289 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1
16290 ; GFX11-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, 2
16291 ; GFX11-NEXT: v_dual_mov_b32 v8, 2 :: v_dual_mov_b32 v9, 3
16292 ; GFX11-NEXT: v_dual_mov_b32 v10, 3 :: v_dual_mov_b32 v11, 3
16293 ; GFX11-NEXT: v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v13, 4
16294 ; GFX11-NEXT: v_dual_mov_b32 v14, 4 :: v_dual_mov_b32 v15, 5
16295 ; GFX11-NEXT: v_dual_mov_b32 v16, 5 :: v_dual_mov_b32 v17, 5
16296 ; GFX11-NEXT: v_dual_mov_b32 v18, 6 :: v_dual_mov_b32 v19, 6
16297 ; GFX11-NEXT: v_dual_mov_b32 v20, 6 :: v_dual_mov_b32 v21, 7
16298 ; GFX11-NEXT: v_dual_mov_b32 v22, 7 :: v_dual_mov_b32 v23, 7
16299 ; GFX11-NEXT: v_dual_mov_b32 v24, 8 :: v_dual_mov_b32 v25, 8
16300 ; GFX11-NEXT: v_dual_mov_b32 v26, 8 :: v_dual_mov_b32 v27, 9
16301 ; GFX11-NEXT: v_dual_mov_b32 v28, 9 :: v_dual_mov_b32 v29, 9
16302 ; GFX11-NEXT: v_dual_mov_b32 v30, 10 :: v_dual_mov_b32 v31, 11
16303 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
16304 ; GFX11-NEXT: s_getpc_b64 s[0:1]
16305 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
16306 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
16307 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
16308 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
16309 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
16310 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
16311 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
16312 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16313 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
16314 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16315 ; GFX11-NEXT: s_add_i32 s32, s32, -16
16316 ; GFX11-NEXT: s_mov_b32 s33, s0
16317 ; GFX11-NEXT: s_waitcnt vmcnt(0)
16318 ; GFX11-NEXT: s_setpc_b64 s[30:31]
16320 ; GFX10-SCRATCH-LABEL: stack_12xv3i32:
16321 ; GFX10-SCRATCH: ; %bb.0: ; %entry
16322 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16323 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
16324 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
16325 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16326 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
16327 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16328 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16329 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
16330 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12
16331 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13
16332 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14
16333 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15
16334 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
16335 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
16336 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
16337 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1
16338 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
16339 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
16340 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
16341 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
16342 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 1
16343 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2
16344 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2
16345 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 2
16346 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 3
16347 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 3
16348 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 3
16349 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 4
16350 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 4
16351 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 4
16352 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 5
16353 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 5
16354 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 5
16355 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 6
16356 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 6
16357 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 6
16358 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 7
16359 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 7
16360 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 7
16361 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 8
16362 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 8
16363 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 8
16364 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 9
16365 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 9
16366 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 9
16367 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 10
16368 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 11
16369 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
16370 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
16371 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
16372 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
16373 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
16374 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
16375 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
16376 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
16377 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16378 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
16379 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16380 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16381 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
16382 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
16383 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
16384 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
16386 call amdgpu_gfx void @external_void_func_12xv3i32(
16387 <3 x i32><i32 0, i32 0, i32 0>,
16388 <3 x i32><i32 1, i32 1, i32 1>,
16389 <3 x i32><i32 2, i32 2, i32 2>,
16390 <3 x i32><i32 3, i32 3, i32 3>,
16391 <3 x i32><i32 4, i32 4, i32 4>,
16392 <3 x i32><i32 5, i32 5, i32 5>,
16393 <3 x i32><i32 6, i32 6, i32 6>,
16394 <3 x i32><i32 7, i32 7, i32 7>,
16395 <3 x i32><i32 8, i32 8, i32 8>,
16396 <3 x i32><i32 9, i32 9, i32 9>,
16397 <3 x i32><i32 10, i32 11, i32 12>,
16398 <3 x i32><i32 13, i32 14, i32 15>)
; Test: call an amdgpu_gfx function with eight <5 x i32> constant arguments
; (40 dwords in total), which is more than fits in v0-v31.
; Expected lowering, per the autogenerated assertions below:
;   * the first 32 dwords are materialized in v0..v31 (the seventh vector is
;     split: its elements 6 and 7 land in v30/v31);
;   * the remaining 8 dwords (values 8..15) are stored relative to s32 at
;     byte offsets 0..28 - as eight buffer_store_dword on the GFX9 and GFX10
;     runs, and as two 128-bit scratch stores (at s32 and s32+16) on the GFX11
;     and GFX10-SCRATCH runs.
; The assertions are generated by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
16402 define amdgpu_gfx void @stack_8xv5i32() #0 {
16403 ; GFX9-LABEL: stack_8xv5i32:
16404 ; GFX9: ; %bb.0: ; %entry
16405 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16406 ; GFX9-NEXT: s_mov_b32 s34, s33
16407 ; GFX9-NEXT: s_mov_b32 s33, s32
16408 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16409 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16410 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16411 ; GFX9-NEXT: s_addk_i32 s32, 0x400
16412 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
16413 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
16414 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
16415 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
16416 ; GFX9-NEXT: v_mov_b32_e32 v0, 10
16417 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
16418 ; GFX9-NEXT: v_mov_b32_e32 v0, 11
16419 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16420 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
16421 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
16422 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
16423 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
16424 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
16425 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
16426 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
16427 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
16428 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
16429 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
16430 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
16431 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
16432 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
16433 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
16434 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
16435 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
16436 ; GFX9-NEXT: v_mov_b32_e32 v6, 1
16437 ; GFX9-NEXT: v_mov_b32_e32 v7, 1
16438 ; GFX9-NEXT: v_mov_b32_e32 v8, 1
16439 ; GFX9-NEXT: v_mov_b32_e32 v9, 1
16440 ; GFX9-NEXT: v_mov_b32_e32 v10, 2
16441 ; GFX9-NEXT: v_mov_b32_e32 v11, 2
16442 ; GFX9-NEXT: v_mov_b32_e32 v12, 2
16443 ; GFX9-NEXT: v_mov_b32_e32 v13, 2
16444 ; GFX9-NEXT: v_mov_b32_e32 v14, 2
16445 ; GFX9-NEXT: v_mov_b32_e32 v15, 3
16446 ; GFX9-NEXT: v_mov_b32_e32 v16, 3
16447 ; GFX9-NEXT: v_mov_b32_e32 v17, 3
16448 ; GFX9-NEXT: v_mov_b32_e32 v18, 3
16449 ; GFX9-NEXT: v_mov_b32_e32 v19, 3
16450 ; GFX9-NEXT: v_mov_b32_e32 v20, 4
16451 ; GFX9-NEXT: v_mov_b32_e32 v21, 4
16452 ; GFX9-NEXT: v_mov_b32_e32 v22, 4
16453 ; GFX9-NEXT: v_mov_b32_e32 v23, 4
16454 ; GFX9-NEXT: v_mov_b32_e32 v24, 4
16455 ; GFX9-NEXT: v_mov_b32_e32 v25, 5
16456 ; GFX9-NEXT: v_mov_b32_e32 v26, 5
16457 ; GFX9-NEXT: v_mov_b32_e32 v27, 5
16458 ; GFX9-NEXT: v_mov_b32_e32 v28, 5
16459 ; GFX9-NEXT: v_mov_b32_e32 v29, 5
16460 ; GFX9-NEXT: v_mov_b32_e32 v30, 6
16461 ; GFX9-NEXT: v_mov_b32_e32 v31, 7
16462 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
16463 ; GFX9-NEXT: s_getpc_b64 s[34:35]
16464 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4
16465 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12
16466 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
16467 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
16468 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
16469 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
16470 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16471 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16472 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16473 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
16474 ; GFX9-NEXT: s_mov_b32 s33, s34
16475 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16476 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16478 ; GFX10-LABEL: stack_8xv5i32:
16479 ; GFX10: ; %bb.0: ; %entry
16480 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16481 ; GFX10-NEXT: s_mov_b32 s34, s33
16482 ; GFX10-NEXT: s_mov_b32 s33, s32
16483 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16484 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16485 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16486 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16487 ; GFX10-NEXT: v_mov_b32_e32 v0, 8
16488 ; GFX10-NEXT: v_mov_b32_e32 v1, 9
16489 ; GFX10-NEXT: v_mov_b32_e32 v2, 10
16490 ; GFX10-NEXT: s_addk_i32 s32, 0x200
16491 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
16492 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
16493 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
16494 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
16495 ; GFX10-NEXT: v_mov_b32_e32 v0, 11
16496 ; GFX10-NEXT: v_mov_b32_e32 v1, 12
16497 ; GFX10-NEXT: v_mov_b32_e32 v2, 13
16498 ; GFX10-NEXT: v_mov_b32_e32 v3, 14
16499 ; GFX10-NEXT: v_mov_b32_e32 v4, 15
16500 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
16501 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16502 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16
16503 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
16504 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24
16505 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28
16506 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
16507 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
16508 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
16509 ; GFX10-NEXT: v_mov_b32_e32 v3, 0
16510 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
16511 ; GFX10-NEXT: v_mov_b32_e32 v5, 1
16512 ; GFX10-NEXT: v_mov_b32_e32 v6, 1
16513 ; GFX10-NEXT: v_mov_b32_e32 v7, 1
16514 ; GFX10-NEXT: v_mov_b32_e32 v8, 1
16515 ; GFX10-NEXT: v_mov_b32_e32 v9, 1
16516 ; GFX10-NEXT: v_mov_b32_e32 v10, 2
16517 ; GFX10-NEXT: v_mov_b32_e32 v11, 2
16518 ; GFX10-NEXT: v_mov_b32_e32 v12, 2
16519 ; GFX10-NEXT: v_mov_b32_e32 v13, 2
16520 ; GFX10-NEXT: v_mov_b32_e32 v14, 2
16521 ; GFX10-NEXT: v_mov_b32_e32 v15, 3
16522 ; GFX10-NEXT: v_mov_b32_e32 v16, 3
16523 ; GFX10-NEXT: v_mov_b32_e32 v17, 3
16524 ; GFX10-NEXT: v_mov_b32_e32 v18, 3
16525 ; GFX10-NEXT: v_mov_b32_e32 v19, 3
16526 ; GFX10-NEXT: v_mov_b32_e32 v20, 4
16527 ; GFX10-NEXT: v_mov_b32_e32 v21, 4
16528 ; GFX10-NEXT: v_mov_b32_e32 v22, 4
16529 ; GFX10-NEXT: v_mov_b32_e32 v23, 4
16530 ; GFX10-NEXT: v_mov_b32_e32 v24, 4
16531 ; GFX10-NEXT: v_mov_b32_e32 v25, 5
16532 ; GFX10-NEXT: v_mov_b32_e32 v26, 5
16533 ; GFX10-NEXT: v_mov_b32_e32 v27, 5
16534 ; GFX10-NEXT: v_mov_b32_e32 v28, 5
16535 ; GFX10-NEXT: v_mov_b32_e32 v29, 5
16536 ; GFX10-NEXT: v_mov_b32_e32 v30, 6
16537 ; GFX10-NEXT: v_mov_b32_e32 v31, 7
16538 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
16539 ; GFX10-NEXT: s_getpc_b64 s[34:35]
16540 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4
16541 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12
16542 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
16543 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
16544 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
16545 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
16546 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16547 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16548 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16549 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16550 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
16551 ; GFX10-NEXT: s_mov_b32 s33, s34
16552 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16553 ; GFX10-NEXT: s_setpc_b64 s[30:31]
16555 ; GFX11-LABEL: stack_8xv5i32:
16556 ; GFX11: ; %bb.0: ; %entry
16557 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16558 ; GFX11-NEXT: s_mov_b32 s0, s33
16559 ; GFX11-NEXT: s_mov_b32 s33, s32
16560 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16561 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
16562 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16563 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
16564 ; GFX11-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 9
16565 ; GFX11-NEXT: v_dual_mov_b32 v2, 10 :: v_dual_mov_b32 v3, 11
16566 ; GFX11-NEXT: v_dual_mov_b32 v4, 12 :: v_dual_mov_b32 v5, 13
16567 ; GFX11-NEXT: v_dual_mov_b32 v6, 14 :: v_dual_mov_b32 v7, 15
16568 ; GFX11-NEXT: s_add_i32 s32, s32, 16
16569 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
16570 ; GFX11-NEXT: s_add_i32 s0, s32, 16
16571 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
16572 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0
16573 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
16574 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
16575 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1
16576 ; GFX11-NEXT: v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v7, 1
16577 ; GFX11-NEXT: v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v9, 1
16578 ; GFX11-NEXT: v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v11, 2
16579 ; GFX11-NEXT: v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v13, 2
16580 ; GFX11-NEXT: v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v15, 3
16581 ; GFX11-NEXT: v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v17, 3
16582 ; GFX11-NEXT: v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v19, 3
16583 ; GFX11-NEXT: v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v21, 4
16584 ; GFX11-NEXT: v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v23, 4
16585 ; GFX11-NEXT: v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v25, 5
16586 ; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v27, 5
16587 ; GFX11-NEXT: v_dual_mov_b32 v28, 5 :: v_dual_mov_b32 v29, 5
16588 ; GFX11-NEXT: v_dual_mov_b32 v30, 6 :: v_dual_mov_b32 v31, 7
16589 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
16590 ; GFX11-NEXT: s_getpc_b64 s[0:1]
16591 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
16592 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
16593 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
16594 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
16595 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
16596 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
16597 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
16598 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16599 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
16600 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16601 ; GFX11-NEXT: s_add_i32 s32, s32, -16
16602 ; GFX11-NEXT: s_mov_b32 s33, s0
16603 ; GFX11-NEXT: s_waitcnt vmcnt(0)
16604 ; GFX11-NEXT: s_setpc_b64 s[30:31]
16606 ; GFX10-SCRATCH-LABEL: stack_8xv5i32:
16607 ; GFX10-SCRATCH: ; %bb.0: ; %entry
16608 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16609 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
16610 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
16611 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16612 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
16613 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16614 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16615 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
16616 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 8
16617 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 9
16618 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 10
16619 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 11
16620 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 12
16621 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 13
16622 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 14
16623 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 15
16624 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
16625 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
16626 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16
16627 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
16628 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0
16629 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
16630 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
16631 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
16632 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0
16633 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
16634 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1
16635 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1
16636 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1
16637 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1
16638 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1
16639 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2
16640 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2
16641 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2
16642 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2
16643 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2
16644 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 3
16645 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 3
16646 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 3
16647 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 3
16648 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 3
16649 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4
16650 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4
16651 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4
16652 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4
16653 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4
16654 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 5
16655 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 5
16656 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 5
16657 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 5
16658 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 5
16659 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 6
16660 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 7
16661 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
16662 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
16663 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
16664 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
16665 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
16666 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
16667 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
16668 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
16669 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16670 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
16671 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16672 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16673 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
16674 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
16675 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
16676 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
16678 call amdgpu_gfx void @external_void_func_8xv5i32(
16679 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
16680 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
16681 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
16682 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
16683 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
16684 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
16685 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
16686 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
; Test: floating-point counterpart of the <5 x i32> case - call an amdgpu_gfx
; function with eight <5 x float> constant arguments (40 dwords in total).
; Expected lowering, per the autogenerated assertions below:
;   * the first 32 dwords are materialized in v0..v31 (the seventh vector is
;     split: 6.0 and 7.0 land in v30/v31);
;   * the remaining 8 dwords (8.0 .. 15.0) are stored relative to s32 at byte
;     offsets 0..28 - as eight buffer_store_dword on the GFX9 and GFX10 runs,
;     and as two 128-bit scratch stores (at s32 and s32+16) on the GFX11 and
;     GFX10-SCRATCH runs.
; Constants printed as hex are the IEEE-754 single-precision bit patterns of
; the call operands (e.g. 0x40400000 is 3.0, 0x41000000 is 8.0); 1.0, 2.0 and
; 4.0 are printed as float literals in the expected assembly.
; The assertions are generated by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
16690 define amdgpu_gfx void @stack_8xv5f32() #0 {
16691 ; GFX9-LABEL: stack_8xv5f32:
16692 ; GFX9: ; %bb.0: ; %entry
16693 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16694 ; GFX9-NEXT: s_mov_b32 s34, s33
16695 ; GFX9-NEXT: s_mov_b32 s33, s32
16696 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16697 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16698 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16699 ; GFX9-NEXT: s_addk_i32 s32, 0x400
16700 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000
16701 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
16702 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000
16703 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
16704 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000
16705 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
16706 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000
16707 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16708 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000
16709 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
16710 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000
16711 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
16712 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000
16713 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
16714 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
16715 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000
16716 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
16717 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
16718 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
16719 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
16720 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
16721 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
16722 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
16723 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0
16724 ; GFX9-NEXT: v_mov_b32_e32 v6, 1.0
16725 ; GFX9-NEXT: v_mov_b32_e32 v7, 1.0
16726 ; GFX9-NEXT: v_mov_b32_e32 v8, 1.0
16727 ; GFX9-NEXT: v_mov_b32_e32 v9, 1.0
16728 ; GFX9-NEXT: v_mov_b32_e32 v10, 2.0
16729 ; GFX9-NEXT: v_mov_b32_e32 v11, 2.0
16730 ; GFX9-NEXT: v_mov_b32_e32 v12, 2.0
16731 ; GFX9-NEXT: v_mov_b32_e32 v13, 2.0
16732 ; GFX9-NEXT: v_mov_b32_e32 v14, 2.0
16733 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000
16734 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000
16735 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000
16736 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000
16737 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000
16738 ; GFX9-NEXT: v_mov_b32_e32 v20, 4.0
16739 ; GFX9-NEXT: v_mov_b32_e32 v21, 4.0
16740 ; GFX9-NEXT: v_mov_b32_e32 v22, 4.0
16741 ; GFX9-NEXT: v_mov_b32_e32 v23, 4.0
16742 ; GFX9-NEXT: v_mov_b32_e32 v24, 4.0
16743 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000
16744 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000
16745 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000
16746 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000
16747 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000
16748 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000
16749 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000
16750 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
16751 ; GFX9-NEXT: s_getpc_b64 s[34:35]
16752 ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4
16753 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12
16754 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
16755 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
16756 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
16757 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
16758 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
16759 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16760 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
16761 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
16762 ; GFX9-NEXT: s_mov_b32 s33, s34
16763 ; GFX9-NEXT: s_waitcnt vmcnt(0)
16764 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16766 ; GFX10-LABEL: stack_8xv5f32:
16767 ; GFX10: ; %bb.0: ; %entry
16768 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16769 ; GFX10-NEXT: s_mov_b32 s34, s33
16770 ; GFX10-NEXT: s_mov_b32 s33, s32
16771 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16772 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16773 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16774 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16775 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000
16776 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000
16777 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000
16778 ; GFX10-NEXT: s_addk_i32 s32, 0x200
16779 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
16780 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
16781 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
16782 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
16783 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41300000
16784 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41400000
16785 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41500000
16786 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000
16787 ; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000
16788 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
16789 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
16790 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16
16791 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
16792 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24
16793 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28
16794 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
16795 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
16796 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
16797 ; GFX10-NEXT: v_mov_b32_e32 v3, 0
16798 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
16799 ; GFX10-NEXT: v_mov_b32_e32 v5, 1.0
16800 ; GFX10-NEXT: v_mov_b32_e32 v6, 1.0
16801 ; GFX10-NEXT: v_mov_b32_e32 v7, 1.0
16802 ; GFX10-NEXT: v_mov_b32_e32 v8, 1.0
16803 ; GFX10-NEXT: v_mov_b32_e32 v9, 1.0
16804 ; GFX10-NEXT: v_mov_b32_e32 v10, 2.0
16805 ; GFX10-NEXT: v_mov_b32_e32 v11, 2.0
16806 ; GFX10-NEXT: v_mov_b32_e32 v12, 2.0
16807 ; GFX10-NEXT: v_mov_b32_e32 v13, 2.0
16808 ; GFX10-NEXT: v_mov_b32_e32 v14, 2.0
16809 ; GFX10-NEXT: v_mov_b32_e32 v15, 0x40400000
16810 ; GFX10-NEXT: v_mov_b32_e32 v16, 0x40400000
16811 ; GFX10-NEXT: v_mov_b32_e32 v17, 0x40400000
16812 ; GFX10-NEXT: v_mov_b32_e32 v18, 0x40400000
16813 ; GFX10-NEXT: v_mov_b32_e32 v19, 0x40400000
16814 ; GFX10-NEXT: v_mov_b32_e32 v20, 4.0
16815 ; GFX10-NEXT: v_mov_b32_e32 v21, 4.0
16816 ; GFX10-NEXT: v_mov_b32_e32 v22, 4.0
16817 ; GFX10-NEXT: v_mov_b32_e32 v23, 4.0
16818 ; GFX10-NEXT: v_mov_b32_e32 v24, 4.0
16819 ; GFX10-NEXT: v_mov_b32_e32 v25, 0x40a00000
16820 ; GFX10-NEXT: v_mov_b32_e32 v26, 0x40a00000
16821 ; GFX10-NEXT: v_mov_b32_e32 v27, 0x40a00000
16822 ; GFX10-NEXT: v_mov_b32_e32 v28, 0x40a00000
16823 ; GFX10-NEXT: v_mov_b32_e32 v29, 0x40a00000
16824 ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000
16825 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000
16826 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
16827 ; GFX10-NEXT: s_getpc_b64 s[34:35]
16828 ; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4
16829 ; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12
16830 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
16831 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
16832 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
16833 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
16834 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
16835 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
16836 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
16837 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
16838 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
16839 ; GFX10-NEXT: s_mov_b32 s33, s34
16840 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16841 ; GFX10-NEXT: s_setpc_b64 s[30:31]
16843 ; GFX11-LABEL: stack_8xv5f32:
16844 ; GFX11: ; %bb.0: ; %entry
16845 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16846 ; GFX11-NEXT: s_mov_b32 s0, s33
16847 ; GFX11-NEXT: s_mov_b32 s33, s32
16848 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16849 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
16850 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16851 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
16852 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41000000
16853 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41100000
16854 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41200000
16855 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41300000
16856 ; GFX11-NEXT: v_mov_b32_e32 v4, 0x41400000
16857 ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41500000
16858 ; GFX11-NEXT: v_mov_b32_e32 v6, 0x41600000
16859 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41700000
16860 ; GFX11-NEXT: s_add_i32 s32, s32, 16
16861 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
16862 ; GFX11-NEXT: s_add_i32 s0, s32, 16
16863 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
16864 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0
16865 ; GFX11-NEXT: v_mov_b32_e32 v6, 1.0
16866 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
16867 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
16868 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0
16869 ; GFX11-NEXT: v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0
16870 ; GFX11-NEXT: v_dual_mov_b32 v9, 1.0 :: v_dual_mov_b32 v10, 2.0
16871 ; GFX11-NEXT: v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v12, 2.0
16872 ; GFX11-NEXT: v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v14, 2.0
16873 ; GFX11-NEXT: v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000
16874 ; GFX11-NEXT: v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000
16875 ; GFX11-NEXT: v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v20, 4.0
16876 ; GFX11-NEXT: v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v22, 4.0
16877 ; GFX11-NEXT: v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v24, 4.0
16878 ; GFX11-NEXT: v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000
16879 ; GFX11-NEXT: v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000
16880 ; GFX11-NEXT: v_mov_b32_e32 v29, 0x40a00000
16881 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000
16882 ; GFX11-NEXT: v_mov_b32_e32 v31, 0x40e00000
16883 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
16884 ; GFX11-NEXT: s_getpc_b64 s[0:1]
16885 ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
16886 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
16887 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
16888 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
16889 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
16890 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
16891 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
16892 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
16893 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
16894 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
16895 ; GFX11-NEXT: s_add_i32 s32, s32, -16
16896 ; GFX11-NEXT: s_mov_b32 s33, s0
16897 ; GFX11-NEXT: s_waitcnt vmcnt(0)
16898 ; GFX11-NEXT: s_setpc_b64 s[30:31]
16900 ; GFX10-SCRATCH-LABEL: stack_8xv5f32:
16901 ; GFX10-SCRATCH: ; %bb.0: ; %entry
16902 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16903 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33
16904 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
16905 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16906 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
16907 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16908 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16909 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2
16910 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x41000000
16911 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x41100000
16912 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0x41200000
16913 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x41300000
16914 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0x41400000
16915 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x41500000
16916 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 0x41600000
16917 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 0x41700000
16918 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
16919 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
16920 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16
16921 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
16922 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0
16923 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
16924 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
16925 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
16926 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0
16927 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
16928 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1.0
16929 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1.0
16930 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1.0
16931 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1.0
16932 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1.0
16933 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2.0
16934 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2.0
16935 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2.0
16936 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2.0
16937 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2.0
16938 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 0x40400000
16939 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0x40400000
16940 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 0x40400000
16941 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 0x40400000
16942 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 0x40400000
16943 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4.0
16944 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4.0
16945 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4.0
16946 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4.0
16947 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4.0
16948 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 0x40a00000
16949 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 0x40a00000
16950 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 0x40a00000
16951 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 0x40a00000
16952 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 0x40a00000
16953 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 0x40c00000
16954 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 0x40e00000
16955 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
16956 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
16957 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
16958 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
16959 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
16960 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
16961 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
16962 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2
16963 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1
16964 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
16965 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
16966 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1
16967 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
16968 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0
16969 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
16970 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
16972 call amdgpu_gfx void @external_void_func_8xv5f32(
16973 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
16974 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
16975 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
16976 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
16977 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
16978 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
16979 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
16980 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
; External amdgpu_gfx callees with a leading <32 x i32> parameter followed by
; one more argument (a 16-byte-aligned byval double pointer, or a plain double).
; NOTE(review): presumably the <32 x i32> is there to occupy every argument
; VGPR so the trailing argument is passed on the stack - the callers are
; outside this section, confirm against them.
16984 declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
16985 declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) #0
; Callees taking twelve 3-element or eight 5-element vectors. The 8xv5i32 and
; 8xv5f32 variants are called by @stack_8xv5i32 and @stack_8xv5f32, and the
; 12xv3i32 variant by @stack_12xv3i32; the 12xv3f32 caller is not in this part
; of the file.
16986 declare hidden amdgpu_gfx void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
16987 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
16988 declare hidden amdgpu_gfx void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
16989 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
16990 declare hidden amdgpu_gfx void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
16991 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
16992 declare hidden amdgpu_gfx void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
16993 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
; Attribute groups: #0 is applied to every declaration and test function
; visible in this section; #1 additionally sets noinline and has no user in
; this section.
16994 attributes #0 = { nounwind }
16995 attributes #1 = { nounwind noinline }