1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
4 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10-SCRATCH %s
; External amdgpu_gfx callees used as call targets by the tests in this file.
; They are declarations only; each one fixes a single argument type (or
; attribute) whose lowering at the call site is being checked.
;
; Integer scalars: i1/i8/i16 in plain, signext and zeroext forms, then i32/i64,
; followed by vectors of i64.
6 declare hidden amdgpu_gfx void @external_void_func_i1(i1) #0
7 declare hidden amdgpu_gfx void @external_void_func_i1_signext(i1 signext) #0
8 declare hidden amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext) #0
10 declare hidden amdgpu_gfx void @external_void_func_i8(i8) #0
11 declare hidden amdgpu_gfx void @external_void_func_i8_signext(i8 signext) #0
12 declare hidden amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext) #0
14 declare hidden amdgpu_gfx void @external_void_func_i16(i16) #0
15 declare hidden amdgpu_gfx void @external_void_func_i16_signext(i16 signext) #0
16 declare hidden amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext) #0
18 declare hidden amdgpu_gfx void @external_void_func_i32(i32) #0
19 declare hidden amdgpu_gfx void @external_void_func_i64(i64) #0
20 declare hidden amdgpu_gfx void @external_void_func_v2i64(<2 x i64>) #0
21 declare hidden amdgpu_gfx void @external_void_func_v3i64(<3 x i64>) #0
22 declare hidden amdgpu_gfx void @external_void_func_v4i64(<4 x i64>) #0
; Floating-point scalars (half/float/double) and float/double vectors.
24 declare hidden amdgpu_gfx void @external_void_func_f16(half) #0
25 declare hidden amdgpu_gfx void @external_void_func_f32(float) #0
26 declare hidden amdgpu_gfx void @external_void_func_f64(double) #0
27 declare hidden amdgpu_gfx void @external_void_func_v2f32(<2 x float>) #0
28 declare hidden amdgpu_gfx void @external_void_func_v2f64(<2 x double>) #0
29 declare hidden amdgpu_gfx void @external_void_func_v3f32(<3 x float>) #0
30 declare hidden amdgpu_gfx void @external_void_func_v3f64(<3 x double>) #0
31 declare hidden amdgpu_gfx void @external_void_func_v5f32(<5 x float>) #0
; Vectors with 16-bit elements (i16 and half).
33 declare hidden amdgpu_gfx void @external_void_func_v2i16(<2 x i16>) #0
34 declare hidden amdgpu_gfx void @external_void_func_v2f16(<2 x half>) #0
35 declare hidden amdgpu_gfx void @external_void_func_v3i16(<3 x i16>) #0
36 declare hidden amdgpu_gfx void @external_void_func_v3f16(<3 x half>) #0
37 declare hidden amdgpu_gfx void @external_void_func_v4i16(<4 x i16>) #0
38 declare hidden amdgpu_gfx void @external_void_func_v4f16(<4 x half>) #0
; Vectors of i32, from 2 up to 32 elements, including two signatures that add
; a trailing scalar i32 after the vector.
40 declare hidden amdgpu_gfx void @external_void_func_v2i32(<2 x i32>) #0
41 declare hidden amdgpu_gfx void @external_void_func_v3i32(<3 x i32>) #0
42 declare hidden amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
43 declare hidden amdgpu_gfx void @external_void_func_v4i32(<4 x i32>) #0
44 declare hidden amdgpu_gfx void @external_void_func_v5i32(<5 x i32>) #0
45 declare hidden amdgpu_gfx void @external_void_func_v8i32(<8 x i32>) #0
46 declare hidden amdgpu_gfx void @external_void_func_v16i32(<16 x i32>) #0
47 declare hidden amdgpu_gfx void @external_void_func_v32i32(<32 x i32>) #0
48 declare hidden amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
; The same families of argument types again, with every parameter marked inreg.
50 declare hidden amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg) #0
51 declare hidden amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg) #0
52 declare hidden amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg) #0
53 declare hidden amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg) #0
54 declare hidden amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg) #0
55 declare hidden amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg) #0
56 declare hidden amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg) #0
57 declare hidden amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg) #0
59 declare hidden amdgpu_gfx void @external_void_func_f16_inreg(half inreg) #0
60 declare hidden amdgpu_gfx void @external_void_func_f32_inreg(float inreg) #0
61 declare hidden amdgpu_gfx void @external_void_func_f64_inreg(double inreg) #0
62 declare hidden amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg) #0
63 declare hidden amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg) #0
64 declare hidden amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg) #0
65 declare hidden amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg) #0
66 declare hidden amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg) #0
68 declare hidden amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg) #0
69 declare hidden amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
70 declare hidden amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg) #0
71 declare hidden amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
72 declare hidden amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg) #0
73 declare hidden amdgpu_gfx void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
75 declare hidden amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
76 declare hidden amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0
77 declare hidden amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg, i32 inreg) #0
78 declare hidden amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
79 declare hidden amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg) #0
80 declare hidden amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
81 declare hidden amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
82 declare hidden amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg) #0
83 declare hidden amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg, i32 inreg) #0
85 ; return value and argument
86 declare hidden amdgpu_gfx i32 @external_i32_func_i32(i32) #0
; Aggregate arguments: a { i8, i32 } struct passed directly, through a byval
; pointer in addrspace(5), and through an sret + byval pointer pair.
89 declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) #0
90 declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0
91 declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0
; Vector of sixteen i8 elements.
93 declare hidden amdgpu_gfx void @external_void_func_v16i8(<16 x i8>) #0
; Calls @external_void_func_i1 with the constant i1 true.
; In all three configurations the expected code materializes 1 in v0 and
; stores it as a byte at the stack pointer (s32) before the s_swappc_b64 call;
; the flat-scratch run uses scratch_store_byte instead of buffer_store_byte.
; The surrounding prologue/epilogue spills v40 and keeps s30, s31 and s33 in
; its lanes 0, 1 and 2 across the call.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script rather than editing by hand.
95 define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
96 ; GFX9-LABEL: test_call_external_void_func_i1_imm:
98 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
100 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
101 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
102 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
103 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
104 ; GFX9-NEXT: s_mov_b32 s33, s32
105 ; GFX9-NEXT: s_addk_i32 s32, 0x400
106 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
107 ; GFX9-NEXT: s_getpc_b64 s[4:5]
108 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
109 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
110 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
111 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
112 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
113 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
114 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
115 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
116 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
117 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
118 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
119 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
120 ; GFX9-NEXT: s_waitcnt vmcnt(0)
121 ; GFX9-NEXT: s_setpc_b64 s[4:5]
123 ; GFX10-LABEL: test_call_external_void_func_i1_imm:
125 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
127 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
128 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
129 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
130 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
131 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
132 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
133 ; GFX10-NEXT: s_mov_b32 s33, s32
134 ; GFX10-NEXT: s_addk_i32 s32, 0x200
135 ; GFX10-NEXT: s_getpc_b64 s[4:5]
136 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
137 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
138 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
139 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
140 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
141 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
142 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
143 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
144 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
145 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
146 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
147 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
148 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
149 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
150 ; GFX10-NEXT: s_waitcnt vmcnt(0)
151 ; GFX10-NEXT: s_setpc_b64 s[4:5]
153 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm:
154 ; GFX10-SCRATCH: ; %bb.0:
155 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
157 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
158 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
159 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
160 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
161 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
162 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
163 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
164 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
165 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
166 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4
167 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12
168 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
169 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
170 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
171 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
172 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
173 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
174 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
175 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
176 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
177 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
178 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
179 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
180 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
181 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
182 call amdgpu_gfx void @external_void_func_i1(i1 true)
; Loads a volatile i1 from an undef addrspace(1) pointer and passes it to
; @external_void_func_i1_signext with the signext attribute. The function's
; own i32 parameter is unnamed and unused.
; In all three configurations the expected code loads the value with
; global_load_ubyte, masks it to bit 0 (v_and_b32 with 1) and stores it as a
; byte at the stack pointer (s32) before the s_swappc_b64 call.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script rather than editing by hand.
186 define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 {
187 ; GFX9-LABEL: test_call_external_void_func_i1_signext:
189 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
191 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
192 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
193 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
194 ; GFX9-NEXT: s_waitcnt vmcnt(0)
195 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
196 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
197 ; GFX9-NEXT: s_mov_b32 s33, s32
198 ; GFX9-NEXT: s_addk_i32 s32, 0x400
199 ; GFX9-NEXT: s_getpc_b64 s[4:5]
200 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
201 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
202 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
203 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
204 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
205 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
206 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
207 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
208 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
209 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
210 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
211 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
212 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
213 ; GFX9-NEXT: s_waitcnt vmcnt(0)
214 ; GFX9-NEXT: s_setpc_b64 s[4:5]
216 ; GFX10-LABEL: test_call_external_void_func_i1_signext:
218 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
220 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
221 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
222 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
223 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
224 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
225 ; GFX10-NEXT: s_waitcnt vmcnt(0)
226 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
227 ; GFX10-NEXT: s_mov_b32 s33, s32
228 ; GFX10-NEXT: s_addk_i32 s32, 0x200
229 ; GFX10-NEXT: s_getpc_b64 s[4:5]
230 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
231 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
232 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
233 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
234 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
235 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
236 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
237 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
238 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
239 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
240 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
241 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
242 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
243 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
244 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
245 ; GFX10-NEXT: s_waitcnt vmcnt(0)
246 ; GFX10-NEXT: s_setpc_b64 s[4:5]
248 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext:
249 ; GFX10-SCRATCH: ; %bb.0:
250 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
252 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
253 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
254 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
255 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
256 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
257 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
258 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
259 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
260 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
261 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
262 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4
263 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12
264 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
265 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
266 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
267 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
268 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
269 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
270 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
271 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
272 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
273 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
274 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
275 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
276 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
277 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
278 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
279 %var = load volatile i1, i1 addrspace(1)* undef
280 call amdgpu_gfx void @external_void_func_i1_signext(i1 signext %var)
; Loads a volatile i1 from an undef addrspace(1) pointer and passes it to
; @external_void_func_i1_zeroext with the zeroext attribute. The function's
; own i32 parameter is unnamed and unused.
; In all three configurations the expected code loads the value with
; global_load_ubyte, masks it to bit 0 (v_and_b32 with 1) and stores it as a
; byte at the stack pointer (s32) before the s_swappc_b64 call.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script rather than editing by hand.
284 define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 {
285 ; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
287 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
289 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
290 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
291 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
292 ; GFX9-NEXT: s_waitcnt vmcnt(0)
293 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
294 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
295 ; GFX9-NEXT: s_mov_b32 s33, s32
296 ; GFX9-NEXT: s_addk_i32 s32, 0x400
297 ; GFX9-NEXT: s_getpc_b64 s[4:5]
298 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
299 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
300 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
301 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
302 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
303 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
304 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
305 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
306 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
307 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
308 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
309 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
310 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
311 ; GFX9-NEXT: s_waitcnt vmcnt(0)
312 ; GFX9-NEXT: s_setpc_b64 s[4:5]
314 ; GFX10-LABEL: test_call_external_void_func_i1_zeroext:
316 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
318 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
319 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
320 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
321 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
322 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
323 ; GFX10-NEXT: s_waitcnt vmcnt(0)
324 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
325 ; GFX10-NEXT: s_mov_b32 s33, s32
326 ; GFX10-NEXT: s_addk_i32 s32, 0x200
327 ; GFX10-NEXT: s_getpc_b64 s[4:5]
328 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
329 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
330 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
331 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
332 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
333 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
334 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
335 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
336 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
337 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
338 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
339 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
340 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
341 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
342 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
343 ; GFX10-NEXT: s_waitcnt vmcnt(0)
344 ; GFX10-NEXT: s_setpc_b64 s[4:5]
346 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext:
347 ; GFX10-SCRATCH: ; %bb.0:
348 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
350 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
351 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
352 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
353 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
354 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
355 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
356 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
357 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
358 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
359 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
360 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4
361 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12
362 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
363 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
364 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
365 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
366 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
367 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
368 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
369 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
370 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
371 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
372 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
373 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
374 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
375 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
376 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
377 %var = load volatile i1, i1 addrspace(1)* undef
378 call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var)
; Calls @external_void_func_i8 with the constant i8 123. The function's own
; i32 parameter is unnamed and unused.
; In all three configurations the expected code moves 0x7b into v0 and calls
; through s_swappc_b64; unlike the i1 tests, no byte store of the argument to
; the stack appears in the expected output.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script rather than editing by hand.
382 define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
383 ; GFX9-LABEL: test_call_external_void_func_i8_imm:
385 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
387 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
388 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
389 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
390 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
391 ; GFX9-NEXT: s_mov_b32 s33, s32
392 ; GFX9-NEXT: s_addk_i32 s32, 0x400
393 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
394 ; GFX9-NEXT: s_getpc_b64 s[4:5]
395 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
396 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
397 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
398 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
399 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
400 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
401 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
402 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
403 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
404 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
405 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
406 ; GFX9-NEXT: s_waitcnt vmcnt(0)
407 ; GFX9-NEXT: s_setpc_b64 s[4:5]
409 ; GFX10-LABEL: test_call_external_void_func_i8_imm:
411 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
413 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
414 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
415 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
416 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
417 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
418 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
419 ; GFX10-NEXT: s_mov_b32 s33, s32
420 ; GFX10-NEXT: s_addk_i32 s32, 0x200
421 ; GFX10-NEXT: s_getpc_b64 s[4:5]
422 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
423 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
424 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
425 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
426 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
427 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
428 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
429 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
430 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
431 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
432 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
433 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
434 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
435 ; GFX10-NEXT: s_waitcnt vmcnt(0)
436 ; GFX10-NEXT: s_setpc_b64 s[4:5]
438 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
439 ; GFX10-SCRATCH: ; %bb.0:
440 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
442 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
443 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
444 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
445 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
446 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
447 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
448 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
449 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
450 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
451 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4
452 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12
453 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
454 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
455 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
456 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
457 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
458 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
459 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
460 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
461 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
462 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
463 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
464 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
465 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
466 call amdgpu_gfx void @external_void_func_i8(i8 123)
; Loads a volatile i8 from an undef addrspace(1) pointer and passes it to
; @external_void_func_i8_signext with the signext attribute. The function's
; own i32 parameter is unnamed and unused.
; In all three configurations the expected code loads the value with
; global_load_sbyte into v0 and calls through s_swappc_b64, with no separate
; extension instruction and no stack store of the argument.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script rather than editing by hand.
470 define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 {
471 ; GFX9-LABEL: test_call_external_void_func_i8_signext:
473 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
475 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
476 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
477 ; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
478 ; GFX9-NEXT: s_waitcnt vmcnt(0)
479 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
480 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
481 ; GFX9-NEXT: s_mov_b32 s33, s32
482 ; GFX9-NEXT: s_addk_i32 s32, 0x400
483 ; GFX9-NEXT: s_getpc_b64 s[4:5]
484 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
485 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
486 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
487 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
488 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
489 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
490 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
491 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
492 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
493 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
494 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
495 ; GFX9-NEXT: s_waitcnt vmcnt(0)
496 ; GFX9-NEXT: s_setpc_b64 s[4:5]
498 ; GFX10-LABEL: test_call_external_void_func_i8_signext:
500 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
502 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
503 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
504 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
505 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
506 ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc
507 ; GFX10-NEXT: s_waitcnt vmcnt(0)
508 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
509 ; GFX10-NEXT: s_mov_b32 s33, s32
510 ; GFX10-NEXT: s_addk_i32 s32, 0x200
511 ; GFX10-NEXT: s_getpc_b64 s[4:5]
512 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
513 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
514 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
515 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
516 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
517 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
518 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
519 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
520 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
521 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
522 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
523 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
524 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
525 ; GFX10-NEXT: s_waitcnt vmcnt(0)
526 ; GFX10-NEXT: s_setpc_b64 s[4:5]
528 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext:
529 ; GFX10-SCRATCH: ; %bb.0:
530 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
532 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
533 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
534 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
535 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
536 ; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc
537 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
538 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
539 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
540 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
541 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
542 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4
543 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12
544 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
545 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
546 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
547 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
548 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
549 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
550 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
551 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
552 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
553 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
554 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
555 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
556 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
557 %var = load volatile i8, i8 addrspace(1)* undef
558 call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var)
; Loads a volatile i8 from an undef addrspace(1) pointer and passes it to
; @external_void_func_i8_zeroext with the zeroext attribute. The function's
; own i32 parameter is unnamed and unused.
; In all three configurations the expected code loads the value with
; global_load_ubyte into v0 and calls through s_swappc_b64, with no separate
; extension instruction and no stack store of the argument.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script rather than editing by hand.
562 define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 {
563 ; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
565 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
567 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
568 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
569 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
570 ; GFX9-NEXT: s_waitcnt vmcnt(0)
571 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
572 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
573 ; GFX9-NEXT: s_mov_b32 s33, s32
574 ; GFX9-NEXT: s_addk_i32 s32, 0x400
575 ; GFX9-NEXT: s_getpc_b64 s[4:5]
576 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
577 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
578 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
579 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
580 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
581 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
582 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
583 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
584 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
585 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
586 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
587 ; GFX9-NEXT: s_waitcnt vmcnt(0)
588 ; GFX9-NEXT: s_setpc_b64 s[4:5]
590 ; GFX10-LABEL: test_call_external_void_func_i8_zeroext:
592 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
594 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
595 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
596 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
597 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
598 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
599 ; GFX10-NEXT: s_waitcnt vmcnt(0)
600 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
601 ; GFX10-NEXT: s_mov_b32 s33, s32
602 ; GFX10-NEXT: s_addk_i32 s32, 0x200
603 ; GFX10-NEXT: s_getpc_b64 s[4:5]
604 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
605 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
606 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
607 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
608 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
609 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
610 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
611 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
612 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
613 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
614 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
615 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
616 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
617 ; GFX10-NEXT: s_waitcnt vmcnt(0)
618 ; GFX10-NEXT: s_setpc_b64 s[4:5]
620 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext:
621 ; GFX10-SCRATCH: ; %bb.0:
622 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
624 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
625 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
626 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
627 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
628 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
629 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
630 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
631 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
632 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
633 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
634 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4
635 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12
636 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
637 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
638 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
639 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
640 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
641 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
642 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
643 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
644 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
645 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
646 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
647 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
648 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
649 %var = load volatile i8, i8 addrspace(1)* undef
650 call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var)
; Test: pass the constant i16 123 to @external_void_func_i16 through an
; amdgpu_gfx call. In all three check blocks below the constant is expected
; to be materialized as 0x7b in v0 ahead of the s_swappc_b64 call.
654 define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
655 ; GFX9-LABEL: test_call_external_void_func_i16_imm:
657 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
659 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
660 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
661 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
662 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
663 ; GFX9-NEXT: s_mov_b32 s33, s32
664 ; GFX9-NEXT: s_addk_i32 s32, 0x400
665 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
666 ; GFX9-NEXT: s_getpc_b64 s[4:5]
667 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
668 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
669 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
670 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
671 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
672 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
673 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
674 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
675 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
676 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
677 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
678 ; GFX9-NEXT: s_waitcnt vmcnt(0)
679 ; GFX9-NEXT: s_setpc_b64 s[4:5]
681 ; GFX10-LABEL: test_call_external_void_func_i16_imm:
683 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
685 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
686 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
687 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
688 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
689 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
690 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
691 ; GFX10-NEXT: s_mov_b32 s33, s32
692 ; GFX10-NEXT: s_addk_i32 s32, 0x200
693 ; GFX10-NEXT: s_getpc_b64 s[4:5]
694 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
695 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
696 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
697 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
698 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
699 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
700 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
701 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
702 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
703 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
704 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
705 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
706 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
707 ; GFX10-NEXT: s_waitcnt vmcnt(0)
708 ; GFX10-NEXT: s_setpc_b64 s[4:5]
710 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
711 ; GFX10-SCRATCH: ; %bb.0:
712 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
713 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
714 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
715 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
716 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
717 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
718 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
719 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
720 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
721 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
722 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
723 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4
724 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12
725 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
726 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
727 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
728 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
729 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
730 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
731 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
732 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
733 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
734 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
735 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
736 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
737 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Call under test: plain (no signext/zeroext) i16 immediate argument.
738 call amdgpu_gfx void @external_void_func_i16(i16 123)
; Test: load a volatile i16 from an undef addrspace(1) pointer and pass it as
; a signext argument to @external_void_func_i16_signext. The function's own
; unnamed i32 parameter is not referenced by the visible body. The check
; blocks below expect the value to be loaded into v0 with global_load_ushort
; and waited on (s_waitcnt vmcnt(0)) before the call sequence.
742 define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 {
743 ; GFX9-LABEL: test_call_external_void_func_i16_signext:
745 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
746 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
747 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
748 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
749 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
750 ; GFX9-NEXT: s_waitcnt vmcnt(0)
751 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
752 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
753 ; GFX9-NEXT: s_mov_b32 s33, s32
754 ; GFX9-NEXT: s_addk_i32 s32, 0x400
755 ; GFX9-NEXT: s_getpc_b64 s[4:5]
756 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
757 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
758 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
759 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
760 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
761 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
762 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
763 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
764 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
765 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
766 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
767 ; GFX9-NEXT: s_waitcnt vmcnt(0)
768 ; GFX9-NEXT: s_setpc_b64 s[4:5]
770 ; GFX10-LABEL: test_call_external_void_func_i16_signext:
772 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
774 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
775 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
776 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
777 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
778 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
779 ; GFX10-NEXT: s_waitcnt vmcnt(0)
780 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
781 ; GFX10-NEXT: s_mov_b32 s33, s32
782 ; GFX10-NEXT: s_addk_i32 s32, 0x200
783 ; GFX10-NEXT: s_getpc_b64 s[4:5]
784 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
785 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
786 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
787 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
788 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
789 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
790 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
791 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
792 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
793 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
794 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
795 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
796 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
797 ; GFX10-NEXT: s_waitcnt vmcnt(0)
798 ; GFX10-NEXT: s_setpc_b64 s[4:5]
800 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext:
801 ; GFX10-SCRATCH: ; %bb.0:
802 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
804 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
805 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
806 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
807 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
808 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc
809 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
810 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
811 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
812 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
813 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
814 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4
815 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12
816 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
817 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
818 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
819 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
820 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
821 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
822 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
823 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
824 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
825 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
826 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
827 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
828 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Volatile load keeps the memory access in the output even though the
; address is undef; the loaded value is the signext call argument.
829 %var = load volatile i16, i16 addrspace(1)* undef
830 call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var)
; Test: load a volatile i16 from an undef addrspace(1) pointer and pass it as
; a zeroext argument to @external_void_func_i16_zeroext. The function's own
; unnamed i32 parameter is not referenced by the visible body. The check
; blocks below expect the value to be loaded into v0 with global_load_ushort
; and waited on (s_waitcnt vmcnt(0)) before the call sequence.
834 define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 {
835 ; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
837 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
838 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
839 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
840 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
841 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
842 ; GFX9-NEXT: s_waitcnt vmcnt(0)
843 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
844 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
845 ; GFX9-NEXT: s_mov_b32 s33, s32
846 ; GFX9-NEXT: s_addk_i32 s32, 0x400
847 ; GFX9-NEXT: s_getpc_b64 s[4:5]
848 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
849 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
850 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
851 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
852 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
853 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
854 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
855 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
856 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
857 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
858 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
859 ; GFX9-NEXT: s_waitcnt vmcnt(0)
860 ; GFX9-NEXT: s_setpc_b64 s[4:5]
862 ; GFX10-LABEL: test_call_external_void_func_i16_zeroext:
864 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
865 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
866 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
867 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
868 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
869 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
870 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
871 ; GFX10-NEXT: s_waitcnt vmcnt(0)
872 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
873 ; GFX10-NEXT: s_mov_b32 s33, s32
874 ; GFX10-NEXT: s_addk_i32 s32, 0x200
875 ; GFX10-NEXT: s_getpc_b64 s[4:5]
876 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
877 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
878 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
879 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
880 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
881 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
882 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
883 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
884 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
885 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
886 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
887 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
888 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
889 ; GFX10-NEXT: s_waitcnt vmcnt(0)
890 ; GFX10-NEXT: s_setpc_b64 s[4:5]
892 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext:
893 ; GFX10-SCRATCH: ; %bb.0:
894 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
895 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
896 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
897 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
898 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
899 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
900 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc
901 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
902 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
903 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
904 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
905 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
906 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4
907 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12
908 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
909 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
910 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
911 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
912 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
913 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
914 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
915 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
916 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
917 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
918 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
919 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
920 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Volatile load keeps the memory access in the output even though the
; address is undef; the loaded value is the zeroext call argument.
921 %var = load volatile i16, i16 addrspace(1)* undef
922 call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var)
; Test: pass the constant i32 42 to @external_void_func_i32 through an
; amdgpu_gfx call. The function's own unnamed i32 parameter is not referenced
; by the visible body. In all three check blocks below the constant is
; expected to be moved into v0 ahead of the s_swappc_b64 call.
926 define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 {
927 ; GFX9-LABEL: test_call_external_void_func_i32_imm:
929 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
930 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
931 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
932 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
933 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
934 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
935 ; GFX9-NEXT: s_mov_b32 s33, s32
936 ; GFX9-NEXT: s_addk_i32 s32, 0x400
937 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
938 ; GFX9-NEXT: s_getpc_b64 s[4:5]
939 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
940 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
941 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
942 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
943 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
944 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
945 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
946 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
947 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
948 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
949 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
950 ; GFX9-NEXT: s_waitcnt vmcnt(0)
951 ; GFX9-NEXT: s_setpc_b64 s[4:5]
953 ; GFX10-LABEL: test_call_external_void_func_i32_imm:
955 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
957 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
958 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
959 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
960 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
961 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
962 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
963 ; GFX10-NEXT: s_mov_b32 s33, s32
964 ; GFX10-NEXT: s_addk_i32 s32, 0x200
965 ; GFX10-NEXT: s_getpc_b64 s[4:5]
966 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
967 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
968 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
969 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
970 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
971 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
972 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
973 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
974 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
975 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
976 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
977 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
978 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
979 ; GFX10-NEXT: s_waitcnt vmcnt(0)
980 ; GFX10-NEXT: s_setpc_b64 s[4:5]
982 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm:
983 ; GFX10-SCRATCH: ; %bb.0:
984 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
986 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
987 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
988 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
989 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
990 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
991 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42
992 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
993 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
994 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
995 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4
996 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12
997 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
998 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
999 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1000 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1001 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1002 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1003 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1004 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1005 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1006 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1007 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1008 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1009 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Call under test: i32 immediate argument.
1010 call amdgpu_gfx void @external_void_func_i32(i32 42)
; Test: pass the constant i64 123 to @external_void_func_i64 through an
; amdgpu_gfx call. In all three check blocks below the 64-bit constant is
; expected to be split across two VGPRs: v0 = 0x7b (low half) and v1 = 0
; (high half).
1014 define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 {
1015 ; GFX9-LABEL: test_call_external_void_func_i64_imm:
1017 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1018 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1019 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1020 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1021 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1022 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1023 ; GFX9-NEXT: s_mov_b32 s33, s32
1024 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1025 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
1026 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1027 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1028 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1029 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1030 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1031 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1032 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1033 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1034 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1035 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1036 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1037 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1038 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1039 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1040 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1042 ; GFX10-LABEL: test_call_external_void_func_i64_imm:
1044 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1046 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1047 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1048 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1049 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1050 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1051 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
1052 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1053 ; GFX10-NEXT: s_mov_b32 s33, s32
1054 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1055 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1056 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1057 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1058 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1059 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1060 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1061 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1062 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1063 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1064 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1065 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1066 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1067 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1068 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1069 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1070 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1072 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm:
1073 ; GFX10-SCRATCH: ; %bb.0:
1074 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1075 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1076 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1077 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1078 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1079 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1080 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1081 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b
1082 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1083 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1084 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1085 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1086 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1087 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4
1088 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12
1089 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1090 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1091 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1092 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1093 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1094 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1095 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1096 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1097 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1098 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1099 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1100 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Call under test: i64 immediate argument.
1101 call amdgpu_gfx void @external_void_func_i64(i64 123)
; Test: load a <2 x i64> from the null addrspace(1) pointer (non-volatile)
; and pass it to @external_void_func_v2i64 through an amdgpu_gfx call. The
; check blocks below expect v0 and v1 to be zeroed to form the address and
; the whole vector to be fetched with one global_load_dwordx4 into v[0:3].
1105 define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 {
1106 ; GFX9-LABEL: test_call_external_void_func_v2i64:
1108 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1110 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1111 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1112 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1113 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1114 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1115 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1116 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1117 ; GFX9-NEXT: s_mov_b32 s33, s32
1118 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1119 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1120 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1121 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1122 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1123 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1124 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1125 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1126 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1127 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1128 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1129 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1130 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1131 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1132 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1134 ; GFX10-LABEL: test_call_external_void_func_v2i64:
1136 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1138 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1139 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1140 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1141 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1142 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1143 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1144 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1145 ; GFX10-NEXT: s_mov_b32 s33, s32
1146 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1147 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1148 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1149 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1150 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1151 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1152 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1153 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1154 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1155 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1156 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1157 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1158 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1159 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1160 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1161 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1162 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1163 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1165 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64:
1166 ; GFX10-SCRATCH: ; %bb.0:
1167 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1169 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1170 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1171 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1172 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1173 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1174 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1175 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1176 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1177 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1178 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1179 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1180 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1181 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1182 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1183 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1184 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1185 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1186 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1187 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1188 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1189 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1190 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1191 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1192 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1193 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1194 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; The vector argument comes from a plain load at address null in the
; global (addrspace 1) address space and is forwarded unchanged to the callee.
1195 %val = load <2 x i64>, <2 x i64> addrspace(1)* null
1196 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val)
; Test: pass a constant <2 x i64> to @external_void_func_v2i64 through an
; amdgpu_gfx call. The elements are 8589934593 (0x200000001) and 17179869187
; (0x400000003), so their 32-bit halves in low/high order are 1, 2, 3, 4.
; All three check blocks below expect exactly those values in v0, v1, v2, v3.
1200 define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 {
1201 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
1203 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1205 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1206 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1207 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1208 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1209 ; GFX9-NEXT: s_mov_b32 s33, s32
1210 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1211 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
1212 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
1213 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
1214 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
1215 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1216 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1217 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1218 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1219 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1220 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1221 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1222 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1223 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1224 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1225 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1226 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1227 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1228 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1230 ; GFX10-LABEL: test_call_external_void_func_v2i64_imm:
1232 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1234 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1235 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1236 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1237 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1238 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1239 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
1240 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
1241 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
1242 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
1243 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1244 ; GFX10-NEXT: s_mov_b32 s33, s32
1245 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1246 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1247 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1248 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1249 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1250 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1251 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1252 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1253 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1254 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1255 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1256 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1257 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1258 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1259 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1260 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1262 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm:
1263 ; GFX10-SCRATCH: ; %bb.0:
1264 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1266 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1267 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1268 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1269 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1270 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1271 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
1272 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
1273 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
1274 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
1275 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1276 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1277 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1278 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1279 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1280 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1281 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1282 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1283 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1284 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1285 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1286 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1287 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1288 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1289 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1290 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1291 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1292 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Call under test: the two i64 constants are chosen so that each 32-bit
; half is a distinct small integer, making the register assignment visible.
1293 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
; Test: call @external_void_func_v3i64 with a <3 x i64> argument.
; The argument is built by loading a <2 x i64> from a null addrspace(1)
; pointer and shuffling it with the constant <i64 8589934593, i64 undef>,
; selecting elements 0, 1 and 2. 8589934593 is 0x2_00000001, which is why the
; expected code materializes v4 = 1 and v5 = 2 next to the loaded v[0:3].
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file) for the three check prefixes used by the llc invocations; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
1297 define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
1298 ; GFX9-LABEL: test_call_external_void_func_v3i64:
1300 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1302 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1303 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1304 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1305 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1306 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1307 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1308 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1309 ; GFX9-NEXT: s_mov_b32 s33, s32
1310 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1311 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1312 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1313 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1314 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1315 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1316 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1317 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1318 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1319 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1320 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1321 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1322 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1323 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1324 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1325 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1326 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1328 ; GFX10-LABEL: test_call_external_void_func_v3i64:
1330 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1332 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1333 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1334 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1335 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1336 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1337 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1338 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1339 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
1340 ; GFX10-NEXT: v_mov_b32_e32 v5, 2
1341 ; GFX10-NEXT: s_mov_b32 s33, s32
1342 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1343 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1344 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1345 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1346 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1347 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1348 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1349 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1350 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1351 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1352 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1353 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1354 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1355 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1356 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1357 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1358 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1359 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1361 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64:
1362 ; GFX10-SCRATCH: ; %bb.0:
1363 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1364 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1365 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1366 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1367 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1368 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1369 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1370 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1371 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1372 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
1373 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2
1374 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1375 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1376 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1377 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1378 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1379 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4
1380 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12
1381 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1382 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1383 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1384 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1385 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1386 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1387 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1388 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1389 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1390 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1391 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1392 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
1393 %load = load <2 x i64>, <2 x i64> addrspace(1)* null
1394 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
1396 call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
; Test: call @external_void_func_v4i64 with a <4 x i64> argument.
; The argument concatenates a <2 x i64> loaded from a null addrspace(1)
; pointer with the constant <i64 8589934593, i64 17179869187> via a
; shufflevector selecting elements 0..3. The constants are 0x2_00000001 and
; 0x4_00000003, which is why the expected code materializes v4..v7 = 1, 2, 3, 4
; next to the loaded v[0:3].
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file) for the three check prefixes used by the llc invocations; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
1400 define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 {
1401 ; GFX9-LABEL: test_call_external_void_func_v4i64:
1403 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1405 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1406 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1407 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1408 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
1409 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1410 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1411 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1412 ; GFX9-NEXT: s_mov_b32 s33, s32
1413 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1414 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
1415 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
1416 ; GFX9-NEXT: v_mov_b32_e32 v6, 3
1417 ; GFX9-NEXT: v_mov_b32_e32 v7, 4
1418 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1419 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1420 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1421 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1422 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1423 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1424 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1425 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1426 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1427 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1428 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1429 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1430 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1431 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1433 ; GFX10-LABEL: test_call_external_void_func_v4i64:
1435 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1436 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1437 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1438 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1439 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1440 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1441 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1442 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1443 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1444 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
1445 ; GFX10-NEXT: v_mov_b32_e32 v5, 2
1446 ; GFX10-NEXT: v_mov_b32_e32 v6, 3
1447 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1448 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1449 ; GFX10-NEXT: v_mov_b32_e32 v7, 4
1450 ; GFX10-NEXT: s_mov_b32 s33, s32
1451 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1452 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1453 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1454 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1455 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1456 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1457 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1458 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1459 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1460 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1461 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1462 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1463 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1464 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1465 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1466 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1468 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64:
1469 ; GFX10-SCRATCH: ; %bb.0:
1470 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1471 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1472 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1473 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1474 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1475 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1476 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
1477 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
1478 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1479 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
1480 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2
1481 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 3
1482 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
1483 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1484 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4
1485 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1486 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1487 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1488 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4
1489 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12
1490 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1491 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1492 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1493 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1494 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1495 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1496 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1497 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1498 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1499 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1500 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1501 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
1502 %load = load <2 x i64>, <2 x i64> addrspace(1)* null
1503 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1504 call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val)
; Test: call @external_void_func_f16 with the immediate half 4.0.
; The expected code materializes the argument as v0 = 0x4400, the IEEE
; half-precision bit pattern of 4.0.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file) for the three check prefixes used by the llc invocations; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
1508 define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
1509 ; GFX9-LABEL: test_call_external_void_func_f16_imm:
1511 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1512 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1513 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1514 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1515 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1516 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1517 ; GFX9-NEXT: s_mov_b32 s33, s32
1518 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1519 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
1520 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1521 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1522 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1523 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1524 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1525 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1526 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1527 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1528 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1529 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1530 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1531 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1532 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1533 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1535 ; GFX10-LABEL: test_call_external_void_func_f16_imm:
1537 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1538 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1539 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1540 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1541 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1542 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1543 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1544 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400
1545 ; GFX10-NEXT: s_mov_b32 s33, s32
1546 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1547 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1548 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1549 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1550 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1551 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1552 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1553 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1554 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1555 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1556 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1557 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1558 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1559 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1560 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1561 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1562 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1564 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
1565 ; GFX10-SCRATCH: ; %bb.0:
1566 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1567 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1568 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1569 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1570 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1571 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1572 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1573 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400
1574 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1575 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1576 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1577 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4
1578 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12
1579 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1580 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1581 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1582 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1583 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1584 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1585 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1586 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1587 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1588 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1589 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1590 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1591 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
1592 call amdgpu_gfx void @external_void_func_f16(half 4.0)
; Test: call @external_void_func_f32 with the immediate float 4.0.
; The expected code moves the literal 4.0 directly into v0 before the call.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file) for the three check prefixes used by the llc invocations; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
1596 define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 {
1597 ; GFX9-LABEL: test_call_external_void_func_f32_imm:
1599 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1600 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1601 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1602 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1603 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1604 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1605 ; GFX9-NEXT: s_mov_b32 s33, s32
1606 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1607 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
1608 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1609 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1610 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1611 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1612 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1613 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1614 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1615 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1616 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1617 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1618 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1619 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1620 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1621 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1623 ; GFX10-LABEL: test_call_external_void_func_f32_imm:
1625 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1626 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1627 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1628 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1629 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1630 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1631 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1632 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0
1633 ; GFX10-NEXT: s_mov_b32 s33, s32
1634 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1635 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1636 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1637 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1638 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1639 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1640 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1641 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1642 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1643 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1644 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1645 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1646 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1647 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1648 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1649 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1650 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1652 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm:
1653 ; GFX10-SCRATCH: ; %bb.0:
1654 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1655 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1656 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1657 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1658 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1659 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1660 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1661 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0
1662 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1663 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1664 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1665 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4
1666 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12
1667 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1668 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1669 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1670 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1671 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1672 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1673 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1674 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1675 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1676 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1677 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1678 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1679 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
1680 call amdgpu_gfx void @external_void_func_f32(float 4.0)
; Test: call @external_void_func_v2f32 with the constant vector
; <float 1.0, float 2.0>.
; The expected code places one element per register: v0 = 1.0, v1 = 2.0.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file) for the three check prefixes used by the llc invocations; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
1684 define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 {
1685 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
1687 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1688 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1689 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1690 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1691 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1692 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1693 ; GFX9-NEXT: s_mov_b32 s33, s32
1694 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1695 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
1696 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
1697 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1698 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1699 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1700 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1701 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1702 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1703 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1704 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1705 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1706 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1707 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1708 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1709 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1710 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1712 ; GFX10-LABEL: test_call_external_void_func_v2f32_imm:
1714 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1715 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1716 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1717 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1718 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1719 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1720 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1721 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
1722 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
1723 ; GFX10-NEXT: s_mov_b32 s33, s32
1724 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1725 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1726 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1727 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1728 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1729 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1730 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1731 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1732 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1733 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1734 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1735 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1736 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1737 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1738 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1739 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1740 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1742 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm:
1743 ; GFX10-SCRATCH: ; %bb.0:
1744 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1746 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1747 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1748 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1749 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1750 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1751 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
1752 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
1753 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1754 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1755 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1756 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1757 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4
1758 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12
1759 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1760 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1761 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1762 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1763 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1764 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1765 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1766 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1767 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1768 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1769 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1770 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
1771 call amdgpu_gfx void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
; Test: call @external_void_func_v3f32 with the constant vector
; <float 1.0, float 2.0, float 4.0>.
; The expected code places one element per register: v0 = 1.0, v1 = 2.0,
; v2 = 4.0.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file) for the three check prefixes used by the llc invocations; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
1775 define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 {
1776 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
1778 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1779 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1780 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1781 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1782 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1783 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1784 ; GFX9-NEXT: s_mov_b32 s33, s32
1785 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1786 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
1787 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
1788 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
1789 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1790 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1791 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1792 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1793 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1794 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1795 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1796 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1797 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1798 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1799 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1800 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1801 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1802 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1804 ; GFX10-LABEL: test_call_external_void_func_v3f32_imm:
1806 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1807 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1808 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1809 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1810 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1811 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1812 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1813 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
1814 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
1815 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0
1816 ; GFX10-NEXT: s_mov_b32 s33, s32
1817 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1818 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1819 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1820 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1821 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1822 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1823 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1824 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1825 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1826 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1827 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1828 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1829 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1830 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1831 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1832 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1833 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1835 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm:
1836 ; GFX10-SCRATCH: ; %bb.0:
1837 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1839 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1840 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1841 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1842 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1843 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1844 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
1845 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
1846 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0
1847 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1848 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1849 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1850 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1851 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4
1852 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12
1853 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1854 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1855 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1856 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1857 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1858 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1859 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1860 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1861 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1862 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1863 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1864 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
1865 call amdgpu_gfx void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
; Test: call @external_void_func_v5f32 with the constant vector
; <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>.
; The expected code places one element per register: v0 = 1.0, v1 = 2.0,
; v2 = 4.0, v3 = -1.0, v4 = 0.5.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file) for the three check prefixes used by the llc invocations; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
1869 define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 {
1870 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
1872 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1874 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1875 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1876 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1877 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1878 ; GFX9-NEXT: s_mov_b32 s33, s32
1879 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1880 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
1881 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
1882 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
1883 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
1884 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
1885 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1886 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
1887 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
1888 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1889 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1890 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1891 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1892 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1893 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1894 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1895 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1896 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1897 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1898 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1900 ; GFX10-LABEL: test_call_external_void_func_v5f32_imm:
1902 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1903 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1904 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
1905 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1906 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1907 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
1908 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
1909 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
1910 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
1911 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0
1912 ; GFX10-NEXT: v_mov_b32_e32 v3, -1.0
1913 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1914 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5
1915 ; GFX10-NEXT: s_mov_b32 s33, s32
1916 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1917 ; GFX10-NEXT: s_getpc_b64 s[4:5]
1918 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
1919 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
1920 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1921 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
1922 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1923 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
1924 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1925 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
1926 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
1927 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1928 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1929 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
1930 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1931 ; GFX10-NEXT: s_setpc_b64 s[4:5]
1933 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm:
1934 ; GFX10-SCRATCH: ; %bb.0:
1935 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1936 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
1937 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
1938 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1939 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1940 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
1941 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
1942 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0
1943 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
1944 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0
1945 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, -1.0
1946 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
1947 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5
1948 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
1949 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
1950 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
1951 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4
1952 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12
1953 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
1954 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
1955 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
1956 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
1957 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
1958 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
1959 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
1960 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1961 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
1962 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
1963 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
1964 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
1965 call amdgpu_gfx void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Calls external_void_func_f64 with the immediate double 4.0.
; In every RUN configuration the checks expect the argument in v[0:1]:
; v0 = 0 and v1 = 0x40100000, i.e. the low and high dwords of 4.0 as an f64.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1969 define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 {
1970 ; GFX9-LABEL: test_call_external_void_func_f64_imm:
1972 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1973 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
1974 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1975 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
1976 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
1977 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
1978 ; GFX9-NEXT: s_mov_b32 s33, s32
1979 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1980 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1981 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
1982 ; GFX9-NEXT: s_getpc_b64 s[4:5]
1983 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
1984 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
1985 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
1986 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
1987 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1988 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
1989 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1990 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
1991 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
1992 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1993 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
1994 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1995 ; GFX9-NEXT: s_setpc_b64 s[4:5]
1997 ; GFX10-LABEL: test_call_external_void_func_f64_imm:
1999 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2000 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2001 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2002 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2003 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2004 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2005 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2006 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2007 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000
2008 ; GFX10-NEXT: s_mov_b32 s33, s32
2009 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2010 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2011 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2012 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2013 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2014 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2015 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2016 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2017 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2018 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2019 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2020 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2021 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2022 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2023 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2024 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2025 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2027 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm:
2028 ; GFX10-SCRATCH: ; %bb.0:
2029 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2030 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2031 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2032 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2033 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2034 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2035 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2036 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2037 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000
2038 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2039 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2040 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2041 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2042 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4
2043 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12
2044 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2045 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2046 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2047 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2048 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2049 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2050 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2051 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2052 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2053 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2054 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2055 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2056 call amdgpu_gfx void @external_void_func_f64(double 4.0)
; Calls external_void_func_v2f64 with the immediate vector <2.0, 4.0>.
; The checks expect the two doubles in v[0:3] as low/high dword pairs:
; v0 = 0, v1 = 2.0 for element 0 and v2 = 0, v3 = 0x40100000 for element 1.
; v1 is printed as the constant 2.0 because the high dword of double 2.0
; (0x40000000) has the same bit pattern as the 32-bit float 2.0.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
2060 define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 {
2061 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
2063 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2064 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2065 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2066 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2067 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2068 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2069 ; GFX9-NEXT: s_mov_b32 s33, s32
2070 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2071 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2072 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2073 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2074 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2075 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2076 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2077 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2078 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2079 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2080 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2081 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2082 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2083 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2084 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2085 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2086 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2087 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2088 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2090 ; GFX10-LABEL: test_call_external_void_func_v2f64_imm:
2092 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2093 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2094 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2095 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2096 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2097 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2098 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2099 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2100 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2101 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
2102 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000
2103 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2104 ; GFX10-NEXT: s_mov_b32 s33, s32
2105 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2106 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2107 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2108 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2109 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2110 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2111 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2112 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2113 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2114 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2115 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2116 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2117 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2118 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2119 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2120 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2122 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm:
2123 ; GFX10-SCRATCH: ; %bb.0:
2124 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2125 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2126 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2127 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2128 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2129 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2130 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2131 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2132 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2133 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
2134 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000
2135 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2136 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2137 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2138 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2139 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4
2140 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12
2141 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2142 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2143 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2144 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2145 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2146 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2147 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2148 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2149 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2150 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2151 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2152 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2153 call amdgpu_gfx void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
; Calls external_void_func_v3f64 with the immediate vector <2.0, 4.0, 8.0>.
; The checks expect the three doubles in v[0:5] as low/high dword pairs:
; every low dword (v0, v2, v4) is 0, and the high dwords are v1 = 2.0
; (bit pattern 0x40000000), v3 = 0x40100000 and v5 = 0x40200000.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
2157 define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 {
2158 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
2160 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2161 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2162 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2163 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2164 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2165 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2166 ; GFX9-NEXT: s_mov_b32 s33, s32
2167 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2168 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2169 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
2170 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
2171 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
2172 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
2173 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
2174 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2175 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2176 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2177 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2178 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2179 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2180 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2181 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2182 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2183 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2184 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2185 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2186 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2187 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2189 ; GFX10-LABEL: test_call_external_void_func_v3f64_imm:
2191 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2192 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2193 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2194 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2195 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2196 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2197 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2198 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2199 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
2200 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
2201 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000
2202 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2203 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
2204 ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000
2205 ; GFX10-NEXT: s_mov_b32 s33, s32
2206 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2207 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2208 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2209 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2210 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2211 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2212 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2213 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2214 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2215 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2216 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2217 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2218 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2219 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2220 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2221 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2223 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm:
2224 ; GFX10-SCRATCH: ; %bb.0:
2225 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2226 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2227 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2228 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2229 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2230 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2231 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2232 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
2233 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0
2234 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
2235 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000
2236 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2237 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
2238 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000
2239 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2240 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2241 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2242 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4
2243 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12
2244 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2245 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2246 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2247 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2248 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2249 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2250 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2251 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2252 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2253 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2254 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2255 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2256 call amdgpu_gfx void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
; Loads a <2 x i16> from an undef addrspace(1) pointer and passes it to
; external_void_func_v2i16. The checks expect a single global_load_dword
; into v0, so both i16 elements travel in one 32-bit register. Because the
; pointer is undef, the load address v[0:1] is never set up in the checks.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
2260 define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 {
2261 ; GFX9-LABEL: test_call_external_void_func_v2i16:
2263 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2264 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2265 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2266 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2267 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
2268 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2269 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2270 ; GFX9-NEXT: s_mov_b32 s33, s32
2271 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2272 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2273 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2274 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2275 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2276 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2277 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2278 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2279 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2280 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2281 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2282 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2283 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2284 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2285 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2287 ; GFX10-LABEL: test_call_external_void_func_v2i16:
2289 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2290 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2291 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2292 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2293 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2294 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2295 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
2296 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2297 ; GFX10-NEXT: s_mov_b32 s33, s32
2298 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2299 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2300 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2301 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2302 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2303 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2304 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2305 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2306 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2307 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2308 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2309 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2310 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2311 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2312 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2313 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2314 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2316 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16:
2317 ; GFX10-SCRATCH: ; %bb.0:
2318 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2319 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2320 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2321 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2322 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2323 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2324 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
2325 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2326 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2327 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2328 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2329 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4
2330 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12
2331 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2332 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2333 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2334 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2335 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2336 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2337 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2338 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2339 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2340 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2341 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2342 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2343 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2344 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
2345 call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val)
; Loads a <3 x i16> from an undef addrspace(1) pointer and passes it to
; external_void_func_v3i16. The checks expect one global_load_dwordx2 into
; v[0:1], so the three i16 elements occupy two 32-bit registers. Because the
; pointer is undef, the load address v[0:1] is never set up in the checks.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
2349 define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 {
2350 ; GFX9-LABEL: test_call_external_void_func_v3i16:
2352 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2354 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2355 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2356 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2357 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2358 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2359 ; GFX9-NEXT: s_mov_b32 s33, s32
2360 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2361 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2362 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2363 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2364 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2365 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2366 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2367 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2368 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2369 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2370 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2371 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2372 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2373 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2374 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2376 ; GFX10-LABEL: test_call_external_void_func_v3i16:
2378 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2379 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2380 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2381 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2382 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2383 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2384 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2385 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2386 ; GFX10-NEXT: s_mov_b32 s33, s32
2387 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2388 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2389 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2390 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2391 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2392 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2393 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2394 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2395 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2396 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2397 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2398 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2399 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2400 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2401 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2402 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2403 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2405 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16:
2406 ; GFX10-SCRATCH: ; %bb.0:
2407 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2408 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2409 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2410 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2411 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2412 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2413 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2414 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2415 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2416 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2417 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2418 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
2419 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
2420 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2421 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2422 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2423 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2424 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2425 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2426 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2427 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2428 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2429 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2430 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2431 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2432 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2433 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
2434 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val)
; Loads a <3 x half> from an undef addrspace(1) pointer and passes it to
; external_void_func_v3f16. The checks expect one global_load_dwordx2 into
; v[0:1], so the three half elements occupy two 32-bit registers. Because the
; pointer is undef, the load address v[0:1] is never set up in the checks.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
2438 define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 {
2439 ; GFX9-LABEL: test_call_external_void_func_v3f16:
2441 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2443 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2444 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2445 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2446 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2447 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2448 ; GFX9-NEXT: s_mov_b32 s33, s32
2449 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2450 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2451 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2452 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2453 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2454 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2455 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2456 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2457 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2458 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2459 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2460 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2461 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2462 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2463 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2465 ; GFX10-LABEL: test_call_external_void_func_v3f16:
2467 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2468 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2469 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2470 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2471 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2472 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2473 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2474 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2475 ; GFX10-NEXT: s_mov_b32 s33, s32
2476 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2477 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2478 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2479 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2480 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2481 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2482 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2483 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2484 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2485 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2486 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2487 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2488 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2489 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2490 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2491 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2492 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2494 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16:
2495 ; GFX10-SCRATCH: ; %bb.0:
2496 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2497 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2498 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2499 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2500 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2501 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2502 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2503 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2504 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2505 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2506 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2507 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
2508 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
2509 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2510 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2511 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2512 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2513 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2514 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2515 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2516 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2517 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2518 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2519 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2520 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2521 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2522 %val = load <3 x half>, <3 x half> addrspace(1)* undef
2523 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val)
; Calls external_void_func_v3i16 with the immediate vector <1, 2, 3>.
; The checks expect the elements packed two per register: v0 = 0x20001
; (element 0 in the low 16 bits, element 1 in the high 16 bits) and v1 = 3.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
2527 define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 {
2528 ; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
2530 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2531 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2532 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2533 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2534 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2535 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2536 ; GFX9-NEXT: s_mov_b32 s33, s32
2537 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2538 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
2539 ; GFX9-NEXT: v_mov_b32_e32 v1, 3
2540 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2541 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2542 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2543 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2544 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2545 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2546 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2547 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2548 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2549 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2550 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2551 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2552 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2553 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2555 ; GFX10-LABEL: test_call_external_void_func_v3i16_imm:
2557 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2558 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2559 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2560 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2561 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2562 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2563 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2564 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
2565 ; GFX10-NEXT: v_mov_b32_e32 v1, 3
2566 ; GFX10-NEXT: s_mov_b32 s33, s32
2567 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2568 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2569 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2570 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2571 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2572 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2573 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2574 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2575 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2576 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2577 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2578 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2579 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2580 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2581 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2582 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2583 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2585 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm:
2586 ; GFX10-SCRATCH: ; %bb.0:
2587 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2588 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2589 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2590 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2591 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2592 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2593 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2594 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001
2595 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3
2596 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2597 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2598 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2599 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2600 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
2601 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
2602 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2603 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2604 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2605 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2606 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2607 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2608 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2609 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2610 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2611 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2612 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2613 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2614 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
; Calls external_void_func_v3f16 with the constant <3 x half> <1.0, 2.0, 4.0>.
; All three run configurations expect the vector packed two halves per VGPR:
; v0 = 0x40003c00 (1.0 in the low 16 bits, 2.0 in the high 16 bits) and
; v1 = 0x4400 (4.0). The surrounding checks cover the v40 spill/reload and the
; v_writelane/v_readlane save and restore of s30, s31 and s33 around the call.
2618 define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 {
2619 ; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
2621 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2622 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2623 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2624 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2625 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2626 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2627 ; GFX9-NEXT: s_mov_b32 s33, s32
2628 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2629 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
2630 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
2631 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2632 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2633 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2634 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2635 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2636 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2637 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2638 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2639 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2640 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2641 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2642 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2643 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2644 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2646 ; GFX10-LABEL: test_call_external_void_func_v3f16_imm:
2648 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2649 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2650 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2651 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2652 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2653 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2654 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2655 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00
2656 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400
2657 ; GFX10-NEXT: s_mov_b32 s33, s32
2658 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2659 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2660 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2661 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2662 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2663 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2664 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2665 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2666 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2667 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2668 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2669 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2670 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2671 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2672 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2673 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2674 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2676 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm:
2677 ; GFX10-SCRATCH: ; %bb.0:
2678 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2679 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2680 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2681 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2682 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2683 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2684 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2685 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00
2686 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400
2687 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2688 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2689 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2690 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2691 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
2692 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
2693 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2694 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2695 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2696 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2697 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2698 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2699 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2700 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2701 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2702 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2703 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2704 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2705 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
; Loads a <4 x i16> through an undef addrspace(1) pointer and forwards it to
; external_void_func_v4i16. Every run configuration expects a single
; global_load_dwordx2 into v[0:1] with no further moves before the call, i.e.
; the loaded register pair is used directly as the outgoing argument.
2709 define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 {
2710 ; GFX9-LABEL: test_call_external_void_func_v4i16:
2712 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2713 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2714 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2715 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2716 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2717 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2718 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2719 ; GFX9-NEXT: s_mov_b32 s33, s32
2720 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2721 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2722 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2723 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2724 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2725 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2726 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2727 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2728 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2729 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2730 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2731 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2732 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2733 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2734 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2736 ; GFX10-LABEL: test_call_external_void_func_v4i16:
2738 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2739 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2740 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2741 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2742 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2743 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2744 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2745 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2746 ; GFX10-NEXT: s_mov_b32 s33, s32
2747 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2748 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2749 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2750 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2751 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2752 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2753 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2754 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2755 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2756 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2757 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2758 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2759 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2760 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2761 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2762 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2763 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2765 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16:
2766 ; GFX10-SCRATCH: ; %bb.0:
2767 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2768 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2769 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2770 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2771 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2772 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2773 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2774 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2775 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2776 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2777 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2778 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
2779 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
2780 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2781 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2782 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2783 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2784 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2785 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2786 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2787 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2788 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2789 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2790 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2791 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2792 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2793 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
2794 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val)
; Calls external_void_func_v4i16 with the constant <4 x i16> <1, 2, 3, 4>.
; The checks expect the elements packed two per VGPR and materialized with
; v_mov_b32: v0 = 0x20001 (1 in the low half, 2 in the high half) and
; v1 = 0x40003 (3 low, 4 high), identically in all three run configurations.
2798 define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 {
2799 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
2801 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2802 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2803 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2804 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2805 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2806 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2807 ; GFX9-NEXT: s_mov_b32 s33, s32
2808 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2809 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
2810 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
2811 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2812 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2813 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2814 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2815 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2816 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2817 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2818 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2819 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2820 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2821 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2822 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2823 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2824 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2826 ; GFX10-LABEL: test_call_external_void_func_v4i16_imm:
2828 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2829 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2830 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2831 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2832 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2833 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2834 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2835 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
2836 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003
2837 ; GFX10-NEXT: s_mov_b32 s33, s32
2838 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2839 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2840 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2841 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2842 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2843 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2844 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2845 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2846 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2847 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2848 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2849 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2850 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2851 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2852 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2853 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2854 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2856 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm:
2857 ; GFX10-SCRATCH: ; %bb.0:
2858 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2859 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2860 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2861 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2862 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2863 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2864 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2865 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001
2866 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003
2867 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2868 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2869 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2870 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2871 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
2872 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
2873 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2874 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2875 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2876 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2877 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2878 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2879 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2880 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2881 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2882 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2883 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2884 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2885 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
; Loads a <2 x half> through an undef addrspace(1) pointer and forwards it to
; external_void_func_v2f16. Every run configuration expects one
; global_load_dword into v0 and no additional moves before the call, so the
; two halves travel together in a single 32-bit VGPR.
2889 define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 {
2890 ; GFX9-LABEL: test_call_external_void_func_v2f16:
2892 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2893 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2894 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2895 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2896 ; GFX9-NEXT: global_load_dword v0, v[0:1], off
2897 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2898 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2899 ; GFX9-NEXT: s_mov_b32 s33, s32
2900 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2901 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2902 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
2903 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
2904 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2905 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2906 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2907 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2908 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2909 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2910 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
2911 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2912 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
2913 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2914 ; GFX9-NEXT: s_setpc_b64 s[4:5]
2916 ; GFX10-LABEL: test_call_external_void_func_v2f16:
2918 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2919 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2920 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
2921 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2922 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2923 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
2924 ; GFX10-NEXT: global_load_dword v0, v[0:1], off
2925 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
2926 ; GFX10-NEXT: s_mov_b32 s33, s32
2927 ; GFX10-NEXT: s_addk_i32 s32, 0x200
2928 ; GFX10-NEXT: s_getpc_b64 s[4:5]
2929 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
2930 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
2931 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
2932 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
2933 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
2934 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
2935 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
2936 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
2937 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
2938 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
2939 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2940 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2941 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
2942 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2943 ; GFX10-NEXT: s_setpc_b64 s[4:5]
2945 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16:
2946 ; GFX10-SCRATCH: ; %bb.0:
2947 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2948 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
2949 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
2950 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2951 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2952 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
2953 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off
2954 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
2955 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
2956 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
2957 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
2958 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4
2959 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12
2960 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
2961 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
2962 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
2963 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
2964 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
2965 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
2966 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
2967 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
2968 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2969 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
2970 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
2971 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
2972 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
2973 %val = load <2 x half>, <2 x half> addrspace(1)* undef
2974 call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val)
; Loads a <2 x i32> through an undef addrspace(1) pointer and forwards it to
; external_void_func_v2i32. Every run configuration expects a single
; global_load_dwordx2 into v[0:1] with no further moves before the call, i.e.
; one 32-bit element per VGPR in v0 and v1.
2978 define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 {
2979 ; GFX9-LABEL: test_call_external_void_func_v2i32:
2981 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2982 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
2983 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2984 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
2985 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
2986 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
2987 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
2988 ; GFX9-NEXT: s_mov_b32 s33, s32
2989 ; GFX9-NEXT: s_addk_i32 s32, 0x400
2990 ; GFX9-NEXT: s_getpc_b64 s[4:5]
2991 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
2992 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
2993 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2994 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
2995 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
2996 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
2997 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
2998 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
2999 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3000 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3001 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3002 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3003 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3005 ; GFX10-LABEL: test_call_external_void_func_v2i32:
3007 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3008 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3009 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3010 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3011 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3012 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3013 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3014 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3015 ; GFX10-NEXT: s_mov_b32 s33, s32
3016 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3017 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3018 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3019 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3020 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3021 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3022 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3023 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3024 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3025 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3026 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3027 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3028 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3029 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3030 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3031 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3032 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3034 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32:
3035 ; GFX10-SCRATCH: ; %bb.0:
3036 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3037 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3038 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3039 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3040 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3041 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3042 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
3043 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3044 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3045 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3046 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3047 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
3048 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
3049 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3050 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3051 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3052 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3053 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3054 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3055 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3056 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3057 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3058 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3059 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3060 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3061 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
3062 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
3063 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val)
; Calls external_void_func_v2i32 with the constant <2 x i32> <1, 2>.
; The checks expect one element per VGPR, materialized as inline constants:
; v_mov_b32 v0, 1 and v_mov_b32 v1, 2, in all three run configurations.
3067 define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 {
3068 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
3070 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3071 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3072 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3073 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3074 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3075 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3076 ; GFX9-NEXT: s_mov_b32 s33, s32
3077 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3078 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3079 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3080 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3081 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3082 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3083 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3084 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3085 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3086 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3087 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3088 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3089 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3090 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3091 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3092 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3093 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3095 ; GFX10-LABEL: test_call_external_void_func_v2i32_imm:
3097 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3098 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3099 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3100 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3101 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3102 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3103 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3104 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
3105 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
3106 ; GFX10-NEXT: s_mov_b32 s33, s32
3107 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3108 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3109 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3110 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3111 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3112 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3113 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3114 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3115 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3116 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3117 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3118 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3119 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3120 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3121 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3122 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3123 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3125 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm:
3126 ; GFX10-SCRATCH: ; %bb.0:
3127 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3128 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3129 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3130 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3131 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3132 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3133 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3134 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
3135 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
3136 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3137 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3138 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3139 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3140 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
3141 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
3142 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3143 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3144 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3145 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3146 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3147 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3148 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3149 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3150 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3151 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3152 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3153 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
3154 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
; Calls external_void_func_v3i32 with the constant <3 x i32> <3, 4, 5>.
; The checks expect one element per VGPR, materialized as inline constants in
; v0, v1 and v2, in all three run configurations. The function's own unnamed
; i32 parameter is not referenced by the visible body.
3158 define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
3159 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
3161 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3162 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3163 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3164 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3165 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3166 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3167 ; GFX9-NEXT: s_mov_b32 s33, s32
3168 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3169 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
3170 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
3171 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
3172 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3173 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3174 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3175 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3176 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3177 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3178 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3179 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3180 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3181 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3182 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3183 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3184 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3185 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3187 ; GFX10-LABEL: test_call_external_void_func_v3i32_imm:
3189 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3190 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3191 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3192 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3193 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3194 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3195 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3196 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
3197 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
3198 ; GFX10-NEXT: v_mov_b32_e32 v2, 5
3199 ; GFX10-NEXT: s_mov_b32 s33, s32
3200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3201 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3202 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3203 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3204 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3205 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3206 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3207 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3208 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3209 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3210 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3211 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3212 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3213 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3214 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3215 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3216 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3218 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm:
3219 ; GFX10-SCRATCH: ; %bb.0:
3220 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3221 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3222 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3223 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3224 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3225 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3226 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3227 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
3228 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4
3229 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5
3230 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3231 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3232 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3233 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3234 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4
3235 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12
3236 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3237 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3238 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3239 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3240 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3241 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3242 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3243 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3244 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3245 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3246 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3247 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
3248 call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
3252 define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
3253 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
3255 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3256 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3257 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3258 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3259 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3260 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3261 ; GFX9-NEXT: s_mov_b32 s33, s32
3262 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3263 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
3264 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
3265 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
3266 ; GFX9-NEXT: v_mov_b32_e32 v3, 6
3267 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3268 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3269 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3270 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3271 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3272 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3273 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3274 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3275 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3276 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3277 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3278 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3279 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3280 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3282 ; GFX10-LABEL: test_call_external_void_func_v3i32_i32:
3284 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3285 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3286 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3287 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3288 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3289 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3290 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3291 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
3292 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
3293 ; GFX10-NEXT: v_mov_b32_e32 v2, 5
3294 ; GFX10-NEXT: v_mov_b32_e32 v3, 6
3295 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3296 ; GFX10-NEXT: s_mov_b32 s33, s32
3297 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3298 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3299 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3300 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3301 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3302 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3303 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3304 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3305 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3306 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3307 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3308 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3309 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3310 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3311 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3312 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3314 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32:
3315 ; GFX10-SCRATCH: ; %bb.0:
3316 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3317 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3318 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3319 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3320 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3321 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3322 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3323 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
3324 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4
3325 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5
3326 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6
3327 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3328 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3329 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3330 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3331 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4
3332 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12
3333 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3334 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3335 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3336 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3337 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3338 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3339 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3340 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3341 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3342 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3343 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3344 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
3345 call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
; Test: amdgpu_gfx call that passes a <4 x i32> argument loaded at run time.
; The expected output is checked for the three llc configurations listed at
; the top of the file (gfx900, gfx1010, and gfx1010 with flat scratch enabled).
; In every configuration the checks expect:
;   - v40 spilled to the stack with all lanes enabled, then s33, s30 and s31
;     written into lanes 2, 0 and 1 of v40;
;   - the argument loaded by a single global_load_dwordx4 into v[0:3];
;   - the callee address built with s_getpc_b64 plus the rel32 lo/hi fixups
;     and called through s_swappc_b64;
;   - lanes 0 and 1 read back into the register pair used by the final
;     s_setpc_b64, and v40 reloaded before returning.
3349 define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
3350 ; GFX9-LABEL: test_call_external_void_func_v4i32:
3352 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3353 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3354 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3355 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3356 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3357 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3358 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3359 ; GFX9-NEXT: s_mov_b32 s33, s32
3360 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3361 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3362 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3363 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3364 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3365 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3366 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3367 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3368 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3369 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3370 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3371 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3372 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3373 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3374 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3376 ; GFX10-LABEL: test_call_external_void_func_v4i32:
3378 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3379 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3380 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3381 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3382 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3383 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3384 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3385 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3386 ; GFX10-NEXT: s_mov_b32 s33, s32
3387 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3388 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3389 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3390 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3391 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3392 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3393 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3394 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3395 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3396 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3397 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3398 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3399 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3400 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3401 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3402 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3403 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3405 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32:
3406 ; GFX10-SCRATCH: ; %bb.0:
3407 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3408 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3409 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3410 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3411 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3412 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3413 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
3414 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3415 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3416 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3417 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3418 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
3419 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
3420 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3421 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3422 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3423 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3424 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3425 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3426 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3427 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3428 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3429 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3430 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3431 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3432 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the vector is loaded from an undef addrspace(1) pointer and
; forwarded unchanged as the only call argument.
3433 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
3434 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val)
; Test: amdgpu_gfx call that passes a constant <4 x i32> argument.
; The expected output is checked for the three llc configurations listed at
; the top of the file (gfx900, gfx1010, and gfx1010 with flat scratch enabled).
; In every configuration the checks expect the four elements 1, 2, 3, 4 to be
; materialized with v_mov_b32 into v0..v3 (no memory load), surrounded by the
; same v40 spill / v_writelane save sequence and s_getpc_b64 + s_swappc_b64
; call sequence as the other tests in this file.
3438 define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
3439 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
3441 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3442 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3443 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3444 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3445 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3446 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3447 ; GFX9-NEXT: s_mov_b32 s33, s32
3448 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3449 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3450 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3451 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
3452 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
3453 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3454 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3455 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3456 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3457 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3458 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3459 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3460 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3461 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3462 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3463 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3464 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3465 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3466 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3468 ; GFX10-LABEL: test_call_external_void_func_v4i32_imm:
3470 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3471 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3472 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3473 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3474 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3475 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3476 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3477 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
3478 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
3479 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
3480 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
3481 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3482 ; GFX10-NEXT: s_mov_b32 s33, s32
3483 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3484 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3485 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3486 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3487 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3488 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3489 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3490 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3491 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3492 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3493 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3494 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3495 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3496 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3497 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3498 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3500 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm:
3501 ; GFX10-SCRATCH: ; %bb.0:
3502 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3503 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3504 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3505 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3506 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3507 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3508 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3509 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
3510 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
3511 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
3512 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
3513 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3514 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3515 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3516 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3517 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
3518 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
3519 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3520 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3521 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3522 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3523 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3524 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3525 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3526 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3527 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3528 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3529 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3530 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the argument is a literal vector constant.
3531 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
; Test: amdgpu_gfx call that passes a constant <5 x i32> argument.
; The expected output is checked for the three llc configurations listed at
; the top of the file (gfx900, gfx1010, and gfx1010 with flat scratch enabled).
; In every configuration the checks expect the five elements 1..5 to be
; materialized with v_mov_b32 into v0..v4, surrounded by the same v40 spill /
; v_writelane save sequence and s_getpc_b64 + s_swappc_b64 call sequence as
; the other tests in this file. The gfx1010 variants interleave the write of
; s30 into v40 between the moves to v3 and v4.
3535 define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
3536 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
3538 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3539 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3540 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3541 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3542 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3543 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3544 ; GFX9-NEXT: s_mov_b32 s33, s32
3545 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3546 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3547 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3548 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
3549 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
3550 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
3551 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3552 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3553 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3554 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3555 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3556 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3557 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3558 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3559 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3560 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3561 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3562 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3563 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3564 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3566 ; GFX10-LABEL: test_call_external_void_func_v5i32_imm:
3568 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3569 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3570 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3571 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3572 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3573 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3574 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3575 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
3576 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
3577 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
3578 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
3579 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3580 ; GFX10-NEXT: v_mov_b32_e32 v4, 5
3581 ; GFX10-NEXT: s_mov_b32 s33, s32
3582 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3583 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3584 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3585 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3586 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3587 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3588 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3589 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3590 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3591 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3592 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3593 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3594 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3595 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3596 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3597 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3599 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm:
3600 ; GFX10-SCRATCH: ; %bb.0:
3601 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3602 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3603 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3604 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3605 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3606 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3607 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3608 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
3609 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
3610 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
3611 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
3612 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3613 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5
3614 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3615 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3616 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3617 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4
3618 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12
3619 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3620 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3621 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3622 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3623 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3624 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3625 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3626 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3627 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3628 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3629 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3630 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the argument is a literal five-element vector constant.
3631 call amdgpu_gfx void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Test: amdgpu_gfx call that passes an <8 x i32> argument loaded through a
; pointer that is itself loaded from memory.
; The expected output is checked for the three llc configurations listed at
; the top of the file (gfx900, gfx1010, and gfx1010 with flat scratch enabled).
; In every configuration the checks expect:
;   - the pointer fetched with s_load_dwordx2, followed by a wait on lgkmcnt;
;   - the vector fetched by two global_load_dwordx4 instructions (offsets 0
;     and 16) into v[0:3] and v[4:7], using v8 = 0 as the VGPR offset; the
;     gfx1010 variants place the pair under s_clause 0x1;
;   - the same v40 spill / v_writelane save sequence and s_getpc_b64 +
;     s_swappc_b64 call sequence as the other tests in this file.
3635 define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
3636 ; GFX9-LABEL: test_call_external_void_func_v8i32:
3638 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3639 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3640 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3641 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3642 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3643 ; GFX9-NEXT: v_mov_b32_e32 v8, 0
3644 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3645 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3646 ; GFX9-NEXT: s_mov_b32 s33, s32
3647 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3648 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[4:5]
3649 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[4:5] offset:16
3650 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3651 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3652 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3653 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3654 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3655 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3656 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3657 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3658 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3659 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3660 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3661 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3662 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3663 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3664 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3666 ; GFX10-LABEL: test_call_external_void_func_v8i32:
3668 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3669 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3670 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3671 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3672 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3673 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3674 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3675 ; GFX10-NEXT: v_mov_b32_e32 v8, 0
3676 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3677 ; GFX10-NEXT: s_mov_b32 s33, s32
3678 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3679 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3680 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3681 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3682 ; GFX10-NEXT: s_clause 0x1
3683 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[4:5]
3684 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[4:5] offset:16
3685 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3686 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3687 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3688 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3689 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3690 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3691 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3692 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3693 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3694 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3695 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3696 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3697 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3698 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3699 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3701 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32:
3702 ; GFX10-SCRATCH: ; %bb.0:
3703 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3704 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3705 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3706 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3707 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3708 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3709 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3710 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0
3711 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3712 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3713 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3714 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3715 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3716 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
3717 ; GFX10-SCRATCH-NEXT: s_clause 0x1
3718 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1]
3719 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16
3720 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3721 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3722 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
3723 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
3724 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3725 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3726 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3727 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3728 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3729 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3730 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3731 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3732 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3733 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3734 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: %ptr is read from an undef addrspace(4) location, then the
; vector is read through %ptr (addrspace(1)) and passed as the call argument.
3735 %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
3736 %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
3737 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val)
; Test: amdgpu_gfx call that passes a constant <8 x i32> argument.
; The expected output is checked for the three llc configurations listed at
; the top of the file (gfx900, gfx1010, and gfx1010 with flat scratch enabled).
; In every configuration the checks expect the eight elements 1..8 to be
; materialized with v_mov_b32 into v0..v7 (no memory load), surrounded by the
; same v40 spill / v_writelane save sequence and s_getpc_b64 + s_swappc_b64
; call sequence as the other tests in this file. The gfx1010 variants
; interleave the write of s30 into v40 between the moves to v3 and v4.
3741 define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
3742 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
3744 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3745 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3746 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3747 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3748 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3749 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3750 ; GFX9-NEXT: s_mov_b32 s33, s32
3751 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3752 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
3753 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
3754 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
3755 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
3756 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
3757 ; GFX9-NEXT: v_mov_b32_e32 v5, 6
3758 ; GFX9-NEXT: v_mov_b32_e32 v6, 7
3759 ; GFX9-NEXT: v_mov_b32_e32 v7, 8
3760 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3761 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3762 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3763 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3764 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3765 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3766 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3767 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3768 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3769 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3770 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3771 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3772 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3773 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3775 ; GFX10-LABEL: test_call_external_void_func_v8i32_imm:
3777 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3778 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3779 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3780 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3781 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3782 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3783 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3784 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
3785 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
3786 ; GFX10-NEXT: v_mov_b32_e32 v2, 3
3787 ; GFX10-NEXT: v_mov_b32_e32 v3, 4
3788 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3789 ; GFX10-NEXT: v_mov_b32_e32 v4, 5
3790 ; GFX10-NEXT: v_mov_b32_e32 v5, 6
3791 ; GFX10-NEXT: v_mov_b32_e32 v6, 7
3792 ; GFX10-NEXT: v_mov_b32_e32 v7, 8
3793 ; GFX10-NEXT: s_mov_b32 s33, s32
3794 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3795 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3796 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3797 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3798 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3799 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3800 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3801 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3802 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3803 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3804 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3805 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3806 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3807 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3808 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3809 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3811 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm:
3812 ; GFX10-SCRATCH: ; %bb.0:
3813 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3814 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3815 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3816 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3817 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3818 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3819 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3820 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
3821 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2
3822 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3
3823 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4
3824 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3825 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5
3826 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 6
3827 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7
3828 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8
3829 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3830 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3831 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3832 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
3833 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
3834 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3835 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3836 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3837 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3838 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3839 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3840 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3841 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3842 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3843 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3844 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3845 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the argument is a literal eight-element vector constant.
3846 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
; Test: amdgpu_gfx call that passes a <16 x i32> argument loaded through a
; pointer that is itself loaded from memory.
; The expected output is checked for the three llc configurations listed at
; the top of the file (gfx900, gfx1010, and gfx1010 with flat scratch enabled).
; In every configuration the checks expect:
;   - the pointer fetched with s_load_dwordx2, followed by a wait on lgkmcnt;
;   - the vector fetched by four global_load_dwordx4 instructions (offsets 0,
;     16, 32 and 48) into v[0:15], using v16 = 0 as the VGPR offset; the
;     gfx1010 variants place the four loads under s_clause 0x3;
;   - the same v40 spill / v_writelane save sequence and s_getpc_b64 +
;     s_swappc_b64 call sequence as the other tests in this file.
3850 define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
3851 ; GFX9-LABEL: test_call_external_void_func_v16i32:
3853 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3854 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3855 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3856 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3857 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3858 ; GFX9-NEXT: v_mov_b32_e32 v16, 0
3859 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3860 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3861 ; GFX9-NEXT: s_mov_b32 s33, s32
3862 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3863 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[4:5]
3864 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[4:5] offset:16
3865 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[4:5] offset:32
3866 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[4:5] offset:48
3867 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3868 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3869 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
3870 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
3871 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3872 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3873 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3874 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3875 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3876 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3877 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3878 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3879 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3880 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3881 ; GFX9-NEXT: s_setpc_b64 s[4:5]
3883 ; GFX10-LABEL: test_call_external_void_func_v16i32:
3885 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3886 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3887 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
3888 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3889 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3890 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
3891 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3892 ; GFX10-NEXT: v_mov_b32_e32 v16, 0
3893 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
3894 ; GFX10-NEXT: s_mov_b32 s33, s32
3895 ; GFX10-NEXT: s_addk_i32 s32, 0x200
3896 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
3897 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
3898 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
3899 ; GFX10-NEXT: s_clause 0x3
3900 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[4:5]
3901 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[4:5] offset:16
3902 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[4:5] offset:32
3903 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[4:5] offset:48
3904 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3905 ; GFX10-NEXT: s_getpc_b64 s[4:5]
3906 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
3907 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
3908 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
3909 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
3910 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
3911 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
3912 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
3913 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
3914 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3915 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
3916 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
3917 ; GFX10-NEXT: s_waitcnt vmcnt(0)
3918 ; GFX10-NEXT: s_setpc_b64 s[4:5]
3920 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32:
3921 ; GFX10-SCRATCH: ; %bb.0:
3922 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3923 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
3924 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
3925 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3926 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3927 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
3928 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
3929 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0
3930 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
3931 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
3932 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
3933 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
3934 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
3935 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
3936 ; GFX10-SCRATCH-NEXT: s_clause 0x3
3937 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1]
3938 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16
3939 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32
3940 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48
3941 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3942 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
3943 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4
3944 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12
3945 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
3946 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
3947 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
3948 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
3949 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
3950 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
3951 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3952 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
3953 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
3954 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
3955 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: %ptr is read from an undef addrspace(4) location, then the
; vector is read through %ptr (addrspace(1)) and passed as the call argument.
3956 %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
3957 %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
3958 call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val)
; Test: pass a <32 x i32> value to an external amdgpu_gfx function.
; The address of the vector is loaded from an undef addrspace(4) pointer
; (the s_load_dwordx2 in the checks); the vector itself is then read with
; eight global_load_dwordx4 into v[0:31] ahead of the s_swappc_b64 call.
; Around the call, v40 is spilled to the stack and its lanes 0, 1 and 2 hold
; the incoming s30, s31 and s33 respectively.
3962 define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
3963 ; GFX9-LABEL: test_call_external_void_func_v32i32:
3965 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3966 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
3967 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3968 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
3969 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
3970 ; GFX9-NEXT: v_mov_b32_e32 v28, 0
3971 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
3972 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
3973 ; GFX9-NEXT: s_mov_b32 s33, s32
3974 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3975 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[4:5]
3976 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[4:5] offset:16
3977 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[4:5] offset:32
3978 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[4:5] offset:48
3979 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[4:5] offset:64
3980 ; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[4:5] offset:80
3981 ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96
3982 ; GFX9-NEXT: s_nop 0
3983 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112
3984 ; GFX9-NEXT: s_addk_i32 s32, 0x400
3985 ; GFX9-NEXT: s_getpc_b64 s[4:5]
3986 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
3987 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
3988 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
3989 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
3990 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
3991 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
3992 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
3993 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
3994 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
3995 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3996 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
3997 ; GFX9-NEXT: s_waitcnt vmcnt(0)
3998 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4000 ; GFX10-LABEL: test_call_external_void_func_v32i32:
4002 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4003 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4004 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4005 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4006 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4007 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4008 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
4009 ; GFX10-NEXT: v_mov_b32_e32 v32, 0
4010 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4011 ; GFX10-NEXT: s_mov_b32 s33, s32
4012 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4013 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4014 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4015 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4016 ; GFX10-NEXT: s_clause 0x7
4017 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[4:5]
4018 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[4:5] offset:16
4019 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[4:5] offset:32
4020 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[4:5] offset:48
4021 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[4:5] offset:64
4022 ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[4:5] offset:80
4023 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[4:5] offset:96
4024 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[4:5] offset:112
4025 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4026 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4027 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
4028 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
4029 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4030 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4031 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4032 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4033 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4034 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4035 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4036 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4037 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4038 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4039 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4041 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32:
4042 ; GFX10-SCRATCH: ; %bb.0:
4043 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4044 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4045 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4046 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4047 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4048 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4049 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4050 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0
4051 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4052 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4053 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4054 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4055 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4056 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
4057 ; GFX10-SCRATCH-NEXT: s_clause 0x7
4058 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1]
4059 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
4060 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
4061 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
4062 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
4063 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
4064 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
4065 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
4066 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4067 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4068 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4
4069 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12
4070 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4071 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4072 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4073 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4074 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4075 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4076 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4077 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4078 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4079 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4080 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: load the vector's address, load the vector, pass it by value.
4081 %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
4082 %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
4083 call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val)
; Test: pass a <32 x i32> followed by one extra i32 to an external amdgpu_gfx
; function. As in the plain v32i32 test, the checks load the vector into
; v[0:31]. The trailing i32 is loaded from an undef global address into a
; temporary VGPR (v32 on gfx900, v33 on gfx1010) and then stored to the stack
; at s32 before the s_swappc_b64, i.e. the checks expect it to be passed in
; memory rather than in a register.
4087 define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
4088 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
4090 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4091 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4092 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4093 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4094 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
4095 ; GFX9-NEXT: v_mov_b32_e32 v28, 0
4096 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4097 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4098 ; GFX9-NEXT: s_mov_b32 s33, s32
4099 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4100 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[4:5]
4101 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[4:5] offset:16
4102 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[4:5] offset:32
4103 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[4:5] offset:48
4104 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[4:5] offset:64
4105 ; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[4:5] offset:80
4106 ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96
4107 ; GFX9-NEXT: s_nop 0
4108 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112
4109 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4110 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4111 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4112 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4113 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4114 ; GFX9-NEXT: s_waitcnt vmcnt(7)
4115 ; GFX9-NEXT: global_load_dword v32, v[0:1], off
4116 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4117 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
4118 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4119 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
4120 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
4121 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4122 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
4123 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
4124 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4125 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
4126 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4127 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4129 ; GFX10-LABEL: test_call_external_void_func_v32i32_i32:
4131 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4132 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4133 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4134 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4135 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4136 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4137 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
4138 ; GFX10-NEXT: v_mov_b32_e32 v32, 0
4139 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4140 ; GFX10-NEXT: s_mov_b32 s33, s32
4141 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4142 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4143 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4144 ; GFX10-NEXT: global_load_dword v33, v[0:1], off
4145 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4146 ; GFX10-NEXT: s_clause 0x7
4147 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[4:5]
4148 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[4:5] offset:16
4149 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[4:5] offset:32
4150 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[4:5] offset:48
4151 ; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[4:5] offset:64
4152 ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[4:5] offset:80
4153 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[4:5] offset:96
4154 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[4:5] offset:112
4155 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4156 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4157 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4158 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4159 ; GFX10-NEXT: s_waitcnt vmcnt(8)
4160 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
4161 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4162 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4163 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4164 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4165 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4166 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4167 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4168 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4169 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4170 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4171 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4173 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32:
4174 ; GFX10-SCRATCH: ; %bb.0:
4175 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4176 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4177 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4178 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4179 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4180 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4181 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4182 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0
4183 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4184 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4185 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4186 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4187 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4188 ; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off
4189 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
4190 ; GFX10-SCRATCH-NEXT: s_clause 0x7
4191 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1]
4192 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
4193 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
4194 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
4195 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
4196 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
4197 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
4198 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
4199 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4200 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4201 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4
4202 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12
4203 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8)
4204 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32
4205 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4206 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4207 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4208 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4209 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4210 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4211 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4212 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4213 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4214 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4215 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: %val0 is the 32-element vector argument; %val1 is the extra
; i32 (loaded from an undef address) that the checks expect on the stack.
4216 %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
4217 %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
4218 %val1 = load i32, i32 addrspace(1)* undef
4219 call amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
; Test: call an external amdgpu_gfx function that returns i32, passing the
; immediate 42, and volatile-store the result through %out.
; In the checks, %out arrives in v0/v1 and is copied to v41/v42 so it
; survives the call (v0 is overwritten with 42 as the argument); v41 and v42
; are themselves spilled at s33 / s33+4 before use and reloaded after the
; global_store_dword of the returned v0.
4223 define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
4224 ; GFX9-LABEL: test_call_external_i32_func_i32_imm:
4226 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4227 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4228 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
4229 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4230 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4231 ; GFX9-NEXT: s_mov_b32 s33, s32
4232 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4233 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4234 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4235 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4236 ; GFX9-NEXT: v_mov_b32_e32 v41, v0
4237 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
4238 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4239 ; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4240 ; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4241 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4242 ; GFX9-NEXT: v_mov_b32_e32 v42, v1
4243 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4244 ; GFX9-NEXT: global_store_dword v[41:42], v0, off
4245 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4246 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
4247 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
4248 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
4249 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
4250 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4251 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
4252 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
4253 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
4254 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
4255 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4256 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4258 ; GFX10-LABEL: test_call_external_i32_func_i32_imm:
4260 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4261 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4262 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4263 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
4264 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4265 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4266 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4267 ; GFX10-NEXT: s_mov_b32 s33, s32
4268 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4269 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
4270 ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
4271 ; GFX10-NEXT: v_mov_b32_e32 v41, v0
4272 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4273 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
4274 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4275 ; GFX10-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4276 ; GFX10-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4277 ; GFX10-NEXT: v_mov_b32_e32 v42, v1
4278 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4279 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4280 ; GFX10-NEXT: global_store_dword v[41:42], v0, off
4281 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4282 ; GFX10-NEXT: s_clause 0x1
4283 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
4284 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
4285 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4286 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4287 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4288 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4289 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4290 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
4291 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4292 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4293 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4294 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4296 ; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm:
4297 ; GFX10-SCRATCH: ; %bb.0:
4298 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4299 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4300 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4301 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill
4302 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4303 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4304 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4305 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4306 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4307 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
4308 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
4309 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v0
4310 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4311 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42
4312 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4313 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4
4314 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12
4315 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, v1
4316 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4317 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4318 ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v0, off
4319 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4320 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4321 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
4322 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
4323 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4324 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4325 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4326 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4327 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4328 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
4329 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4330 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4331 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4332 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the store is volatile, so every run line expects a
; global_store_dword of the call result.
4333 %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42)
4334 store volatile i32 %val, i32 addrspace(1)* %out
; Test: pass a { i8, i32 } struct by value to an external amdgpu_gfx function.
; The struct's address is loaded from an undef addrspace(4) pointer. The
; checks then expect the i8 field in v0 (global_load_ubyte at offset 0) and
; the i32 field in v1 (global_load_dword at offset 4) before the call, i.e.
; one VGPR per struct member.
4338 define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
4339 ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
4341 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4342 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4343 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4344 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4345 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
4346 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
4347 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4348 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4349 ; GFX9-NEXT: s_mov_b32 s33, s32
4350 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4351 ; GFX9-NEXT: global_load_dword v1, v2, s[4:5] offset:4
4352 ; GFX9-NEXT: global_load_ubyte v0, v2, s[4:5]
4353 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4354 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4355 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4356 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4357 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4358 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4359 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
4360 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
4361 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4362 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
4363 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
4364 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4365 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
4366 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4367 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4369 ; GFX10-LABEL: test_call_external_void_func_struct_i8_i32:
4371 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4372 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4373 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4374 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4375 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4376 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4377 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
4378 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
4379 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4380 ; GFX10-NEXT: s_mov_b32 s33, s32
4381 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4382 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4383 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4384 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4385 ; GFX10-NEXT: s_clause 0x1
4386 ; GFX10-NEXT: global_load_ubyte v0, v2, s[4:5]
4387 ; GFX10-NEXT: global_load_dword v1, v2, s[4:5] offset:4
4388 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4389 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4390 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4391 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4392 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4393 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4394 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4395 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4396 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4397 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4398 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4399 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4400 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4401 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4402 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4404 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32:
4405 ; GFX10-SCRATCH: ; %bb.0:
4406 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4407 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4408 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4409 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4410 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4411 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4412 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4413 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
4414 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4415 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4416 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4417 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4418 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4419 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
4420 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4421 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1]
4422 ; GFX10-SCRATCH-NEXT: global_load_dword v1, v2, s[0:1] offset:4
4423 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4424 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4425 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4
4426 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12
4427 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4428 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4429 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4430 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4431 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4432 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4433 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4434 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4435 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4436 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4437 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: load the struct's address, load the whole struct as a
; first-class aggregate, and pass it directly (no byval/sret attribute).
4438 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
4439 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
4440 call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val)
; Test: pass a { i8, i32 } struct to an external amdgpu_gfx function through
; a byval addrspace(5) pointer. The struct is built in an alloca with the
; constants 3 (i8 field) and 8 (i32 field); the checks show these as a byte
; store at s33 and a dword store at s33 offset:4.
; Before the call, v0 is derived from s33: shifted right by 6 on gfx900, by 5
; on gfx1010 with buffer addressing, and copied unshifted in the flat-scratch
; run.
; NOTE(review): the differing shift amounts presumably reflect per-wave
; scaling of the frame register - confirm against the AMDGPU backend docs.
4444 define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
4445 ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4447 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4448 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4449 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
4450 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4451 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4452 ; GFX9-NEXT: s_mov_b32 s33, s32
4453 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
4454 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33
4455 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
4456 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4457 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4458 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
4459 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
4460 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4461 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4462 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4463 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4464 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4465 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
4466 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
4467 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4468 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
4469 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
4470 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
4471 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
4472 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4473 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4475 ; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4477 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4478 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4479 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4480 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
4481 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4482 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4483 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4484 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
4485 ; GFX10-NEXT: v_mov_b32_e32 v1, 8
4486 ; GFX10-NEXT: s_mov_b32 s33, s32
4487 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4488 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4489 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33
4490 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4
4491 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
4492 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4493 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4494 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4495 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4496 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4497 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4498 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4499 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4500 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4501 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4502 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
4503 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4504 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4505 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4506 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4508 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4509 ; GFX10-SCRATCH: ; %bb.0:
4510 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4511 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4512 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4513 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill
4514 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4515 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4516 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4517 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
4518 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8
4519 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4520 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4521 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4522 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33
4523 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4
4524 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s33
4525 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4526 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4
4527 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12
4528 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4529 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4530 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4531 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4532 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4533 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4534 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4535 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
4536 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4537 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4538 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4539 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: allocate the struct on the stack, initialise both fields via
; GEPs, then hand the alloca to the callee as a byval argument.
4540 %val = alloca { i8, i32 }, align 4, addrspace(5)
4541 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
4542 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
4543 store i8 3, i8 addrspace(5)* %gep0
4544 store i32 8, i32 addrspace(5)* %gep1
4545 call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val)
; Calls an external function that takes both an sret({ i8, i32 }) result
; pointer and a byval({ i8, i32 }) argument, each backed by its own
; addrspace(5) alloca. The byval struct is initialized to { 3, 8 } before the
; call; afterwards both fields of the sret struct are loaded and written out
; with volatile stores.
; The unnamed i32 parameter is not referenced by the visible body.
; The assertion groups below hold the expected llc output for the three
; invocations at the top of the file (gfx900, gfx1010, and gfx1010 with
; -amdgpu-enable-flat-scratch).
4549 define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
4550 ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
4552 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4553 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4554 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
4555 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4556 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4557 ; GFX9-NEXT: s_mov_b32 s33, s32
4558 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
4559 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33
4560 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
4561 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
4562 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
4563 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4564 ; GFX9-NEXT: s_addk_i32 s32, 0x800
4565 ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0
4566 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33
4567 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4568 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
4569 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
4570 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4571 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4572 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
4573 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
4574 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
4575 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
4576 ; GFX9-NEXT: s_addk_i32 s32, 0xf800
4577 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
4578 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4579 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
4580 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4581 ; GFX9-NEXT: global_store_dword v[0:1], v1, off
4582 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4583 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
4584 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
4585 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
4586 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4587 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4589 ; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
4591 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4592 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4593 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4594 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
4595 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4596 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4597 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
4598 ; GFX10-NEXT: v_mov_b32_e32 v1, 8
4599 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4600 ; GFX10-NEXT: s_mov_b32 s33, s32
4601 ; GFX10-NEXT: s_addk_i32 s32, 0x400
4602 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33
4603 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4
4604 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
4605 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4606 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33
4607 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4608 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
4609 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
4610 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0
4611 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4612 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4613 ; GFX10-NEXT: s_clause 0x1
4614 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
4615 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
4616 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4617 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4618 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00
4619 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4620 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4621 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
4622 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4623 ; GFX10-NEXT: global_store_dword v[0:1], v1, off
4624 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4625 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4626 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
4627 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4628 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4629 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4630 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4632 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
4633 ; GFX10-SCRATCH: ; %bb.0:
4634 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4635 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4636 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4637 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:16 ; 4-byte Folded Spill
4638 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4639 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4640 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4641 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3
4642 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4643 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
4644 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8
4645 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4646 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
4647 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
4648 ; GFX10-SCRATCH-NEXT: s_add_i32 vcc_lo, s33, 8
4649 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4650 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33
4651 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4
4652 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, vcc_lo
4653 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33
4654 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4655 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4656 ; GFX10-SCRATCH-NEXT: s_clause 0x1
4657 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8
4658 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12
4659 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4660 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4661 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
4662 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4663 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4664 ; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v0, off
4665 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4666 ; GFX10-SCRATCH-NEXT: global_store_dword v[0:1], v1, off
4667 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4668 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4669 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload
4670 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4671 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4672 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4673 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Separate stack objects for the byval input and the sret output.
4674 %in.val = alloca { i8, i32 }, align 4, addrspace(5)
4675 %out.val = alloca { i8, i32 }, align 4, addrspace(5)
; Initialize the input struct to { 3, 8 }.
4676 %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
4677 %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
4678 store i8 3, i8 addrspace(5)* %in.gep0
4679 store i32 8, i32 addrspace(5)* %in.gep1
; %out.val is the sret destination; %in.val is passed byval.
4680 call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val)
; Read back both fields of the sret result after the call.
4681 %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
4682 %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
4683 %out.val0 = load i8, i8 addrspace(5)* %out.gep0
4684 %out.val1 = load i32, i32 addrspace(5)* %out.gep1
; Volatile stores to undef addrspace(1) pointers consume the loaded values
; (presumably so the loads are not optimized away).
4686 store volatile i8 %out.val0, i8 addrspace(1)* undef
4687 store volatile i32 %out.val1, i32 addrspace(1)* undef
; Loads a <16 x i8> value through a pointer that is itself loaded from an
; undef addrspace(4) pointer, then passes the vector by value to
; external_void_func_v16i8.
; In the expected output the value arrives as four dwords in v0-v3; each of
; v1-v15 is then written from a dword shifted right by 8/16/24
; (v_lshrrev_b32) or copied (v_mov_b32) before the call, with v0 left as
; loaded.
4691 define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 {
4692 ; GFX9-LABEL: test_call_external_void_func_v16i8:
4694 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4695 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4696 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4697 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4698 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
4699 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
4700 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4701 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4702 ; GFX9-NEXT: s_mov_b32 s33, s32
4703 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
4704 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
4705 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4706 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4707 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4708 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
4709 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
4710 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4711 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
4712 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0
4713 ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0
4714 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
4715 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
4716 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
4717 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
4718 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
4719 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
4720 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
4721 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
4722 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
4723 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
4724 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
4725 ; GFX9-NEXT: v_mov_b32_e32 v12, v3
4726 ; GFX9-NEXT: v_mov_b32_e32 v1, v16
4727 ; GFX9-NEXT: v_mov_b32_e32 v2, v17
4728 ; GFX9-NEXT: v_mov_b32_e32 v3, v18
4729 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4730 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
4731 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
4732 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4733 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
4734 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
4735 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4736 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
4737 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4738 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4740 ; GFX10-LABEL: test_call_external_void_func_v16i8:
4742 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4743 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4744 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4745 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4746 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4747 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4748 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
4749 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
4750 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4751 ; GFX10-NEXT: s_mov_b32 s33, s32
4752 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4753 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4754 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4755 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4756 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
4757 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4758 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4759 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
4760 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
4761 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4762 ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0
4763 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0
4764 ; GFX10-NEXT: v_lshrrev_b32_e32 v18, 24, v0
4765 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
4766 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1
4767 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
4768 ; GFX10-NEXT: v_mov_b32_e32 v4, v1
4769 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2
4770 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
4771 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
4772 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
4773 ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3
4774 ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3
4775 ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3
4776 ; GFX10-NEXT: v_mov_b32_e32 v12, v3
4777 ; GFX10-NEXT: v_mov_b32_e32 v1, v16
4778 ; GFX10-NEXT: v_mov_b32_e32 v2, v17
4779 ; GFX10-NEXT: v_mov_b32_e32 v3, v18
4780 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4781 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4782 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4783 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4784 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4785 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4786 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4787 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4788 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4789 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4790 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4792 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8:
4793 ; GFX10-SCRATCH: ; %bb.0:
4794 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4795 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4796 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4797 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4798 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4799 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4800 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
4801 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
4802 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4803 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4804 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4805 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4806 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4807 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
4808 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
4809 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4810 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4811 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4
4812 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12
4813 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4814 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v16, 8, v0
4815 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v17, 16, v0
4816 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v18, 24, v0
4817 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1
4818 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1
4819 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1
4820 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1
4821 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2
4822 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2
4823 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2
4824 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2
4825 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3
4826 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3
4827 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3
4828 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3
4829 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v16
4830 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17
4831 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18
4832 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4833 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4834 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4835 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4836 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4837 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4838 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4839 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4840 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4841 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4842 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; The vector's address comes from a load through an undef pointer, so the
; argument value is not a compile-time constant.
4843 %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
4844 %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4845 call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val)
; Tail-calls the amdgpu_gfx function byval_align16_f64_arg from a function
; that is not itself declared amdgpu_gfx, forwarding %val and passing a local
; double alloca as a byval(double) align 16 argument.
; %tmp is not referenced by the visible IR.
; The expected output contains no s_swappc_b64: after two dwords are copied
; from s32 offset:8/12 to s32 offset:0/4, control transfers to the callee with
; s_setpc_b64 on the computed callee address.
; NOTE(review): the alloca is align 8 while the byval attribute requests
; align 16 -- presumably this mismatch is what the test exercises; confirm.
4849 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
4850 ; GFX9-LABEL: tail_call_byval_align16:
4851 ; GFX9: ; %bb.0: ; %entry
4852 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4853 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
4854 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
4855 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4856 ; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
4857 ; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
4858 ; GFX9-NEXT: s_waitcnt vmcnt(1)
4859 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
4860 ; GFX9-NEXT: s_waitcnt vmcnt(1)
4861 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32
4862 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4864 ; GFX10-LABEL: tail_call_byval_align16:
4865 ; GFX10: ; %bb.0: ; %entry
4866 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4867 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4868 ; GFX10-NEXT: s_clause 0x1
4869 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
4870 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
4871 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4872 ; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
4873 ; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
4874 ; GFX10-NEXT: s_waitcnt vmcnt(1)
4875 ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
4876 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4877 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
4878 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4880 ; GFX10-SCRATCH-LABEL: tail_call_byval_align16:
4881 ; GFX10-SCRATCH: ; %bb.0: ; %entry
4882 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4883 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4884 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s32 offset:8
4885 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4886 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
4887 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
4888 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4889 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
4890 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Local stack object handed to the callee as its byval(double) argument.
4892 %alloca = alloca double, align 8, addrspace(5)
4893 tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca)
4897 ; The tests below pass arguments marked inreg, which are placed in SGPRs.
; Passes the constant i1 true as an inreg argument to
; external_void_func_i1_inreg.
; NOTE(review): in the expected output the constant is materialized in v0 and
; written with a byte store at s32 before the call, rather than being moved
; into an SGPR -- confirm this is the intended lowering for an inreg i1.
4898 define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 {
4899 ; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg:
4901 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4902 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4903 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4904 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4905 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4906 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4907 ; GFX9-NEXT: s_mov_b32 s33, s32
4908 ; GFX9-NEXT: s_addk_i32 s32, 0x400
4909 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
4910 ; GFX9-NEXT: s_getpc_b64 s[4:5]
4911 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4
4912 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_inreg@rel32@hi+12
4913 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
4914 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
4915 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
4916 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
4917 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
4918 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
4919 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
4920 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
4921 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4922 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
4923 ; GFX9-NEXT: s_waitcnt vmcnt(0)
4924 ; GFX9-NEXT: s_setpc_b64 s[4:5]
4926 ; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg:
4928 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4929 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4930 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
4931 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4932 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4933 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
4934 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
4935 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
4936 ; GFX10-NEXT: s_mov_b32 s33, s32
4937 ; GFX10-NEXT: s_addk_i32 s32, 0x200
4938 ; GFX10-NEXT: s_getpc_b64 s[4:5]
4939 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4
4940 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_inreg@rel32@hi+12
4941 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
4942 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
4943 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
4944 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
4945 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
4946 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
4947 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
4948 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
4949 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
4950 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4951 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
4952 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
4953 ; GFX10-NEXT: s_waitcnt vmcnt(0)
4954 ; GFX10-NEXT: s_setpc_b64 s[4:5]
4956 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg:
4957 ; GFX10-SCRATCH: ; %bb.0:
4958 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4959 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
4960 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
4961 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4962 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4963 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
4964 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
4965 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1
4966 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
4967 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
4968 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
4969 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4
4970 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12
4971 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
4972 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
4973 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
4974 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
4975 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
4976 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
4977 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
4978 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
4979 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
4980 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4981 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
4982 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
4983 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
4984 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
4985 call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true)
; Passes the constant i8 123 as an inreg argument to
; external_void_func_i8_inreg.
; In the expected output the constant is placed in s4 with
; s_movk_i32 s4, 0x7b before the call.
; The unnamed i32 parameter is not referenced by the visible body.
4989 define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 {
4990 ; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg:
4992 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4993 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
4994 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4995 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
4996 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
4997 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
4998 ; GFX9-NEXT: s_mov_b32 s33, s32
4999 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5000 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
5001 ; GFX9-NEXT: s_getpc_b64 s[6:7]
5002 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4
5003 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i8_inreg@rel32@hi+12
5004 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5005 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
5006 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5007 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5008 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5009 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5010 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5011 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5012 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5013 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5014 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5016 ; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg:
5018 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5019 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5020 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5021 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5022 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5023 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5024 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5025 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
5026 ; GFX10-NEXT: s_mov_b32 s33, s32
5027 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5028 ; GFX10-NEXT: s_getpc_b64 s[6:7]
5029 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4
5030 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i8_inreg@rel32@hi+12
5031 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5032 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5033 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
5034 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5035 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5036 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5037 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5038 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5039 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5040 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5041 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5042 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5043 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5045 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg:
5046 ; GFX10-SCRATCH: ; %bb.0:
5047 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5048 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5049 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5050 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5051 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5052 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5053 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5054 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
5055 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5056 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5057 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5058 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4
5059 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12
5060 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5061 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5062 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5063 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5064 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5065 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5066 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5067 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5068 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5069 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5070 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5071 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5072 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
5073 call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123)
; Passes the constant i16 123 as an inreg argument to
; external_void_func_i16_inreg.
; In the expected output the constant is placed in s4 with
; s_movk_i32 s4, 0x7b before the call.
5077 define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 {
5078 ; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg:
5080 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5081 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5082 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5083 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5084 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5085 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5086 ; GFX9-NEXT: s_mov_b32 s33, s32
5087 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5088 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
5089 ; GFX9-NEXT: s_getpc_b64 s[6:7]
5090 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4
5091 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i16_inreg@rel32@hi+12
5092 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5093 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
5094 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5095 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5096 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5097 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5098 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5099 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5100 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5101 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5102 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5104 ; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg:
5106 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5107 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5108 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5109 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5110 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5111 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5112 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5113 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
5114 ; GFX10-NEXT: s_mov_b32 s33, s32
5115 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5116 ; GFX10-NEXT: s_getpc_b64 s[6:7]
5117 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4
5118 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i16_inreg@rel32@hi+12
5119 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5120 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5121 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
5122 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5123 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5124 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5125 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5126 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5127 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5128 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5129 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5130 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5131 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5133 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg:
5134 ; GFX10-SCRATCH: ; %bb.0:
5135 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5136 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5137 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5138 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5139 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5140 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5141 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5142 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
5143 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5144 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5145 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5146 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4
5147 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12
5148 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5149 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5150 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5151 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5152 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5153 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5154 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5155 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5156 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5157 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5158 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5159 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5160 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
5161 call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
; Passes the constant i32 42 as an inreg argument to
; external_void_func_i32_inreg.
; In the expected output the constant is placed in s4 with
; s_mov_b32 s4, 42 before the call.
; The unnamed i32 parameter is not referenced by the visible body.
5165 define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
5166 ; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg:
5168 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5169 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5170 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5171 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5172 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5173 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5174 ; GFX9-NEXT: s_mov_b32 s33, s32
5175 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5176 ; GFX9-NEXT: s_mov_b32 s4, 42
5177 ; GFX9-NEXT: s_getpc_b64 s[6:7]
5178 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4
5179 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i32_inreg@rel32@hi+12
5180 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5181 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
5182 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5183 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5184 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5185 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5186 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5187 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5188 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5189 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5190 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5192 ; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg:
5194 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5195 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5196 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5197 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5198 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5199 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5200 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5201 ; GFX10-NEXT: s_mov_b32 s4, 42
5202 ; GFX10-NEXT: s_mov_b32 s33, s32
5203 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5204 ; GFX10-NEXT: s_getpc_b64 s[6:7]
5205 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4
5206 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i32_inreg@rel32@hi+12
5207 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5208 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5209 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
5210 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5211 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5212 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5213 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5214 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5215 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5216 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5217 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5218 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5219 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5221 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg:
5222 ; GFX10-SCRATCH: ; %bb.0:
5223 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5224 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5225 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5226 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5227 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5228 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5229 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5230 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42
5231 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5232 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5233 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5234 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4
5235 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12
5236 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5237 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5238 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5239 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5240 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5241 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5242 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5243 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5244 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5245 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5246 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5247 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5248 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
5249 call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
; Calls @external_void_func_i64_inreg with the constant i64 123 as an inreg
; argument. For all three check prefixes the expected code materializes the
; value in s[4:5] (s4 = 0x7b, s5 = 0) before the s_swappc_b64 call, and saves
; s33/s30/s31 into lanes 2/0/1 of v40 around the call.
5253 define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
5254 ; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg:
5256 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5257 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5258 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5259 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5260 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5261 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5262 ; GFX9-NEXT: s_mov_b32 s33, s32
5263 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5264 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
5265 ; GFX9-NEXT: s_mov_b32 s5, 0
5266 ; GFX9-NEXT: s_getpc_b64 s[6:7]
5267 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i64_inreg@rel32@lo+4
5268 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i64_inreg@rel32@hi+12
5269 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5270 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
5271 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5272 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5273 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5274 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5275 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5276 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5277 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5278 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5279 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5281 ; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg:
5283 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5284 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5285 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5286 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5287 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5288 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5289 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5290 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
5291 ; GFX10-NEXT: s_mov_b32 s5, 0
5292 ; GFX10-NEXT: s_mov_b32 s33, s32
5293 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5294 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5295 ; GFX10-NEXT: s_getpc_b64 s[6:7]
5296 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i64_inreg@rel32@lo+4
5297 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i64_inreg@rel32@hi+12
5298 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5299 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
5300 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5301 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5302 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5303 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5304 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5305 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5306 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5307 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5308 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5309 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5311 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg:
5312 ; GFX10-SCRATCH: ; %bb.0:
5313 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5314 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5315 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5316 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5317 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5318 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5319 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5320 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
5321 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0
5322 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5323 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5324 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5325 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5326 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4
5327 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12
5328 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5329 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5330 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5331 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5332 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5333 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5334 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5335 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5336 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5337 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5338 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5339 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; 123 == 0x7b: the checks above expect the low dword in s4 and a zero high
; dword in s5.
5340 call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
; Loads a <2 x i64> through a null addrspace(4) pointer and passes it as an
; inreg argument to @external_void_func_v2i64_inreg. The expected code zeroes
; an SGPR pair for the base address and loads the four dwords into s[4:7]
; with s_load_dwordx4 (base s[4:5] for GFX9/GFX10, s[0:1] for GFX10-SCRATCH).
5344 define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
5345 ; GFX9-LABEL: test_call_external_void_func_v2i64_inreg:
5347 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5348 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5349 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5350 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5351 ; GFX9-NEXT: s_mov_b64 s[4:5], 0
5352 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
5353 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5354 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5355 ; GFX9-NEXT: s_mov_b32 s33, s32
5356 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5357 ; GFX9-NEXT: s_getpc_b64 s[8:9]
5358 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
5359 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
5360 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5361 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
5362 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5363 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5364 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5365 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5366 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5367 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5368 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5369 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5370 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5372 ; GFX10-LABEL: test_call_external_void_func_v2i64_inreg:
5374 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5375 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5376 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5377 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5378 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5379 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5380 ; GFX10-NEXT: s_mov_b64 s[4:5], 0
5381 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5382 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
5383 ; GFX10-NEXT: s_mov_b32 s33, s32
5384 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5385 ; GFX10-NEXT: s_getpc_b64 s[8:9]
5386 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
5387 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
5388 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5389 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5390 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
5391 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5392 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5393 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5394 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5395 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5396 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5397 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5398 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5399 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5400 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5402 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg:
5403 ; GFX10-SCRATCH: ; %bb.0:
5404 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5405 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5406 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5407 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5408 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5409 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5410 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
5411 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5412 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
5413 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5414 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5415 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5416 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
5417 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
5418 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5419 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5420 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5421 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5422 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5423 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5424 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5425 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5426 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5427 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5428 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5429 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5430 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; The null-pointer load is what produces the zeroed base register and the
; s_load_dwordx4 in the checks above.
5431 %val = load <2 x i64>, <2 x i64> addrspace(4)* null
5432 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
; Passes the constant vector <i64 8589934593, i64 17179869187> as an inreg
; argument to @external_void_func_v2i64_inreg. The two elements are
; 0x0000000200000001 and 0x0000000400000003, so the expected code sets
; s4..s7 to 1, 2, 3, 4 with individual s_mov_b32 instructions.
5436 define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
5437 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg:
5439 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5440 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5441 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5442 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5443 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5444 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5445 ; GFX9-NEXT: s_mov_b32 s33, s32
5446 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5447 ; GFX9-NEXT: s_mov_b32 s4, 1
5448 ; GFX9-NEXT: s_mov_b32 s5, 2
5449 ; GFX9-NEXT: s_mov_b32 s6, 3
5450 ; GFX9-NEXT: s_mov_b32 s7, 4
5451 ; GFX9-NEXT: s_getpc_b64 s[8:9]
5452 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
5453 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
5454 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5455 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
5456 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5457 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5458 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5459 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5460 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5461 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5462 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5463 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5464 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5466 ; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg:
5468 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5469 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5470 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5471 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5472 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5473 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5474 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5475 ; GFX10-NEXT: s_mov_b32 s4, 1
5476 ; GFX10-NEXT: s_mov_b32 s5, 2
5477 ; GFX10-NEXT: s_mov_b32 s6, 3
5478 ; GFX10-NEXT: s_mov_b32 s7, 4
5479 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5480 ; GFX10-NEXT: s_mov_b32 s33, s32
5481 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5482 ; GFX10-NEXT: s_getpc_b64 s[8:9]
5483 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
5484 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
5485 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5486 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
5487 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5488 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5489 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5490 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5491 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5492 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5493 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5494 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5495 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5496 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5498 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg:
5499 ; GFX10-SCRATCH: ; %bb.0:
5500 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5501 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5502 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5503 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5504 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5505 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5506 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5507 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
5508 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
5509 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
5510 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
5511 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5512 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5513 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5514 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5515 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
5516 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
5517 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5518 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5519 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5520 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5521 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5522 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5523 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5524 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5525 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5526 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5527 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5528 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; 8589934593 = (2 << 32) | 1 and 17179869187 = (4 << 32) | 3, so each dword of
; the argument carries a distinct value (1, 2, 3, 4) in the checks above.
5529 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>)
; Builds a <3 x i64> from a <2 x i64> loaded through a null addrspace(4)
; pointer plus one constant element, and passes it as an inreg argument to
; @external_void_func_v3i64_inreg. The expected code loads the first two
; elements into s[4:7] with s_load_dwordx4 and materializes the third
; (0x0000000200000001) as s8 = 1, s9 = 2.
5533 define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
5534 ; GFX9-LABEL: test_call_external_void_func_v3i64_inreg:
5536 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5537 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5538 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5539 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5540 ; GFX9-NEXT: s_mov_b64 s[4:5], 0
5541 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
5542 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5543 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5544 ; GFX9-NEXT: s_mov_b32 s33, s32
5545 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5546 ; GFX9-NEXT: s_mov_b32 s8, 1
5547 ; GFX9-NEXT: s_mov_b32 s9, 2
5548 ; GFX9-NEXT: s_getpc_b64 s[10:11]
5549 ; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v3i64_inreg@rel32@lo+4
5550 ; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v3i64_inreg@rel32@hi+12
5551 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5552 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
5553 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5554 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5555 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5556 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5557 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5558 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5559 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5560 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5561 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5563 ; GFX10-LABEL: test_call_external_void_func_v3i64_inreg:
5565 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5566 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5567 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5568 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5569 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5570 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5571 ; GFX10-NEXT: s_mov_b64 s[4:5], 0
5572 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5573 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
5574 ; GFX10-NEXT: s_mov_b32 s8, 1
5575 ; GFX10-NEXT: s_mov_b32 s9, 2
5576 ; GFX10-NEXT: s_mov_b32 s33, s32
5577 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5578 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5579 ; GFX10-NEXT: s_getpc_b64 s[10:11]
5580 ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3i64_inreg@rel32@lo+4
5581 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3i64_inreg@rel32@hi+12
5582 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5583 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
5584 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5585 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5586 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5587 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5588 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5589 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5590 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5591 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5592 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5593 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5595 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg:
5596 ; GFX10-SCRATCH: ; %bb.0:
5597 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5598 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5599 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5600 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5601 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5602 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5603 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
5604 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5605 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
5606 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
5607 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
5608 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5609 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5610 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5611 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5612 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4
5613 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12
5614 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5615 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5616 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5617 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5618 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5619 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5620 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5621 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5622 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5623 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5624 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5625 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Shuffle mask <0, 1, 2> takes both loaded elements followed by element 0 of
; the constant operand (8589934593 = (2 << 32) | 1); the undef lane is unused.
5626 %load = load <2 x i64>, <2 x i64> addrspace(4)* null
5627 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
5629 call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
; Builds a <4 x i64> from a <2 x i64> loaded through a null addrspace(4)
; pointer plus two constant elements, and passes it as an inreg argument to
; @external_void_func_v4i64_inreg. The expected code loads the first two
; elements into s[4:7] with s_load_dwordx4 and materializes the constants
; (0x0000000200000001, 0x0000000400000003) as s8..s11 = 1, 2, 3, 4.
5633 define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
5634 ; GFX9-LABEL: test_call_external_void_func_v4i64_inreg:
5636 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5637 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5638 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5639 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5640 ; GFX9-NEXT: s_mov_b64 s[4:5], 0
5641 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
5642 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5643 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5644 ; GFX9-NEXT: s_mov_b32 s33, s32
5645 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5646 ; GFX9-NEXT: s_mov_b32 s8, 1
5647 ; GFX9-NEXT: s_mov_b32 s9, 2
5648 ; GFX9-NEXT: s_mov_b32 s10, 3
5649 ; GFX9-NEXT: s_mov_b32 s11, 4
5650 ; GFX9-NEXT: s_getpc_b64 s[12:13]
5651 ; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v4i64_inreg@rel32@lo+4
5652 ; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v4i64_inreg@rel32@hi+12
5653 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5654 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13]
5655 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5656 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5657 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5658 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5659 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5660 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5661 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5662 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5663 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5665 ; GFX10-LABEL: test_call_external_void_func_v4i64_inreg:
5667 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5668 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5669 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5670 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5671 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5672 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5673 ; GFX10-NEXT: s_mov_b64 s[4:5], 0
5674 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5675 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
5676 ; GFX10-NEXT: s_mov_b32 s8, 1
5677 ; GFX10-NEXT: s_mov_b32 s9, 2
5678 ; GFX10-NEXT: s_mov_b32 s10, 3
5679 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5680 ; GFX10-NEXT: s_mov_b32 s11, 4
5681 ; GFX10-NEXT: s_mov_b32 s33, s32
5682 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5683 ; GFX10-NEXT: s_getpc_b64 s[12:13]
5684 ; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v4i64_inreg@rel32@lo+4
5685 ; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v4i64_inreg@rel32@hi+12
5686 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5687 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13]
5688 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5689 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5690 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5691 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5692 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5693 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5694 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5695 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5696 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5697 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5699 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg:
5700 ; GFX10-SCRATCH: ; %bb.0:
5701 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5702 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5703 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5704 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5705 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5706 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5707 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
5708 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5709 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
5710 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
5711 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
5712 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3
5713 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5714 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4
5715 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5716 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5717 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5718 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4
5719 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12
5720 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5721 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5722 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5723 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5724 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5725 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5726 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5727 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5728 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5729 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5730 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5731 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; Shuffle mask <0, 1, 2, 3> concatenates the loaded vector with the constant
; vector; 8589934593 = (2 << 32) | 1 and 17179869187 = (4 << 32) | 3.
5732 %load = load <2 x i64>, <2 x i64> addrspace(4)* null
5733 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5734 call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
; Calls @external_void_func_f16_inreg with the constant half 4.0 as an inreg
; argument. For all three check prefixes the expected code materializes the
; half-precision bit pattern 0x4400 in s4 with s_movk_i32 before the
; s_swappc_b64 call.
5738 define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
5739 ; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg:
5741 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5742 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5743 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5744 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5745 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5746 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5747 ; GFX9-NEXT: s_mov_b32 s33, s32
5748 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5749 ; GFX9-NEXT: s_movk_i32 s4, 0x4400
5750 ; GFX9-NEXT: s_getpc_b64 s[6:7]
5751 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4
5752 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f16_inreg@rel32@hi+12
5753 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5754 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
5755 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5756 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5757 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5758 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5759 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5760 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5761 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5762 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5763 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5765 ; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg:
5767 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5768 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5769 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5770 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5771 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5772 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5773 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5774 ; GFX10-NEXT: s_movk_i32 s4, 0x4400
5775 ; GFX10-NEXT: s_mov_b32 s33, s32
5776 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5777 ; GFX10-NEXT: s_getpc_b64 s[6:7]
5778 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4
5779 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f16_inreg@rel32@hi+12
5780 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5781 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5782 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
5783 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5784 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5785 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5786 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5787 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5788 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5789 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5790 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5791 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5792 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5794 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg:
5795 ; GFX10-SCRATCH: ; %bb.0:
5796 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5797 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5798 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5799 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5800 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5801 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5802 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5803 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400
5804 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5805 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5806 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5807 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4
5808 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12
5809 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5810 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5811 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5812 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5813 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5814 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5815 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5816 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5817 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5818 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5819 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5820 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5821 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; half 4.0 is encoded as 0x4400, the immediate expected in s4 above.
5822 call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
; Calls @external_void_func_f32_inreg with the constant float 4.0 as an inreg
; argument. For all three check prefixes the expected code sets s4 with
; `s_mov_b32 s4, 4.0` before the s_swappc_b64 call.
5826 define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
5827 ; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg:
5829 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5830 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5831 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5832 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5833 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5834 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5835 ; GFX9-NEXT: s_mov_b32 s33, s32
5836 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5837 ; GFX9-NEXT: s_mov_b32 s4, 4.0
5838 ; GFX9-NEXT: s_getpc_b64 s[6:7]
5839 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4
5840 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f32_inreg@rel32@hi+12
5841 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5842 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
5843 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5844 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5845 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5846 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5847 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5848 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5849 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5850 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5851 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5853 ; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg:
5855 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5856 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5857 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5858 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5859 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5860 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5861 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5862 ; GFX10-NEXT: s_mov_b32 s4, 4.0
5863 ; GFX10-NEXT: s_mov_b32 s33, s32
5864 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5865 ; GFX10-NEXT: s_getpc_b64 s[6:7]
5866 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4
5867 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f32_inreg@rel32@hi+12
5868 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5869 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5870 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
5871 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5872 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5873 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5874 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5875 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5876 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5877 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5878 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5879 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5880 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5882 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg:
5883 ; GFX10-SCRATCH: ; %bb.0:
5884 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5885 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5886 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5887 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5888 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5889 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5890 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5891 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0
5892 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5893 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5894 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5895 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4
5896 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12
5897 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5898 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5899 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5900 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5901 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5902 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5903 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5904 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5905 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5906 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5907 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5908 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
5909 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; The float constant appears directly as the `4.0` operand of s_mov_b32 in
; the checks above.
5910 call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
; Calls external_void_func_v2f32_inreg with the constant <2 x float>
; <1.0, 2.0> passed as an inreg argument. In each of the three llc
; configurations the expected code materializes the two elements with
; s_mov_b32 into s4 and s5 before the s_swappc_b64 call.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5914 define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
5915 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg:
5917 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5918 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
5919 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5920 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
5921 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
5922 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
5923 ; GFX9-NEXT: s_mov_b32 s33, s32
5924 ; GFX9-NEXT: s_addk_i32 s32, 0x400
5925 ; GFX9-NEXT: s_mov_b32 s4, 1.0
5926 ; GFX9-NEXT: s_mov_b32 s5, 2.0
5927 ; GFX9-NEXT: s_getpc_b64 s[6:7]
5928 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2f32_inreg@rel32@lo+4
5929 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2f32_inreg@rel32@hi+12
5930 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
5931 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
5932 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
5933 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
5934 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
5935 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
5936 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
5937 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5938 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
5939 ; GFX9-NEXT: s_waitcnt vmcnt(0)
5940 ; GFX9-NEXT: s_setpc_b64 s[4:5]
5942 ; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg:
5944 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5945 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5946 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
5947 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5948 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5949 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
5950 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
5951 ; GFX10-NEXT: s_mov_b32 s4, 1.0
5952 ; GFX10-NEXT: s_mov_b32 s5, 2.0
5953 ; GFX10-NEXT: s_mov_b32 s33, s32
5954 ; GFX10-NEXT: s_addk_i32 s32, 0x200
5955 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
5956 ; GFX10-NEXT: s_getpc_b64 s[6:7]
5957 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f32_inreg@rel32@lo+4
5958 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2f32_inreg@rel32@hi+12
5959 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
5960 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
5961 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
5962 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
5963 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
5964 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
5965 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
5966 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5967 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
5968 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
5969 ; GFX10-NEXT: s_waitcnt vmcnt(0)
5970 ; GFX10-NEXT: s_setpc_b64 s[4:5]
5972 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg:
5973 ; GFX10-SCRATCH: ; %bb.0:
5974 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5975 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
5976 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
5977 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5978 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5979 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
5980 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
5981 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
5982 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
5983 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
5984 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
5985 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
5986 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
5987 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4
5988 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12
5989 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
5990 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
5991 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
5992 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
5993 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
5994 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
5995 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
5996 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5997 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
5998 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
5999 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6000 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6001 call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>)
; Calls external_void_func_v3f32_inreg with the constant <3 x float>
; <1.0, 2.0, 4.0> passed as an inreg argument. In each of the three llc
; configurations the expected code materializes the elements with s_mov_b32
; into s4, s5 and s6 before the s_swappc_b64 call.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6005 define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
6006 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg:
6008 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6009 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6010 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6011 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6012 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6013 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6014 ; GFX9-NEXT: s_mov_b32 s33, s32
6015 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6016 ; GFX9-NEXT: s_mov_b32 s4, 1.0
6017 ; GFX9-NEXT: s_mov_b32 s5, 2.0
6018 ; GFX9-NEXT: s_mov_b32 s6, 4.0
6019 ; GFX9-NEXT: s_getpc_b64 s[8:9]
6020 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3f32_inreg@rel32@lo+4
6021 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32_inreg@rel32@hi+12
6022 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6023 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
6024 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6025 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6026 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6027 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6028 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6029 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6030 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6031 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6032 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6034 ; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg:
6036 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6037 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6038 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6039 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6040 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6041 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6042 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6043 ; GFX10-NEXT: s_mov_b32 s4, 1.0
6044 ; GFX10-NEXT: s_mov_b32 s5, 2.0
6045 ; GFX10-NEXT: s_mov_b32 s6, 4.0
6046 ; GFX10-NEXT: s_mov_b32 s33, s32
6047 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6048 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6049 ; GFX10-NEXT: s_getpc_b64 s[8:9]
6050 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3f32_inreg@rel32@lo+4
6051 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32_inreg@rel32@hi+12
6052 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6053 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
6054 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6055 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6056 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6057 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6058 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6059 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6060 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6061 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6062 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6063 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6065 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg:
6066 ; GFX10-SCRATCH: ; %bb.0:
6067 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6068 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6069 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6070 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6071 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6072 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6073 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6074 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
6075 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
6076 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
6077 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6078 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6079 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6080 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6081 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4
6082 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12
6083 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6084 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6085 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6086 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6087 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6088 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6089 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6090 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6091 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6092 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6093 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6094 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6095 call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>)
; Calls external_void_func_v5f32_inreg with the constant <5 x float>
; <1.0, 2.0, 4.0, -1.0, 0.5> passed as an inreg argument. In each of the three
; llc configurations the expected code materializes the elements with
; s_mov_b32 into s4 through s8 before the s_swappc_b64 call.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6099 define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 {
6100 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg:
6102 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6103 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6104 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6105 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6106 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6107 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6108 ; GFX9-NEXT: s_mov_b32 s33, s32
6109 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6110 ; GFX9-NEXT: s_mov_b32 s4, 1.0
6111 ; GFX9-NEXT: s_mov_b32 s5, 2.0
6112 ; GFX9-NEXT: s_mov_b32 s6, 4.0
6113 ; GFX9-NEXT: s_mov_b32 s7, -1.0
6114 ; GFX9-NEXT: s_mov_b32 s8, 0.5
6115 ; GFX9-NEXT: s_getpc_b64 s[10:11]
6116 ; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v5f32_inreg@rel32@lo+4
6117 ; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v5f32_inreg@rel32@hi+12
6118 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6119 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
6120 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6121 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6122 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6123 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6124 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6125 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6126 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6127 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6128 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6130 ; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg:
6132 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6133 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6134 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6135 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6136 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6137 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6138 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6139 ; GFX10-NEXT: s_mov_b32 s4, 1.0
6140 ; GFX10-NEXT: s_mov_b32 s5, 2.0
6141 ; GFX10-NEXT: s_mov_b32 s6, 4.0
6142 ; GFX10-NEXT: s_mov_b32 s7, -1.0
6143 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6144 ; GFX10-NEXT: s_mov_b32 s8, 0.5
6145 ; GFX10-NEXT: s_mov_b32 s33, s32
6146 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6147 ; GFX10-NEXT: s_getpc_b64 s[10:11]
6148 ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5f32_inreg@rel32@lo+4
6149 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5f32_inreg@rel32@hi+12
6150 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6151 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
6152 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6153 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6154 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6155 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6156 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6157 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6158 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6159 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6160 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6161 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6163 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg:
6164 ; GFX10-SCRATCH: ; %bb.0:
6165 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6166 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6167 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6168 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6169 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6170 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6171 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6172 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
6173 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
6174 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
6175 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0
6176 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6177 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5
6178 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6179 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6180 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6181 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4
6182 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12
6183 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6184 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6185 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6186 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6187 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6188 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6189 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6190 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6191 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6192 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6193 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6194 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6195 call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
; Calls external_void_func_f64_inreg with the constant double 4.0 passed as an
; inreg argument. In each of the three llc configurations the expected code
; materializes the 64-bit value as two 32-bit halves with s_mov_b32: s4 = 0
; and s5 = 0x40100000, before the s_swappc_b64 call.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6199 define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 {
6200 ; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg:
6202 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6203 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6204 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6205 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6206 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6207 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6208 ; GFX9-NEXT: s_mov_b32 s33, s32
6209 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6210 ; GFX9-NEXT: s_mov_b32 s4, 0
6211 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000
6212 ; GFX9-NEXT: s_getpc_b64 s[6:7]
6213 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f64_inreg@rel32@lo+4
6214 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f64_inreg@rel32@hi+12
6215 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6216 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
6217 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6218 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6219 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6220 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6221 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6222 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6223 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6224 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6225 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6227 ; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg:
6229 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6230 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6231 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6232 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6233 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6234 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6235 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6236 ; GFX10-NEXT: s_mov_b32 s4, 0
6237 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000
6238 ; GFX10-NEXT: s_mov_b32 s33, s32
6239 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6240 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6241 ; GFX10-NEXT: s_getpc_b64 s[6:7]
6242 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f64_inreg@rel32@lo+4
6243 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f64_inreg@rel32@hi+12
6244 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6245 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
6246 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6247 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6248 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6249 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6250 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6251 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6252 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6253 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6254 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6255 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6257 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg:
6258 ; GFX10-SCRATCH: ; %bb.0:
6259 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6260 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6261 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6262 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6263 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6264 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6265 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6266 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
6267 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000
6268 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6269 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6270 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6271 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6272 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4
6273 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12
6274 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6275 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6276 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6277 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6278 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6279 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6280 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6281 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6282 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6283 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6284 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6285 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6286 call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0)
; Calls external_void_func_v2f64_inreg with the constant <2 x double>
; <2.0, 4.0> passed as an inreg argument. In each of the three llc
; configurations the expected code materializes each 64-bit element as two
; 32-bit halves with s_mov_b32 into s4 through s7 before the s_swappc_b64
; call (s5 is printed as the inline constant 2.0, s7 as 0x40100000).
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6290 define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 {
6291 ; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg:
6293 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6294 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6295 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6296 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6297 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6298 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6299 ; GFX9-NEXT: s_mov_b32 s33, s32
6300 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6301 ; GFX9-NEXT: s_mov_b32 s4, 0
6302 ; GFX9-NEXT: s_mov_b32 s5, 2.0
6303 ; GFX9-NEXT: s_mov_b32 s6, 0
6304 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
6305 ; GFX9-NEXT: s_getpc_b64 s[8:9]
6306 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2f64_inreg@rel32@lo+4
6307 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64_inreg@rel32@hi+12
6308 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6309 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
6310 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6311 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6312 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6313 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6314 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6315 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6316 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6317 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6318 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6320 ; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg:
6322 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6323 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6324 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6325 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6326 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6327 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6328 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6329 ; GFX10-NEXT: s_mov_b32 s4, 0
6330 ; GFX10-NEXT: s_mov_b32 s5, 2.0
6331 ; GFX10-NEXT: s_mov_b32 s6, 0
6332 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
6333 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6334 ; GFX10-NEXT: s_mov_b32 s33, s32
6335 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6336 ; GFX10-NEXT: s_getpc_b64 s[8:9]
6337 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2f64_inreg@rel32@lo+4
6338 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64_inreg@rel32@hi+12
6339 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6340 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
6341 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6342 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6343 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6344 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6345 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6346 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6347 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6348 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6349 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6350 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6352 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg:
6353 ; GFX10-SCRATCH: ; %bb.0:
6354 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6355 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6356 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6357 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6358 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6359 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6360 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6361 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
6362 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
6363 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
6364 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
6365 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6366 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6367 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6368 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6369 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4
6370 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12
6371 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6372 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6373 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6374 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6375 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6376 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6377 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6378 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6379 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6380 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6381 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6382 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6383 call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg <double 2.0, double 4.0>)
; Calls external_void_func_v3f64_inreg with the constant <3 x double>
; <2.0, 4.0, 8.0> passed as an inreg argument. In each of the three llc
; configurations the expected code materializes each 64-bit element as two
; 32-bit halves with s_mov_b32 into s4 through s9 before the s_swappc_b64
; call (s5 is printed as the inline constant 2.0, s7 as 0x40100000 and s9 as
; 0x40200000).
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6387 define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 {
6388 ; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg:
6390 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6391 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6392 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6393 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6394 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6395 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6396 ; GFX9-NEXT: s_mov_b32 s33, s32
6397 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6398 ; GFX9-NEXT: s_mov_b32 s4, 0
6399 ; GFX9-NEXT: s_mov_b32 s5, 2.0
6400 ; GFX9-NEXT: s_mov_b32 s6, 0
6401 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
6402 ; GFX9-NEXT: s_mov_b32 s8, 0
6403 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000
6404 ; GFX9-NEXT: s_getpc_b64 s[10:11]
6405 ; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v3f64_inreg@rel32@lo+4
6406 ; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v3f64_inreg@rel32@hi+12
6407 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6408 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
6409 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6410 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6411 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6412 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6413 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6414 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6415 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6416 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6417 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6419 ; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg:
6421 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6422 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6423 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6424 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6425 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6426 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6427 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6428 ; GFX10-NEXT: s_mov_b32 s4, 0
6429 ; GFX10-NEXT: s_mov_b32 s5, 2.0
6430 ; GFX10-NEXT: s_mov_b32 s6, 0
6431 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
6432 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6433 ; GFX10-NEXT: s_mov_b32 s8, 0
6434 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000
6435 ; GFX10-NEXT: s_mov_b32 s33, s32
6436 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6437 ; GFX10-NEXT: s_getpc_b64 s[10:11]
6438 ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3f64_inreg@rel32@lo+4
6439 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3f64_inreg@rel32@hi+12
6440 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6441 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
6442 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6443 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6444 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6445 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6446 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6447 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6448 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6449 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6450 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6451 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6453 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg:
6454 ; GFX10-SCRATCH: ; %bb.0:
6455 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6456 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6457 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6458 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6459 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6460 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6461 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6462 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
6463 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
6464 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
6465 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
6466 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6467 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0
6468 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000
6469 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6470 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6471 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6472 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4
6473 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12
6474 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6475 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6476 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6477 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6478 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6479 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6480 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6481 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6482 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6483 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6484 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6485 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6486 call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg <double 2.0, double 4.0, double 8.0>)
; Calls external_void_func_v2i16_inreg with a <2 x i16> value passed as an
; inreg argument. Unlike the immediate tests, the value is loaded from an
; undef addrspace(4) pointer; in each of the three llc configurations the
; expected code fetches it with a single s_load_dword into s4 before the
; s_swappc_b64 call. The base register pair of that load differs between
; configurations because the pointer is undef.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
6490 define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 {
6491 ; GFX9-LABEL: test_call_external_void_func_v2i16_inreg:
6493 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6494 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6495 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6496 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6497 ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
6498 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6499 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6500 ; GFX9-NEXT: s_mov_b32 s33, s32
6501 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6502 ; GFX9-NEXT: s_getpc_b64 s[6:7]
6503 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4
6504 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12
6505 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6506 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
6507 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6508 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6509 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6510 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6511 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6512 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6513 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6514 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6515 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6517 ; GFX10-LABEL: test_call_external_void_func_v2i16_inreg:
6519 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6520 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6521 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6522 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6523 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6524 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6525 ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
6526 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6527 ; GFX10-NEXT: s_mov_b32 s33, s32
6528 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6529 ; GFX10-NEXT: s_getpc_b64 s[6:7]
6530 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4
6531 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12
6532 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6533 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6534 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
6535 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6536 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6537 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6538 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6539 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6540 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6541 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6542 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6543 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6544 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6546 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg:
6547 ; GFX10-SCRATCH: ; %bb.0:
6548 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6549 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6550 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6551 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6552 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6553 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6554 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
6555 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6556 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6557 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6558 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6559 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4
6560 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12
6561 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6562 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6563 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6564 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6565 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6566 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6567 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6568 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6569 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6570 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6571 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6572 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6573 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6574 %val = load <2 x i16>, <2 x i16> addrspace(4)* undef
6575 call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val)
; Calls external_void_func_v3i16_inreg with a <3 x i16> value that is loaded
; from an undef addrspace(4) pointer and passed with the inreg attribute.
; For every RUN configuration the checks expect an s_load_dwordx2 into s[4:5]
; ahead of the s_swappc_b64 call, with s30, s31 and s33 saved in lanes of v40
; around the call.
6579 define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 {
6580 ; GFX9-LABEL: test_call_external_void_func_v3i16_inreg:
6582 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6583 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6584 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6585 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6586 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
6587 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6588 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6589 ; GFX9-NEXT: s_mov_b32 s33, s32
6590 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6591 ; GFX9-NEXT: s_getpc_b64 s[6:7]
6592 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
6593 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
6594 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6595 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
6596 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6597 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6598 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6599 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6600 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6601 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6602 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6603 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6604 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6606 ; GFX10-LABEL: test_call_external_void_func_v3i16_inreg:
6608 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6609 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6610 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6611 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6612 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6613 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6614 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
6615 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6616 ; GFX10-NEXT: s_mov_b32 s33, s32
6617 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6618 ; GFX10-NEXT: s_getpc_b64 s[6:7]
6619 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
6620 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
6621 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6622 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6623 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
6624 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6625 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6626 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6627 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6628 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6629 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6630 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6631 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6632 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6633 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6635 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg:
6636 ; GFX10-SCRATCH: ; %bb.0:
6637 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6638 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6639 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6640 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6641 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6642 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6643 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
6644 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6645 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6646 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6647 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6648 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
6649 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
6650 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6651 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6652 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6653 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6654 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6655 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6656 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6657 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6658 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6659 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6660 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6661 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6662 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6663 %val = load <3 x i16>, <3 x i16> addrspace(4)* undef
6664 call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val)
; Calls external_void_func_v3f16_inreg with a <3 x half> value that is loaded
; from an undef addrspace(4) pointer and passed with the inreg attribute.
; For every RUN configuration the checks expect an s_load_dwordx2 into s[4:5]
; ahead of the s_swappc_b64 call, i.e. the same sequence that is checked for
; the <3 x i16> variant of this test.
6668 define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 {
6669 ; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
6671 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6672 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6673 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6674 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6675 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
6676 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6677 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6678 ; GFX9-NEXT: s_mov_b32 s33, s32
6679 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6680 ; GFX9-NEXT: s_getpc_b64 s[6:7]
6681 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
6682 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
6683 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6684 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
6685 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6686 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6687 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6688 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6689 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6690 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6691 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6692 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6693 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6695 ; GFX10-LABEL: test_call_external_void_func_v3f16_inreg:
6697 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6698 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6699 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6700 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6701 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6702 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6703 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
6704 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6705 ; GFX10-NEXT: s_mov_b32 s33, s32
6706 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6707 ; GFX10-NEXT: s_getpc_b64 s[6:7]
6708 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
6709 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
6710 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6711 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6712 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
6713 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6714 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6715 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6716 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6717 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6718 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6719 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6720 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6721 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6722 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6724 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg:
6725 ; GFX10-SCRATCH: ; %bb.0:
6726 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6727 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6728 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6729 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6730 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6731 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6732 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
6733 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6734 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6735 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6736 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6737 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
6738 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
6739 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6740 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6741 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6742 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6743 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6744 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6745 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6746 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6747 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6748 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6749 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6750 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6751 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6752 %val = load <3 x half>, <3 x half> addrspace(4)* undef
6753 call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val)
; Calls external_void_func_v3i16_inreg with the constant vector
; <i16 1, i16 2, i16 3> passed with the inreg attribute.
; For every RUN configuration the checks expect the constant to be
; materialized with scalar moves before the call: s4 = 0x20001 (elements 1 and
; 2 packed as (2 << 16) | 1) and s5 = 3.
6757 define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 {
6758 ; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg:
6760 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6761 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6762 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6763 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6764 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6765 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6766 ; GFX9-NEXT: s_mov_b32 s33, s32
6767 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6768 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
6769 ; GFX9-NEXT: s_mov_b32 s5, 3
6770 ; GFX9-NEXT: s_getpc_b64 s[6:7]
6771 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
6772 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
6773 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6774 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
6775 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6776 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6777 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6778 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6779 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6780 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6781 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6782 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6783 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6785 ; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg:
6787 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6788 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6789 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6790 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6791 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6792 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6793 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6794 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
6795 ; GFX10-NEXT: s_mov_b32 s5, 3
6796 ; GFX10-NEXT: s_mov_b32 s33, s32
6797 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6798 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6799 ; GFX10-NEXT: s_getpc_b64 s[6:7]
6800 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
6801 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
6802 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6803 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
6804 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6805 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6806 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6807 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6808 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6809 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6810 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6811 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6812 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6813 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6815 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg:
6816 ; GFX10-SCRATCH: ; %bb.0:
6817 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6818 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6819 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6820 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6821 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6822 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6823 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6824 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
6825 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3
6826 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6827 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6828 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6829 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6830 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
6831 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
6832 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6833 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6834 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6835 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6836 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6837 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6838 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6839 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6840 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6841 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6842 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6843 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6844 call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg <i16 1, i16 2, i16 3>)
; Calls external_void_func_v3f16_inreg with the constant vector
; <half 1.0, half 2.0, half 4.0> passed with the inreg attribute.
; For every RUN configuration the checks expect the constant to be
; materialized with scalar moves before the call: s4 = 0x40003c00 and
; s5 = 0x4400 (0x3c00, 0x4000 and 0x4400 are the IEEE half bit patterns of
; 1.0, 2.0 and 4.0).
6848 define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 {
6849 ; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg:
6851 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6852 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6853 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6854 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6855 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6856 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6857 ; GFX9-NEXT: s_mov_b32 s33, s32
6858 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6859 ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00
6860 ; GFX9-NEXT: s_movk_i32 s5, 0x4400
6861 ; GFX9-NEXT: s_getpc_b64 s[6:7]
6862 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
6863 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
6864 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6865 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
6866 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6867 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6868 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6869 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6870 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6871 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6872 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6873 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6874 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6876 ; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg:
6878 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6879 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6880 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6881 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6882 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6883 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6884 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6885 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00
6886 ; GFX10-NEXT: s_movk_i32 s5, 0x4400
6887 ; GFX10-NEXT: s_mov_b32 s33, s32
6888 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6889 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6890 ; GFX10-NEXT: s_getpc_b64 s[6:7]
6891 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
6892 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
6893 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6894 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
6895 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6896 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6897 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6898 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6899 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6900 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6901 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6902 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6903 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6904 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6906 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg:
6907 ; GFX10-SCRATCH: ; %bb.0:
6908 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6909 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6910 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
6911 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6912 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6913 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
6914 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
6915 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00
6916 ; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400
6917 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
6918 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
6919 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
6920 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
6921 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
6922 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
6923 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
6924 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
6925 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
6926 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
6927 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
6928 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
6929 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
6930 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6931 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
6932 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
6933 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
6934 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
6935 call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg <half 1.0, half 2.0, half 4.0>)
; Calls external_void_func_v4i16_inreg with a <4 x i16> value that is loaded
; from an undef addrspace(4) pointer and passed with the inreg attribute.
; For every RUN configuration the checks expect an s_load_dwordx2 into s[4:5]
; ahead of the s_swappc_b64 call, with s30, s31 and s33 saved in lanes of v40
; around the call.
6939 define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 {
6940 ; GFX9-LABEL: test_call_external_void_func_v4i16_inreg:
6942 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6943 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
6944 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6945 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
6946 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
6947 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
6948 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
6949 ; GFX9-NEXT: s_mov_b32 s33, s32
6950 ; GFX9-NEXT: s_addk_i32 s32, 0x400
6951 ; GFX9-NEXT: s_getpc_b64 s[6:7]
6952 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
6953 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
6954 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
6955 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
6956 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
6957 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
6958 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
6959 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
6960 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
6961 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6962 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
6963 ; GFX9-NEXT: s_waitcnt vmcnt(0)
6964 ; GFX9-NEXT: s_setpc_b64 s[4:5]
6966 ; GFX10-LABEL: test_call_external_void_func_v4i16_inreg:
6968 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6969 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6970 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
6971 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6972 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6973 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
6974 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
6975 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
6976 ; GFX10-NEXT: s_mov_b32 s33, s32
6977 ; GFX10-NEXT: s_addk_i32 s32, 0x200
6978 ; GFX10-NEXT: s_getpc_b64 s[6:7]
6979 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
6980 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
6981 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
6982 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
6983 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
6984 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
6985 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
6986 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
6987 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
6988 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
6989 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6990 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
6991 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
6992 ; GFX10-NEXT: s_waitcnt vmcnt(0)
6993 ; GFX10-NEXT: s_setpc_b64 s[4:5]
6995 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg:
6996 ; GFX10-SCRATCH: ; %bb.0:
6997 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6998 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
6999 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7000 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7001 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7002 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7003 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
7004 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7005 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7006 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7007 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7008 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
7009 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
7010 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7011 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7012 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7013 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7014 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7015 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7016 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7017 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7018 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7019 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7020 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7021 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7022 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
7023 %val = load <4 x i16>, <4 x i16> addrspace(4)* undef
7024 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val)
; Calls external_void_func_v4i16_inreg with the constant vector
; <i16 1, i16 2, i16 3, i16 4> passed with the inreg attribute.
; For every RUN configuration the checks expect the constant to be
; materialized with scalar moves before the call: s4 = 0x20001
; ((2 << 16) | 1) and s5 = 0x40003 ((4 << 16) | 3).
7028 define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 {
7029 ; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg:
7031 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7032 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7033 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7034 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7035 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7036 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7037 ; GFX9-NEXT: s_mov_b32 s33, s32
7038 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7039 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
7040 ; GFX9-NEXT: s_mov_b32 s5, 0x40003
7041 ; GFX9-NEXT: s_getpc_b64 s[6:7]
7042 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
7043 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
7044 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7045 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
7046 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7047 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7048 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7049 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7050 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7051 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7052 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7053 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7054 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7056 ; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg:
7058 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7059 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7060 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7061 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7062 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7063 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7064 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7065 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
7066 ; GFX10-NEXT: s_mov_b32 s5, 0x40003
7067 ; GFX10-NEXT: s_mov_b32 s33, s32
7068 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7069 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7070 ; GFX10-NEXT: s_getpc_b64 s[6:7]
7071 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
7072 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
7073 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7074 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
7075 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7076 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7077 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7078 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7079 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7080 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7081 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7082 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7083 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7084 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7086 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg:
7087 ; GFX10-SCRATCH: ; %bb.0:
7088 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7089 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7090 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7091 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7092 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7093 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7094 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7095 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
7096 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003
7097 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7098 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7099 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7100 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7101 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
7102 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
7103 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7104 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7105 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7106 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7107 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7108 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7109 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7110 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7111 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7112 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7113 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7114 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
7115 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg <i16 1, i16 2, i16 3, i16 4>)
; Calls external_void_func_v2f16_inreg with a <2 x half> value that is loaded
; from an undef addrspace(4) pointer and passed with the inreg attribute.
; For every RUN configuration the checks expect a single s_load_dword into s4
; ahead of the s_swappc_b64 call, with s30, s31 and s33 saved in lanes of v40
; around the call.
7119 define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 {
7120 ; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
7122 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7123 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7124 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7125 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7126 ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
7127 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7128 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7129 ; GFX9-NEXT: s_mov_b32 s33, s32
7130 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7131 ; GFX9-NEXT: s_getpc_b64 s[6:7]
7132 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4
7133 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12
7134 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7135 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
7136 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7137 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7138 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7139 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7140 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7141 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7142 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7143 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7144 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7146 ; GFX10-LABEL: test_call_external_void_func_v2f16_inreg:
7148 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7149 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7150 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7151 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7152 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7153 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7154 ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
7155 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7156 ; GFX10-NEXT: s_mov_b32 s33, s32
7157 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7158 ; GFX10-NEXT: s_getpc_b64 s[6:7]
7159 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4
7160 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12
7161 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7162 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7163 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
7164 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7165 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7166 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7167 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7168 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7169 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7170 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7171 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7172 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7173 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7175 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg:
7176 ; GFX10-SCRATCH: ; %bb.0:
7177 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7178 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7179 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7180 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7181 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7182 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7183 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
7184 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7185 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7186 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7187 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7188 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4
7189 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12
7190 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7191 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7192 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7193 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7194 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7195 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7196 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7197 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7198 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7199 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7200 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7201 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7202 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
7203 %val = load <2 x half>, <2 x half> addrspace(4)* undef
7204 call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val)
; Calls external_void_func_v2i32_inreg with a <2 x i32> value that is loaded
; from an undef addrspace(4) pointer and passed with the inreg attribute.
; The expected code loads the value into s[4:5] with s_load_dwordx2, forms the
; callee address with s_getpc_b64 plus the @rel32@lo/@rel32@hi offsets, and
; calls it through s_swappc_b64. v40 is spilled around the call; its lanes 0-2
; hold s30, s31 and s33. The gfx900 run bumps s32 by 0x400, the gfx1010 run by
; 0x200, and the flat-scratch run by 16 while using scratch_store_dword /
; scratch_load_dword for the v40 spill.
7208 define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 {
7209 ; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
7211 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7212 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7213 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7214 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7215 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
7216 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7217 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7218 ; GFX9-NEXT: s_mov_b32 s33, s32
7219 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7220 ; GFX9-NEXT: s_getpc_b64 s[6:7]
7221 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
7222 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
7223 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7224 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
7225 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7226 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7227 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7228 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7229 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7230 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7231 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7232 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7233 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7235 ; GFX10-LABEL: test_call_external_void_func_v2i32_inreg:
7237 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7238 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7239 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7240 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7241 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7242 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7243 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
7244 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7245 ; GFX10-NEXT: s_mov_b32 s33, s32
7246 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7247 ; GFX10-NEXT: s_getpc_b64 s[6:7]
7248 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
7249 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
7250 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7251 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7252 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
7253 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7254 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7255 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7256 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7257 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7258 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7259 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7260 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7261 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7262 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7264 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg:
7265 ; GFX10-SCRATCH: ; %bb.0:
7266 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7267 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7268 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7269 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7270 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7271 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7272 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
7273 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7274 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7275 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7276 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7277 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
7278 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
7279 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7280 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7281 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7282 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7283 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7284 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7285 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7286 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7287 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7288 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7289 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7290 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7291 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the loaded vector is the single inreg argument of the call.
7292 %val = load <2 x i32>, <2 x i32> addrspace(4)* undef
7293 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val)
; Calls external_void_func_v2i32_inreg with the constant vector <i32 1, i32 2>
; passed inreg. The expected code materializes the two elements with
; s_mov_b32 into s4 and s5, forms the callee address with s_getpc_b64 plus the
; @rel32@lo/@rel32@hi offsets, and calls it through s_swappc_b64. v40 is
; spilled around the call; its lanes 0-2 hold s30, s31 and s33. The
; flat-scratch run uses scratch_store_dword / scratch_load_dword for that
; spill and adjusts s32 by 16 rather than 0x400 (gfx900) or 0x200 (gfx1010).
7297 define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 {
7298 ; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg:
7300 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7301 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7302 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7303 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7304 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7305 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7306 ; GFX9-NEXT: s_mov_b32 s33, s32
7307 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7308 ; GFX9-NEXT: s_mov_b32 s4, 1
7309 ; GFX9-NEXT: s_mov_b32 s5, 2
7310 ; GFX9-NEXT: s_getpc_b64 s[6:7]
7311 ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
7312 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
7313 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7314 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
7315 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7316 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7317 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7318 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7319 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7320 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7321 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7322 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7323 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7325 ; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg:
7327 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7328 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7329 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7330 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7331 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7332 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7333 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7334 ; GFX10-NEXT: s_mov_b32 s4, 1
7335 ; GFX10-NEXT: s_mov_b32 s5, 2
7336 ; GFX10-NEXT: s_mov_b32 s33, s32
7337 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7338 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7339 ; GFX10-NEXT: s_getpc_b64 s[6:7]
7340 ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
7341 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
7342 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7343 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
7344 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7345 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7346 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7347 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7348 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7349 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7350 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7351 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7352 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7353 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7355 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg:
7356 ; GFX10-SCRATCH: ; %bb.0:
7357 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7358 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7359 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7360 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7361 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7362 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7363 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7364 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
7365 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
7366 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7367 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7368 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7369 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7370 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
7371 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
7372 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7373 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7374 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7375 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7376 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7377 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7378 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7379 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7380 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7381 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7382 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7383 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: an immediate vector is the single inreg argument of the call.
7384 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg <i32 1, i32 2>)
; Calls external_void_func_v3i32_inreg with the constant vector
; <i32 3, i32 4, i32 5> passed inreg. The caller declares one unnamed i32
; parameter that the visible body never references. The expected code
; materializes the three elements with s_mov_b32 into s4, s5 and s6, forms the
; callee address with s_getpc_b64 plus the @rel32@lo/@rel32@hi offsets (in
; s[8:9], or s[0:1] in the flat-scratch run), and calls it through
; s_swappc_b64. v40 is spilled around the call; its lanes 0-2 hold s30, s31
; and s33.
7388 define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 {
7389 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg:
7391 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7392 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7393 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7394 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7395 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7396 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7397 ; GFX9-NEXT: s_mov_b32 s33, s32
7398 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7399 ; GFX9-NEXT: s_mov_b32 s4, 3
7400 ; GFX9-NEXT: s_mov_b32 s5, 4
7401 ; GFX9-NEXT: s_mov_b32 s6, 5
7402 ; GFX9-NEXT: s_getpc_b64 s[8:9]
7403 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_inreg@rel32@lo+4
7404 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_inreg@rel32@hi+12
7405 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7406 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
7407 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7408 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7409 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7410 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7411 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7412 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7413 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7414 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7415 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7417 ; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg:
7419 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7420 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7421 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7422 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7423 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7424 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7425 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7426 ; GFX10-NEXT: s_mov_b32 s4, 3
7427 ; GFX10-NEXT: s_mov_b32 s5, 4
7428 ; GFX10-NEXT: s_mov_b32 s6, 5
7429 ; GFX10-NEXT: s_mov_b32 s33, s32
7430 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7431 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7432 ; GFX10-NEXT: s_getpc_b64 s[8:9]
7433 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_inreg@rel32@lo+4
7434 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_inreg@rel32@hi+12
7435 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7436 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
7437 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7438 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7439 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7440 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7441 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7442 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7443 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7444 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7445 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7446 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7448 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg:
7449 ; GFX10-SCRATCH: ; %bb.0:
7450 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7451 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7452 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7453 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7454 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7455 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7456 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7457 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
7458 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
7459 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
7460 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7461 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7462 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7463 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7464 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4
7465 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12
7466 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7467 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7468 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7469 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7470 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7471 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7472 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7473 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7474 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7475 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7476 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7477 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: a three-element immediate vector is the single inreg argument.
7478 call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>)
; Calls external_void_func_v3i32_i32_inreg with two inreg arguments: the
; constant vector <i32 3, i32 4, i32 5> followed by the scalar i32 6. The
; caller declares one unnamed i32 parameter that the visible body never
; references. The expected code materializes the vector elements into s4-s6
; and the scalar into s7 with s_mov_b32, forms the callee address with
; s_getpc_b64 plus the @rel32@lo/@rel32@hi offsets (in s[8:9], or s[0:1] in
; the flat-scratch run), and calls it through s_swappc_b64. v40 is spilled
; around the call; its lanes 0-2 hold s30, s31 and s33.
7482 define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 {
7483 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg:
7485 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7486 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7487 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7488 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7489 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7490 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7491 ; GFX9-NEXT: s_mov_b32 s33, s32
7492 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7493 ; GFX9-NEXT: s_mov_b32 s4, 3
7494 ; GFX9-NEXT: s_mov_b32 s5, 4
7495 ; GFX9-NEXT: s_mov_b32 s6, 5
7496 ; GFX9-NEXT: s_mov_b32 s7, 6
7497 ; GFX9-NEXT: s_getpc_b64 s[8:9]
7498 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32_inreg@rel32@lo+4
7499 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32_inreg@rel32@hi+12
7500 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7501 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
7502 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7503 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7504 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7505 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7506 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7507 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7508 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7509 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7510 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7512 ; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg:
7514 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7515 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7516 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7517 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7518 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7519 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7520 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7521 ; GFX10-NEXT: s_mov_b32 s4, 3
7522 ; GFX10-NEXT: s_mov_b32 s5, 4
7523 ; GFX10-NEXT: s_mov_b32 s6, 5
7524 ; GFX10-NEXT: s_mov_b32 s7, 6
7525 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7526 ; GFX10-NEXT: s_mov_b32 s33, s32
7527 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7528 ; GFX10-NEXT: s_getpc_b64 s[8:9]
7529 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32_inreg@rel32@lo+4
7530 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32_inreg@rel32@hi+12
7531 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7532 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
7533 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7534 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7535 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7536 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7537 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7538 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7539 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7540 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7541 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7542 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7544 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg:
7545 ; GFX10-SCRATCH: ; %bb.0:
7546 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7547 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7548 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7549 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7550 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7551 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7552 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7553 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
7554 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
7555 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
7556 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6
7557 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7558 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7559 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7560 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7561 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4
7562 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12
7563 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7564 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7565 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7566 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7567 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7568 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7569 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7570 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7571 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7572 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7573 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7574 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: an immediate vector and an immediate scalar, both inreg.
7575 call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>, i32 inreg 6)
; Calls external_void_func_v4i32_inreg with a <4 x i32> value that is loaded
; from an undef addrspace(4) pointer and passed with the inreg attribute.
; The expected code loads the value into s[4:7] with s_load_dwordx4, forms the
; callee address with s_getpc_b64 plus the @rel32@lo/@rel32@hi offsets (in
; s[8:9], or s[0:1] in the flat-scratch run), and calls it through
; s_swappc_b64. v40 is spilled around the call; its lanes 0-2 hold s30, s31
; and s33.
7579 define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 {
7580 ; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
7582 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7583 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7584 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7585 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7586 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
7587 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7588 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7589 ; GFX9-NEXT: s_mov_b32 s33, s32
7590 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7591 ; GFX9-NEXT: s_getpc_b64 s[8:9]
7592 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
7593 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
7594 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7595 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
7596 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7597 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7598 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7599 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7600 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7601 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7602 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7603 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7604 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7606 ; GFX10-LABEL: test_call_external_void_func_v4i32_inreg:
7608 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7609 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7610 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7611 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7612 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7613 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7614 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
7615 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7616 ; GFX10-NEXT: s_mov_b32 s33, s32
7617 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7618 ; GFX10-NEXT: s_getpc_b64 s[8:9]
7619 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
7620 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
7621 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7622 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7623 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
7624 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7625 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7626 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7627 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7628 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7629 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7630 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7631 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7632 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7633 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7635 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg:
7636 ; GFX10-SCRATCH: ; %bb.0:
7637 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7638 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7639 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7640 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7641 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7642 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7643 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
7644 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7645 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7646 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7647 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7648 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
7649 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
7650 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7651 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7652 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7653 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7654 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7655 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7656 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7657 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7658 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7659 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7660 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7661 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7662 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the loaded vector is the single inreg argument of the call.
7663 %val = load <4 x i32>, <4 x i32> addrspace(4)* undef
7664 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val)
; Calls external_void_func_v4i32_inreg with the constant vector
; <i32 1, i32 2, i32 3, i32 4> passed inreg. The expected code materializes
; the four elements with s_mov_b32 into s4-s7, forms the callee address with
; s_getpc_b64 plus the @rel32@lo/@rel32@hi offsets (in s[8:9], or s[0:1] in
; the flat-scratch run), and calls it through s_swappc_b64. v40 is spilled
; around the call; its lanes 0-2 hold s30, s31 and s33.
7668 define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 {
7669 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg:
7671 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7672 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7673 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7674 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7675 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7676 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7677 ; GFX9-NEXT: s_mov_b32 s33, s32
7678 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7679 ; GFX9-NEXT: s_mov_b32 s4, 1
7680 ; GFX9-NEXT: s_mov_b32 s5, 2
7681 ; GFX9-NEXT: s_mov_b32 s6, 3
7682 ; GFX9-NEXT: s_mov_b32 s7, 4
7683 ; GFX9-NEXT: s_getpc_b64 s[8:9]
7684 ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
7685 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
7686 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7687 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
7688 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7689 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7690 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7691 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7692 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7693 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7694 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7695 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7696 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7698 ; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg:
7700 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7701 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7702 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7703 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7704 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7705 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7706 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7707 ; GFX10-NEXT: s_mov_b32 s4, 1
7708 ; GFX10-NEXT: s_mov_b32 s5, 2
7709 ; GFX10-NEXT: s_mov_b32 s6, 3
7710 ; GFX10-NEXT: s_mov_b32 s7, 4
7711 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7712 ; GFX10-NEXT: s_mov_b32 s33, s32
7713 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7714 ; GFX10-NEXT: s_getpc_b64 s[8:9]
7715 ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
7716 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
7717 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7718 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
7719 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7720 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7721 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7722 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7723 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7724 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7725 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7726 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7727 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7728 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7730 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg:
7731 ; GFX10-SCRATCH: ; %bb.0:
7732 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7733 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7734 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7735 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7736 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7737 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7738 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7739 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
7740 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
7741 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
7742 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
7743 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7744 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7745 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7746 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7747 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
7748 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
7749 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7750 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7751 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7752 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7753 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7754 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7755 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7756 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7757 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7758 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7759 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7760 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: a four-element immediate vector is the single inreg argument.
7761 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>)
; Calls external_void_func_v5i32_inreg with the constant vector
; <i32 1, i32 2, i32 3, i32 4, i32 5> passed inreg. The expected code
; materializes the five elements with s_mov_b32 into s4-s8, forms the callee
; address with s_getpc_b64 plus the @rel32@lo/@rel32@hi offsets (in s[10:11],
; or s[0:1] in the flat-scratch run), and calls it through s_swappc_b64. v40
; is spilled around the call; its lanes 0-2 hold s30, s31 and s33.
7765 define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 {
7766 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg:
7768 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7769 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7770 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7771 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7772 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7773 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7774 ; GFX9-NEXT: s_mov_b32 s33, s32
7775 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7776 ; GFX9-NEXT: s_mov_b32 s4, 1
7777 ; GFX9-NEXT: s_mov_b32 s5, 2
7778 ; GFX9-NEXT: s_mov_b32 s6, 3
7779 ; GFX9-NEXT: s_mov_b32 s7, 4
7780 ; GFX9-NEXT: s_mov_b32 s8, 5
7781 ; GFX9-NEXT: s_getpc_b64 s[10:11]
7782 ; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v5i32_inreg@rel32@lo+4
7783 ; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v5i32_inreg@rel32@hi+12
7784 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7785 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
7786 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7787 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7788 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7789 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7790 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7791 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7792 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7793 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7794 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7796 ; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg:
7798 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7799 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7800 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7801 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7802 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7803 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7804 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7805 ; GFX10-NEXT: s_mov_b32 s4, 1
7806 ; GFX10-NEXT: s_mov_b32 s5, 2
7807 ; GFX10-NEXT: s_mov_b32 s6, 3
7808 ; GFX10-NEXT: s_mov_b32 s7, 4
7809 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7810 ; GFX10-NEXT: s_mov_b32 s8, 5
7811 ; GFX10-NEXT: s_mov_b32 s33, s32
7812 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7813 ; GFX10-NEXT: s_getpc_b64 s[10:11]
7814 ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5i32_inreg@rel32@lo+4
7815 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5i32_inreg@rel32@hi+12
7816 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7817 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
7818 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7819 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7820 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7821 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7822 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7823 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7824 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7825 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7826 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7827 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7829 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg:
7830 ; GFX10-SCRATCH: ; %bb.0:
7831 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7832 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7833 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7834 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7835 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7836 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7837 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7838 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
7839 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
7840 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
7841 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
7842 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7843 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
7844 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7845 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7846 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7847 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4
7848 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12
7849 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7850 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7851 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7852 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7853 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7854 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7855 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7856 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7857 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7858 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7859 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7860 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: a five-element immediate vector is the single inreg argument.
7861 call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>)
; Test: an <8 x i32> value, loaded through a pointer that is itself loaded
; from an undef addrspace(4) pointer, is passed as an `inreg` argument to
; @external_void_func_v8i32_inreg.
; For all three RUN configurations (gfx900, gfx1010, gfx1010 with flat scratch)
; the assertions below show the vector being loaded into s[4:11] with
; s_load_dwordx8 before the s_swappc_b64 call.
; NOTE: the assertion lines are autogenerated by utils/update_llc_test_checks.py
; (see the first line of this file); regenerate them instead of hand-editing.
7865 define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 {
7866 ; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
7868 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7869 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7870 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7871 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7872 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
7873 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7874 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7875 ; GFX9-NEXT: s_mov_b32 s33, s32
7876 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7877 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
7878 ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
7879 ; GFX9-NEXT: s_getpc_b64 s[12:13]
7880 ; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
7881 ; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
7882 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7883 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13]
7884 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7885 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7886 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7887 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7888 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7889 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7890 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7891 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7892 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7894 ; GFX10-LABEL: test_call_external_void_func_v8i32_inreg:
7896 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7897 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7898 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
7899 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7900 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7901 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
7902 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
7903 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
7904 ; GFX10-NEXT: s_mov_b32 s33, s32
7905 ; GFX10-NEXT: s_addk_i32 s32, 0x200
7906 ; GFX10-NEXT: s_getpc_b64 s[12:13]
7907 ; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
7908 ; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
7909 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
7910 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
7911 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
7912 ; GFX10-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
7913 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13]
7914 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
7915 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
7916 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
7917 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
7918 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
7919 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7920 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
7921 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
7922 ; GFX10-NEXT: s_waitcnt vmcnt(0)
7923 ; GFX10-NEXT: s_setpc_b64 s[4:5]
7925 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg:
7926 ; GFX10-SCRATCH: ; %bb.0:
7927 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7928 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
7929 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
7930 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7931 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7932 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
7933 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
7934 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
7935 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
7936 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
7937 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
7938 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
7939 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
7940 ; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
7941 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
7942 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
7943 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
7944 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
7945 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
7946 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
7947 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
7948 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
7949 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
7950 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7951 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
7952 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
7953 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
7954 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: load the vector pointer from undef, load the vector through
; it, then make the call with the vector marked inreg.
7955 %ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef
7956 %val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr
7957 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val)
; Test: a constant <8 x i32> <1, 2, ..., 8> is passed as an `inreg` argument to
; @external_void_func_v8i32_inreg.
; For all three RUN configurations the assertions below show each element
; being materialized with s_mov_b32 into s4..s11 (values 1..8 in order) before
; the s_swappc_b64 call; no memory loads are needed for the argument.
; NOTE: the assertion lines are autogenerated by utils/update_llc_test_checks.py
; (see the first line of this file); regenerate them instead of hand-editing.
7961 define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 {
7962 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg:
7964 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7965 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
7966 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7967 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
7968 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
7969 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
7970 ; GFX9-NEXT: s_mov_b32 s33, s32
7971 ; GFX9-NEXT: s_addk_i32 s32, 0x400
7972 ; GFX9-NEXT: s_mov_b32 s4, 1
7973 ; GFX9-NEXT: s_mov_b32 s5, 2
7974 ; GFX9-NEXT: s_mov_b32 s6, 3
7975 ; GFX9-NEXT: s_mov_b32 s7, 4
7976 ; GFX9-NEXT: s_mov_b32 s8, 5
7977 ; GFX9-NEXT: s_mov_b32 s9, 6
7978 ; GFX9-NEXT: s_mov_b32 s10, 7
7979 ; GFX9-NEXT: s_mov_b32 s11, 8
7980 ; GFX9-NEXT: s_getpc_b64 s[12:13]
7981 ; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
7982 ; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
7983 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
7984 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13]
7985 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
7986 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
7987 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
7988 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
7989 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
7990 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7991 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
7992 ; GFX9-NEXT: s_waitcnt vmcnt(0)
7993 ; GFX9-NEXT: s_setpc_b64 s[4:5]
7995 ; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg:
7997 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7998 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
7999 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
8000 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8001 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8002 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
8003 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
8004 ; GFX10-NEXT: s_mov_b32 s4, 1
8005 ; GFX10-NEXT: s_mov_b32 s5, 2
8006 ; GFX10-NEXT: s_mov_b32 s6, 3
8007 ; GFX10-NEXT: s_mov_b32 s7, 4
8008 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8009 ; GFX10-NEXT: s_mov_b32 s8, 5
8010 ; GFX10-NEXT: s_mov_b32 s9, 6
8011 ; GFX10-NEXT: s_mov_b32 s10, 7
8012 ; GFX10-NEXT: s_mov_b32 s11, 8
8013 ; GFX10-NEXT: s_mov_b32 s33, s32
8014 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8015 ; GFX10-NEXT: s_getpc_b64 s[12:13]
8016 ; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
8017 ; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
8018 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8019 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13]
8020 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
8021 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
8022 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8023 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
8024 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
8025 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8026 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8027 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
8028 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8029 ; GFX10-NEXT: s_setpc_b64 s[4:5]
8031 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg:
8032 ; GFX10-SCRATCH: ; %bb.0:
8033 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8034 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8035 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
8036 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8037 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8038 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
8039 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
8040 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
8041 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
8042 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
8043 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
8044 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8045 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
8046 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6
8047 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7
8048 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8
8049 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8050 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8051 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8052 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
8053 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
8054 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8055 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8056 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
8057 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
8058 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8059 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
8060 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
8061 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8062 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8063 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
8064 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8065 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: the call with an immediate vector operand marked inreg.
8066 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
; Test: a <16 x i32> value, loaded through a pointer that is itself loaded
; from an undef addrspace(4) pointer, is passed as an `inreg` argument to
; @external_void_func_v16i32_inreg.
; For all three RUN configurations the assertions below show the whole vector
; being loaded into s[4:19] with a single s_load_dwordx16 before the
; s_swappc_b64 call; nothing is passed on the stack.
; NOTE: the assertion lines are autogenerated by utils/update_llc_test_checks.py
; (see the first line of this file); regenerate them instead of hand-editing.
8070 define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 {
8071 ; GFX9-LABEL: test_call_external_void_func_v16i32_inreg:
8073 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8074 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
8075 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8076 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
8077 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
8078 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
8079 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8080 ; GFX9-NEXT: s_mov_b32 s33, s32
8081 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8082 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8083 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0
8084 ; GFX9-NEXT: s_getpc_b64 s[20:21]
8085 ; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v16i32_inreg@rel32@lo+4
8086 ; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v16i32_inreg@rel32@hi+12
8087 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8088 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
8089 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
8090 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
8091 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8092 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
8093 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
8094 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8095 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
8096 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8097 ; GFX9-NEXT: s_setpc_b64 s[4:5]
8099 ; GFX10-LABEL: test_call_external_void_func_v16i32_inreg:
8101 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8102 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8103 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
8104 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8105 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8106 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
8107 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
8108 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
8109 ; GFX10-NEXT: s_mov_b32 s33, s32
8110 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8111 ; GFX10-NEXT: s_getpc_b64 s[20:21]
8112 ; GFX10-NEXT: s_add_u32 s20, s20, external_void_func_v16i32_inreg@rel32@lo+4
8113 ; GFX10-NEXT: s_addc_u32 s21, s21, external_void_func_v16i32_inreg@rel32@hi+12
8114 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8115 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8116 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8117 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0
8118 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[20:21]
8119 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
8120 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
8121 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8122 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
8123 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
8124 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8125 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8126 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
8127 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8128 ; GFX10-NEXT: s_setpc_b64 s[4:5]
8130 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg:
8131 ; GFX10-SCRATCH: ; %bb.0:
8132 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8133 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8134 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
8135 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8136 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8137 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
8138 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8139 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
8140 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8141 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8142 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8143 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8144 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8145 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
8146 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8147 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4
8148 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12
8149 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8150 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
8151 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
8152 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8153 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
8154 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
8155 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8156 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8157 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
8158 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8159 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: load the vector pointer from undef, load the vector through
; it, then make the call with the vector marked inreg.
8160 %ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef
8161 %val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr
8162 call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val)
; Test: a <32 x i32> value, loaded through a pointer that is itself loaded
; from an undef addrspace(4) pointer, is passed as an `inreg` argument to
; @external_void_func_v32i32_inreg.
; What the assertions below show for all three RUN configurations:
;   - dwords 0-15 are loaded directly into s[4:19] (s_load_dwordx16 at 0x0);
;   - dwords 16-31 are loaded into s[36:51] (s_load_dwordx16 at 0x40);
;     s36-s45 are then copied to s20-s29, while s46-s51 are moved to VGPRs and
;     stored to the stack at s32 offsets 0..20 (six buffer_store_dword, or one
;     scratch_store_dwordx4 plus one scratch_store_dwordx2 with flat scratch);
;   - s36-s51 are written to lanes 0-15 of v40 before those loads and read
;     back after the call; the return address (s30/s31) uses lanes 16/17 and
;     s33 uses lane 18.
; NOTE: the assertion lines are autogenerated by utils/update_llc_test_checks.py
; (see the first line of this file); regenerate them instead of hand-editing.
8166 define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 {
8167 ; GFX9-LABEL: test_call_external_void_func_v32i32_inreg:
8169 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8170 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
8171 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8172 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
8173 ; GFX9-NEXT: v_writelane_b32 v40, s33, 18
8174 ; GFX9-NEXT: v_writelane_b32 v40, s36, 0
8175 ; GFX9-NEXT: v_writelane_b32 v40, s37, 1
8176 ; GFX9-NEXT: v_writelane_b32 v40, s38, 2
8177 ; GFX9-NEXT: v_writelane_b32 v40, s39, 3
8178 ; GFX9-NEXT: v_writelane_b32 v40, s40, 4
8179 ; GFX9-NEXT: v_writelane_b32 v40, s41, 5
8180 ; GFX9-NEXT: v_writelane_b32 v40, s42, 6
8181 ; GFX9-NEXT: v_writelane_b32 v40, s43, 7
8182 ; GFX9-NEXT: v_writelane_b32 v40, s44, 8
8183 ; GFX9-NEXT: v_writelane_b32 v40, s45, 9
8184 ; GFX9-NEXT: v_writelane_b32 v40, s46, 10
8185 ; GFX9-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
8186 ; GFX9-NEXT: v_writelane_b32 v40, s47, 11
8187 ; GFX9-NEXT: v_writelane_b32 v40, s48, 12
8188 ; GFX9-NEXT: v_writelane_b32 v40, s49, 13
8189 ; GFX9-NEXT: v_writelane_b32 v40, s50, 14
8190 ; GFX9-NEXT: v_writelane_b32 v40, s51, 15
8191 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8192 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
8193 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
8194 ; GFX9-NEXT: s_mov_b32 s33, s32
8195 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8196 ; GFX9-NEXT: v_writelane_b32 v40, s30, 16
8197 ; GFX9-NEXT: v_writelane_b32 v40, s31, 17
8198 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8199 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
8200 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
8201 ; GFX9-NEXT: v_mov_b32_e32 v2, s48
8202 ; GFX9-NEXT: v_mov_b32_e32 v3, s49
8203 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
8204 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
8205 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
8206 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
8207 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
8208 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
8209 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
8210 ; GFX9-NEXT: s_mov_b32 s20, s36
8211 ; GFX9-NEXT: s_mov_b32 s21, s37
8212 ; GFX9-NEXT: s_mov_b32 s22, s38
8213 ; GFX9-NEXT: s_mov_b32 s23, s39
8214 ; GFX9-NEXT: s_mov_b32 s24, s40
8215 ; GFX9-NEXT: s_mov_b32 s25, s41
8216 ; GFX9-NEXT: s_mov_b32 s26, s42
8217 ; GFX9-NEXT: s_mov_b32 s27, s43
8218 ; GFX9-NEXT: s_mov_b32 s28, s44
8219 ; GFX9-NEXT: s_mov_b32 s29, s45
8220 ; GFX9-NEXT: s_getpc_b64 s[30:31]
8221 ; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4
8222 ; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12
8223 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
8224 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
8225 ; GFX9-NEXT: v_readlane_b32 s4, v40, 16
8226 ; GFX9-NEXT: v_readlane_b32 s5, v40, 17
8227 ; GFX9-NEXT: v_readlane_b32 s51, v40, 15
8228 ; GFX9-NEXT: v_readlane_b32 s50, v40, 14
8229 ; GFX9-NEXT: v_readlane_b32 s49, v40, 13
8230 ; GFX9-NEXT: v_readlane_b32 s48, v40, 12
8231 ; GFX9-NEXT: v_readlane_b32 s47, v40, 11
8232 ; GFX9-NEXT: v_readlane_b32 s46, v40, 10
8233 ; GFX9-NEXT: v_readlane_b32 s45, v40, 9
8234 ; GFX9-NEXT: v_readlane_b32 s44, v40, 8
8235 ; GFX9-NEXT: v_readlane_b32 s43, v40, 7
8236 ; GFX9-NEXT: v_readlane_b32 s42, v40, 6
8237 ; GFX9-NEXT: v_readlane_b32 s41, v40, 5
8238 ; GFX9-NEXT: v_readlane_b32 s40, v40, 4
8239 ; GFX9-NEXT: v_readlane_b32 s39, v40, 3
8240 ; GFX9-NEXT: v_readlane_b32 s38, v40, 2
8241 ; GFX9-NEXT: v_readlane_b32 s37, v40, 1
8242 ; GFX9-NEXT: v_readlane_b32 s36, v40, 0
8243 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8244 ; GFX9-NEXT: v_readlane_b32 s33, v40, 18
8245 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
8246 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8247 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
8248 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8249 ; GFX9-NEXT: s_setpc_b64 s[4:5]
8251 ; GFX10-LABEL: test_call_external_void_func_v32i32_inreg:
8253 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8254 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8255 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
8256 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8257 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8258 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
8259 ; GFX10-NEXT: v_writelane_b32 v40, s33, 18
8260 ; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
8261 ; GFX10-NEXT: s_mov_b32 s33, s32
8262 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8263 ; GFX10-NEXT: v_writelane_b32 v40, s36, 0
8264 ; GFX10-NEXT: v_writelane_b32 v40, s37, 1
8265 ; GFX10-NEXT: v_writelane_b32 v40, s38, 2
8266 ; GFX10-NEXT: v_writelane_b32 v40, s39, 3
8267 ; GFX10-NEXT: v_writelane_b32 v40, s40, 4
8268 ; GFX10-NEXT: v_writelane_b32 v40, s41, 5
8269 ; GFX10-NEXT: v_writelane_b32 v40, s42, 6
8270 ; GFX10-NEXT: v_writelane_b32 v40, s43, 7
8271 ; GFX10-NEXT: v_writelane_b32 v40, s44, 8
8272 ; GFX10-NEXT: v_writelane_b32 v40, s45, 9
8273 ; GFX10-NEXT: v_writelane_b32 v40, s46, 10
8274 ; GFX10-NEXT: v_writelane_b32 v40, s47, 11
8275 ; GFX10-NEXT: v_writelane_b32 v40, s48, 12
8276 ; GFX10-NEXT: v_writelane_b32 v40, s49, 13
8277 ; GFX10-NEXT: v_writelane_b32 v40, s50, 14
8278 ; GFX10-NEXT: v_writelane_b32 v40, s51, 15
8279 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8280 ; GFX10-NEXT: s_clause 0x1
8281 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
8282 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
8283 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16
8284 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17
8285 ; GFX10-NEXT: s_getpc_b64 s[30:31]
8286 ; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4
8287 ; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12
8288 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8289 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
8290 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
8291 ; GFX10-NEXT: v_mov_b32_e32 v2, s48
8292 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
8293 ; GFX10-NEXT: s_mov_b32 s20, s36
8294 ; GFX10-NEXT: s_mov_b32 s21, s37
8295 ; GFX10-NEXT: s_mov_b32 s22, s38
8296 ; GFX10-NEXT: s_mov_b32 s23, s39
8297 ; GFX10-NEXT: s_mov_b32 s24, s40
8298 ; GFX10-NEXT: s_mov_b32 s25, s41
8299 ; GFX10-NEXT: s_mov_b32 s26, s42
8300 ; GFX10-NEXT: s_mov_b32 s27, s43
8301 ; GFX10-NEXT: s_mov_b32 s28, s44
8302 ; GFX10-NEXT: s_mov_b32 s29, s45
8303 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
8304 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
8305 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
8306 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
8307 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
8308 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
8309 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
8310 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
8311 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
8312 ; GFX10-NEXT: v_readlane_b32 s4, v40, 16
8313 ; GFX10-NEXT: v_readlane_b32 s5, v40, 17
8314 ; GFX10-NEXT: v_readlane_b32 s51, v40, 15
8315 ; GFX10-NEXT: v_readlane_b32 s50, v40, 14
8316 ; GFX10-NEXT: v_readlane_b32 s49, v40, 13
8317 ; GFX10-NEXT: v_readlane_b32 s48, v40, 12
8318 ; GFX10-NEXT: v_readlane_b32 s47, v40, 11
8319 ; GFX10-NEXT: v_readlane_b32 s46, v40, 10
8320 ; GFX10-NEXT: v_readlane_b32 s45, v40, 9
8321 ; GFX10-NEXT: v_readlane_b32 s44, v40, 8
8322 ; GFX10-NEXT: v_readlane_b32 s43, v40, 7
8323 ; GFX10-NEXT: v_readlane_b32 s42, v40, 6
8324 ; GFX10-NEXT: v_readlane_b32 s41, v40, 5
8325 ; GFX10-NEXT: v_readlane_b32 s40, v40, 4
8326 ; GFX10-NEXT: v_readlane_b32 s39, v40, 3
8327 ; GFX10-NEXT: v_readlane_b32 s38, v40, 2
8328 ; GFX10-NEXT: v_readlane_b32 s37, v40, 1
8329 ; GFX10-NEXT: v_readlane_b32 s36, v40, 0
8330 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8331 ; GFX10-NEXT: v_readlane_b32 s33, v40, 18
8332 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
8333 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8334 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8335 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
8336 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8337 ; GFX10-NEXT: s_setpc_b64 s[4:5]
8339 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg:
8340 ; GFX10-SCRATCH: ; %bb.0:
8341 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8342 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8343 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
8344 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8345 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8346 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
8347 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18
8348 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8349 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8350 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8351 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0
8352 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 1
8353 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2
8354 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 3
8355 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 4
8356 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 5
8357 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 6
8358 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 7
8359 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 8
8360 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 9
8361 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 10
8362 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 11
8363 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 12
8364 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 13
8365 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
8366 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
8367 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8368 ; GFX10-SCRATCH-NEXT: s_clause 0x1
8369 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
8370 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
8371 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8372 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4
8373 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12
8374 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16
8375 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17
8376 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8377 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
8378 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
8379 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
8380 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
8381 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
8382 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
8383 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
8384 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
8385 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
8386 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
8387 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
8388 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
8389 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
8390 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
8391 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
8392 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
8393 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16
8394 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
8395 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8396 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 16
8397 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 17
8398 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 15
8399 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 14
8400 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 13
8401 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 12
8402 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 11
8403 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 10
8404 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 9
8405 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 8
8406 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 7
8407 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 6
8408 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 5
8409 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 4
8410 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 3
8411 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2
8412 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1
8413 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0
8414 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8415 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18
8416 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
8417 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8418 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8419 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
8420 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8421 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
; IR under test: load the vector pointer from undef, load the vector through
; it, then make the call with the vector marked inreg.
8422 %ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
8423 %val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr
8424 call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val)
8428 define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
8429 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg:
8431 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8432 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
8433 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8434 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
8435 ; GFX9-NEXT: v_writelane_b32 v40, s33, 18
8436 ; GFX9-NEXT: v_writelane_b32 v40, s36, 0
8437 ; GFX9-NEXT: v_writelane_b32 v40, s37, 1
8438 ; GFX9-NEXT: v_writelane_b32 v40, s38, 2
8439 ; GFX9-NEXT: v_writelane_b32 v40, s39, 3
8440 ; GFX9-NEXT: v_writelane_b32 v40, s40, 4
8441 ; GFX9-NEXT: v_writelane_b32 v40, s41, 5
8442 ; GFX9-NEXT: v_writelane_b32 v40, s42, 6
8443 ; GFX9-NEXT: v_writelane_b32 v40, s43, 7
8444 ; GFX9-NEXT: v_writelane_b32 v40, s44, 8
8445 ; GFX9-NEXT: v_writelane_b32 v40, s45, 9
8446 ; GFX9-NEXT: v_writelane_b32 v40, s46, 10
8447 ; GFX9-NEXT: v_writelane_b32 v40, s47, 11
8448 ; GFX9-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
8449 ; GFX9-NEXT: s_load_dword s22, s[4:5], 0x0
8450 ; GFX9-NEXT: v_writelane_b32 v40, s48, 12
8451 ; GFX9-NEXT: v_writelane_b32 v40, s49, 13
8452 ; GFX9-NEXT: v_writelane_b32 v40, s50, 14
8453 ; GFX9-NEXT: v_writelane_b32 v40, s51, 15
8454 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8455 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
8456 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
8457 ; GFX9-NEXT: s_mov_b32 s33, s32
8458 ; GFX9-NEXT: v_mov_b32_e32 v0, s22
8459 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8460 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
8461 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8462 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
8463 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
8464 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
8465 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
8466 ; GFX9-NEXT: v_mov_b32_e32 v0, s48
8467 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
8468 ; GFX9-NEXT: v_mov_b32_e32 v0, s49
8469 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
8470 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
8471 ; GFX9-NEXT: v_writelane_b32 v40, s30, 16
8472 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
8473 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
8474 ; GFX9-NEXT: s_mov_b32 s20, s36
8475 ; GFX9-NEXT: s_mov_b32 s21, s37
8476 ; GFX9-NEXT: s_mov_b32 s22, s38
8477 ; GFX9-NEXT: s_mov_b32 s23, s39
8478 ; GFX9-NEXT: s_mov_b32 s24, s40
8479 ; GFX9-NEXT: s_mov_b32 s25, s41
8480 ; GFX9-NEXT: s_mov_b32 s26, s42
8481 ; GFX9-NEXT: s_mov_b32 s27, s43
8482 ; GFX9-NEXT: s_mov_b32 s28, s44
8483 ; GFX9-NEXT: s_mov_b32 s29, s45
8484 ; GFX9-NEXT: v_writelane_b32 v40, s31, 17
8485 ; GFX9-NEXT: s_getpc_b64 s[30:31]
8486 ; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4
8487 ; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12
8488 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
8489 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
8490 ; GFX9-NEXT: v_readlane_b32 s4, v40, 16
8491 ; GFX9-NEXT: v_readlane_b32 s5, v40, 17
8492 ; GFX9-NEXT: v_readlane_b32 s51, v40, 15
8493 ; GFX9-NEXT: v_readlane_b32 s50, v40, 14
8494 ; GFX9-NEXT: v_readlane_b32 s49, v40, 13
8495 ; GFX9-NEXT: v_readlane_b32 s48, v40, 12
8496 ; GFX9-NEXT: v_readlane_b32 s47, v40, 11
8497 ; GFX9-NEXT: v_readlane_b32 s46, v40, 10
8498 ; GFX9-NEXT: v_readlane_b32 s45, v40, 9
8499 ; GFX9-NEXT: v_readlane_b32 s44, v40, 8
8500 ; GFX9-NEXT: v_readlane_b32 s43, v40, 7
8501 ; GFX9-NEXT: v_readlane_b32 s42, v40, 6
8502 ; GFX9-NEXT: v_readlane_b32 s41, v40, 5
8503 ; GFX9-NEXT: v_readlane_b32 s40, v40, 4
8504 ; GFX9-NEXT: v_readlane_b32 s39, v40, 3
8505 ; GFX9-NEXT: v_readlane_b32 s38, v40, 2
8506 ; GFX9-NEXT: v_readlane_b32 s37, v40, 1
8507 ; GFX9-NEXT: v_readlane_b32 s36, v40, 0
8508 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8509 ; GFX9-NEXT: v_readlane_b32 s33, v40, 18
8510 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
8511 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8512 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
8513 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8514 ; GFX9-NEXT: s_setpc_b64 s[4:5]
8516 ; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg:
8518 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8519 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8520 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
8521 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8522 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8523 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
8524 ; GFX10-NEXT: v_writelane_b32 v40, s33, 18
8525 ; GFX10-NEXT: s_clause 0x1
8526 ; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
8527 ; GFX10-NEXT: s_load_dword s22, s[4:5], 0x0
8528 ; GFX10-NEXT: s_mov_b32 s33, s32
8529 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8530 ; GFX10-NEXT: v_writelane_b32 v40, s36, 0
8531 ; GFX10-NEXT: v_writelane_b32 v40, s37, 1
8532 ; GFX10-NEXT: v_writelane_b32 v40, s38, 2
8533 ; GFX10-NEXT: v_writelane_b32 v40, s39, 3
8534 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8535 ; GFX10-NEXT: v_mov_b32_e32 v0, s22
8536 ; GFX10-NEXT: v_writelane_b32 v40, s40, 4
8537 ; GFX10-NEXT: v_writelane_b32 v40, s41, 5
8538 ; GFX10-NEXT: v_writelane_b32 v40, s42, 6
8539 ; GFX10-NEXT: v_writelane_b32 v40, s43, 7
8540 ; GFX10-NEXT: v_writelane_b32 v40, s44, 8
8541 ; GFX10-NEXT: v_writelane_b32 v40, s45, 9
8542 ; GFX10-NEXT: v_writelane_b32 v40, s46, 10
8543 ; GFX10-NEXT: v_writelane_b32 v40, s47, 11
8544 ; GFX10-NEXT: v_writelane_b32 v40, s48, 12
8545 ; GFX10-NEXT: v_writelane_b32 v40, s49, 13
8546 ; GFX10-NEXT: v_writelane_b32 v40, s50, 14
8547 ; GFX10-NEXT: v_writelane_b32 v40, s51, 15
8548 ; GFX10-NEXT: s_clause 0x1
8549 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
8550 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
8551 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
8552 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16
8553 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17
8554 ; GFX10-NEXT: s_getpc_b64 s[30:31]
8555 ; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4
8556 ; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12
8557 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
8558 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
8559 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
8560 ; GFX10-NEXT: v_mov_b32_e32 v2, s48
8561 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
8562 ; GFX10-NEXT: s_mov_b32 s20, s36
8563 ; GFX10-NEXT: s_mov_b32 s21, s37
8564 ; GFX10-NEXT: s_mov_b32 s22, s38
8565 ; GFX10-NEXT: s_mov_b32 s23, s39
8566 ; GFX10-NEXT: s_mov_b32 s24, s40
8567 ; GFX10-NEXT: s_mov_b32 s25, s41
8568 ; GFX10-NEXT: s_mov_b32 s26, s42
8569 ; GFX10-NEXT: s_mov_b32 s27, s43
8570 ; GFX10-NEXT: s_mov_b32 s28, s44
8571 ; GFX10-NEXT: s_mov_b32 s29, s45
8572 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
8573 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
8574 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
8575 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
8576 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
8577 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
8578 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
8579 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
8580 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
8581 ; GFX10-NEXT: v_readlane_b32 s4, v40, 16
8582 ; GFX10-NEXT: v_readlane_b32 s5, v40, 17
8583 ; GFX10-NEXT: v_readlane_b32 s51, v40, 15
8584 ; GFX10-NEXT: v_readlane_b32 s50, v40, 14
8585 ; GFX10-NEXT: v_readlane_b32 s49, v40, 13
8586 ; GFX10-NEXT: v_readlane_b32 s48, v40, 12
8587 ; GFX10-NEXT: v_readlane_b32 s47, v40, 11
8588 ; GFX10-NEXT: v_readlane_b32 s46, v40, 10
8589 ; GFX10-NEXT: v_readlane_b32 s45, v40, 9
8590 ; GFX10-NEXT: v_readlane_b32 s44, v40, 8
8591 ; GFX10-NEXT: v_readlane_b32 s43, v40, 7
8592 ; GFX10-NEXT: v_readlane_b32 s42, v40, 6
8593 ; GFX10-NEXT: v_readlane_b32 s41, v40, 5
8594 ; GFX10-NEXT: v_readlane_b32 s40, v40, 4
8595 ; GFX10-NEXT: v_readlane_b32 s39, v40, 3
8596 ; GFX10-NEXT: v_readlane_b32 s38, v40, 2
8597 ; GFX10-NEXT: v_readlane_b32 s37, v40, 1
8598 ; GFX10-NEXT: v_readlane_b32 s36, v40, 0
8599 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8600 ; GFX10-NEXT: v_readlane_b32 s33, v40, 18
8601 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
8602 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8603 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8604 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
8605 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8606 ; GFX10-NEXT: s_setpc_b64 s[4:5]
8608 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg:
8609 ; GFX10-SCRATCH: ; %bb.0:
8610 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8611 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8612 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
8613 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8614 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8615 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
8616 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18
8617 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
8618 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8619 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8620 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0
8621 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 1
8622 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2
8623 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 3
8624 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 4
8625 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 5
8626 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 6
8627 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 7
8628 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 8
8629 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 9
8630 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 10
8631 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 11
8632 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 12
8633 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 13
8634 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
8635 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
8636 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8637 ; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
8638 ; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
8639 ; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
8640 ; GFX10-SCRATCH-NEXT: s_clause 0x1
8641 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
8642 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
8643 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8644 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4
8645 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12
8646 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16
8647 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17
8648 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
8649 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2
8650 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
8651 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
8652 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
8653 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
8654 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
8655 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
8656 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
8657 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
8658 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
8659 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
8660 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
8661 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
8662 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
8663 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
8664 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
8665 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
8666 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s32 offset:24
8667 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16
8668 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
8669 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8670 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 16
8671 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 17
8672 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 15
8673 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 14
8674 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 13
8675 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 12
8676 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 11
8677 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 10
8678 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 9
8679 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 8
8680 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 7
8681 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 6
8682 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 5
8683 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 4
8684 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 3
8685 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2
8686 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1
8687 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0
8688 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8689 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18
8690 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
8691 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8692 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8693 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
8694 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8695 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
8696 %ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
8697 %val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0
8698 %val1 = load i32, i32 addrspace(4)* undef
8699 call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1)
; Forwards its <32 x i32> and double arguments unchanged to
; @stack_passed_f64_arg using the amdgpu_gfx calling convention.
; In all three RUN configurations the expected code reloads two dwords from
; s33 (set to the entry value of s32) at offsets 0 and 4, and stores them at
; offsets 0 and 4 from the adjusted s32 before the call; with flat scratch
; this is a single dwordx2 load/store pair.
; NOTE(review): presumably those two dwords are the stack-passed double and
; the test guards its alignment (inferred from the function name) - confirm.
8703 define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
8704 ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
8705 ; GFX9: ; %bb.0: ; %entry
8706 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8707 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
8708 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
8709 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
8710 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
8711 ; GFX9-NEXT: s_mov_b32 s33, s32
8712 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33
8713 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
8714 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8715 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8716 ; GFX9-NEXT: s_getpc_b64 s[4:5]
8717 ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
8718 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
8719 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8720 ; GFX9-NEXT: s_waitcnt vmcnt(1)
8721 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
8722 ; GFX9-NEXT: s_waitcnt vmcnt(1)
8723 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
8724 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
8725 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
8726 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
8727 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8728 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
8729 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
8730 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
8731 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
8732 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8733 ; GFX9-NEXT: s_setpc_b64 s[4:5]
8735 ; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64:
8736 ; GFX10: ; %bb.0: ; %entry
8737 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8738 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8739 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
8740 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
8741 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8742 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
8743 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
8744 ; GFX10-NEXT: s_mov_b32 s33, s32
8745 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8746 ; GFX10-NEXT: s_clause 0x1
8747 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33
8748 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
8749 ; GFX10-NEXT: s_getpc_b64 s[4:5]
8750 ; GFX10-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
8751 ; GFX10-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
8752 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8753 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8754 ; GFX10-NEXT: s_waitcnt vmcnt(1)
8755 ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32
8756 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8757 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
8758 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
8759 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
8760 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
8761 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8762 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
8763 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
8764 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
8765 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8766 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
8767 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8768 ; GFX10-NEXT: s_setpc_b64 s[4:5]
8770 ; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64:
8771 ; GFX10-SCRATCH: ; %bb.0: ; %entry
8772 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8773 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8774 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
8775 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill
8776 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8777 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
8778 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
8779 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8780 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8781 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33
8782 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8783 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
8784 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
8785 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8786 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8787 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8788 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
8789 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8790 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
8791 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
8792 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8793 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
8794 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
8795 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
8796 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8797 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
8798 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
8799 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
8801 call amdgpu_gfx void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
; Calls @external_void_func_12xv3i32 with twelve constant <3 x i32>
; arguments, i.e. 36 i32 values in total.
; In all three RUN configurations the expected code materializes the first
; 32 values in v0-v31 (ending with 10 in v30 and 11 in v31) and stores the
; remaining four values (12, 13, 14, 15) at offsets 0, 4, 8 and 12 from the
; adjusted s32 before the call; with flat scratch the four stores become a
; single dwordx4 store.
8805 define amdgpu_gfx void @stack_12xv3i32() #0 {
8806 ; GFX9-LABEL: stack_12xv3i32:
8807 ; GFX9: ; %bb.0: ; %entry
8808 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8809 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
8810 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8811 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
8812 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
8813 ; GFX9-NEXT: s_mov_b32 s33, s32
8814 ; GFX9-NEXT: s_addk_i32 s32, 0x400
8815 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
8816 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
8817 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
8818 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
8819 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
8820 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
8821 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
8822 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
8823 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
8824 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
8825 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
8826 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
8827 ; GFX9-NEXT: v_mov_b32_e32 v3, 1
8828 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
8829 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
8830 ; GFX9-NEXT: v_mov_b32_e32 v6, 2
8831 ; GFX9-NEXT: v_mov_b32_e32 v7, 2
8832 ; GFX9-NEXT: v_mov_b32_e32 v8, 2
8833 ; GFX9-NEXT: v_mov_b32_e32 v9, 3
8834 ; GFX9-NEXT: v_mov_b32_e32 v10, 3
8835 ; GFX9-NEXT: v_mov_b32_e32 v11, 3
8836 ; GFX9-NEXT: v_mov_b32_e32 v12, 4
8837 ; GFX9-NEXT: v_mov_b32_e32 v13, 4
8838 ; GFX9-NEXT: v_mov_b32_e32 v14, 4
8839 ; GFX9-NEXT: v_mov_b32_e32 v15, 5
8840 ; GFX9-NEXT: v_mov_b32_e32 v16, 5
8841 ; GFX9-NEXT: v_mov_b32_e32 v17, 5
8842 ; GFX9-NEXT: v_mov_b32_e32 v18, 6
8843 ; GFX9-NEXT: v_mov_b32_e32 v19, 6
8844 ; GFX9-NEXT: v_mov_b32_e32 v20, 6
8845 ; GFX9-NEXT: v_mov_b32_e32 v21, 7
8846 ; GFX9-NEXT: v_mov_b32_e32 v22, 7
8847 ; GFX9-NEXT: v_mov_b32_e32 v23, 7
8848 ; GFX9-NEXT: v_mov_b32_e32 v24, 8
8849 ; GFX9-NEXT: v_mov_b32_e32 v25, 8
8850 ; GFX9-NEXT: v_mov_b32_e32 v26, 8
8851 ; GFX9-NEXT: v_mov_b32_e32 v27, 9
8852 ; GFX9-NEXT: v_mov_b32_e32 v28, 9
8853 ; GFX9-NEXT: v_mov_b32_e32 v29, 9
8854 ; GFX9-NEXT: v_mov_b32_e32 v30, 10
8855 ; GFX9-NEXT: v_mov_b32_e32 v31, 11
8856 ; GFX9-NEXT: s_getpc_b64 s[4:5]
8857 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
8858 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
8859 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8860 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
8861 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
8862 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
8863 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
8864 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
8865 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
8866 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8867 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
8868 ; GFX9-NEXT: s_waitcnt vmcnt(0)
8869 ; GFX9-NEXT: s_setpc_b64 s[4:5]
8871 ; GFX10-LABEL: stack_12xv3i32:
8872 ; GFX10: ; %bb.0: ; %entry
8873 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8874 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
8875 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
8876 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8877 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8878 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
8879 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
8880 ; GFX10-NEXT: v_mov_b32_e32 v0, 12
8881 ; GFX10-NEXT: v_mov_b32_e32 v1, 13
8882 ; GFX10-NEXT: v_mov_b32_e32 v2, 14
8883 ; GFX10-NEXT: s_mov_b32 s33, s32
8884 ; GFX10-NEXT: s_addk_i32 s32, 0x200
8885 ; GFX10-NEXT: v_mov_b32_e32 v3, 15
8886 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
8887 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
8888 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
8889 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
8890 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
8891 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
8892 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
8893 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
8894 ; GFX10-NEXT: v_mov_b32_e32 v3, 1
8895 ; GFX10-NEXT: v_mov_b32_e32 v4, 1
8896 ; GFX10-NEXT: v_mov_b32_e32 v5, 1
8897 ; GFX10-NEXT: v_mov_b32_e32 v6, 2
8898 ; GFX10-NEXT: v_mov_b32_e32 v7, 2
8899 ; GFX10-NEXT: v_mov_b32_e32 v8, 2
8900 ; GFX10-NEXT: v_mov_b32_e32 v9, 3
8901 ; GFX10-NEXT: v_mov_b32_e32 v10, 3
8902 ; GFX10-NEXT: v_mov_b32_e32 v11, 3
8903 ; GFX10-NEXT: v_mov_b32_e32 v12, 4
8904 ; GFX10-NEXT: v_mov_b32_e32 v13, 4
8905 ; GFX10-NEXT: v_mov_b32_e32 v14, 4
8906 ; GFX10-NEXT: v_mov_b32_e32 v15, 5
8907 ; GFX10-NEXT: v_mov_b32_e32 v16, 5
8908 ; GFX10-NEXT: v_mov_b32_e32 v17, 5
8909 ; GFX10-NEXT: v_mov_b32_e32 v18, 6
8910 ; GFX10-NEXT: v_mov_b32_e32 v19, 6
8911 ; GFX10-NEXT: v_mov_b32_e32 v20, 6
8912 ; GFX10-NEXT: v_mov_b32_e32 v21, 7
8913 ; GFX10-NEXT: v_mov_b32_e32 v22, 7
8914 ; GFX10-NEXT: v_mov_b32_e32 v23, 7
8915 ; GFX10-NEXT: v_mov_b32_e32 v24, 8
8916 ; GFX10-NEXT: v_mov_b32_e32 v25, 8
8917 ; GFX10-NEXT: v_mov_b32_e32 v26, 8
8918 ; GFX10-NEXT: v_mov_b32_e32 v27, 9
8919 ; GFX10-NEXT: v_mov_b32_e32 v28, 9
8920 ; GFX10-NEXT: v_mov_b32_e32 v29, 9
8921 ; GFX10-NEXT: v_mov_b32_e32 v30, 10
8922 ; GFX10-NEXT: v_mov_b32_e32 v31, 11
8923 ; GFX10-NEXT: s_getpc_b64 s[4:5]
8924 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
8925 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
8926 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
8927 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
8928 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
8929 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
8930 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
8931 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
8932 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
8933 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8934 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
8935 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
8936 ; GFX10-NEXT: s_waitcnt vmcnt(0)
8937 ; GFX10-NEXT: s_setpc_b64 s[4:5]
8939 ; GFX10-SCRATCH-LABEL: stack_12xv3i32:
8940 ; GFX10-SCRATCH: ; %bb.0: ; %entry
8941 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8942 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
8943 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
8944 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8945 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
8946 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
8947 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
8948 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12
8949 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13
8950 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14
8951 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15
8952 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
8953 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
8954 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
8955 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1
8956 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
8957 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
8958 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
8959 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
8960 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 1
8961 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1
8962 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2
8963 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2
8964 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 2
8965 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 3
8966 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 3
8967 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 3
8968 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 4
8969 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 4
8970 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 4
8971 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 5
8972 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 5
8973 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 5
8974 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 6
8975 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 6
8976 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 6
8977 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 7
8978 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 7
8979 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 7
8980 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 8
8981 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 8
8982 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 8
8983 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 9
8984 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 9
8985 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 9
8986 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 10
8987 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 11
8988 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
8989 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
8990 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
8991 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
8992 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
8993 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
8994 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
8995 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
8996 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
8997 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
8998 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8999 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9000 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
9001 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9002 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
9004 call amdgpu_gfx void @external_void_func_12xv3i32(
9005 <3 x i32><i32 0, i32 0, i32 0>,
9006 <3 x i32><i32 1, i32 1, i32 1>,
9007 <3 x i32><i32 2, i32 2, i32 2>,
9008 <3 x i32><i32 3, i32 3, i32 3>,
9009 <3 x i32><i32 4, i32 4, i32 4>,
9010 <3 x i32><i32 5, i32 5, i32 5>,
9011 <3 x i32><i32 6, i32 6, i32 6>,
9012 <3 x i32><i32 7, i32 7, i32 7>,
9013 <3 x i32><i32 8, i32 8, i32 8>,
9014 <3 x i32><i32 9, i32 9, i32 9>,
9015 <3 x i32><i32 10, i32 11, i32 12>,
9016 <3 x i32><i32 13, i32 14, i32 15>)
9020 define amdgpu_gfx void @stack_8xv5i32() #0 {
9021 ; GFX9-LABEL: stack_8xv5i32:
9022 ; GFX9: ; %bb.0: ; %entry
9023 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9024 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
9025 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9026 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
9027 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
9028 ; GFX9-NEXT: s_mov_b32 s33, s32
9029 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9030 ; GFX9-NEXT: v_mov_b32_e32 v0, 8
9031 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
9032 ; GFX9-NEXT: v_mov_b32_e32 v0, 9
9033 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
9034 ; GFX9-NEXT: v_mov_b32_e32 v0, 10
9035 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
9036 ; GFX9-NEXT: v_mov_b32_e32 v0, 11
9037 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
9038 ; GFX9-NEXT: v_mov_b32_e32 v0, 12
9039 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
9040 ; GFX9-NEXT: v_mov_b32_e32 v0, 13
9041 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
9042 ; GFX9-NEXT: v_mov_b32_e32 v0, 14
9043 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
9044 ; GFX9-NEXT: v_mov_b32_e32 v0, 15
9045 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
9046 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
9047 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
9048 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
9049 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
9050 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
9051 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
9052 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
9053 ; GFX9-NEXT: v_mov_b32_e32 v6, 1
9054 ; GFX9-NEXT: v_mov_b32_e32 v7, 1
9055 ; GFX9-NEXT: v_mov_b32_e32 v8, 1
9056 ; GFX9-NEXT: v_mov_b32_e32 v9, 1
9057 ; GFX9-NEXT: v_mov_b32_e32 v10, 2
9058 ; GFX9-NEXT: v_mov_b32_e32 v11, 2
9059 ; GFX9-NEXT: v_mov_b32_e32 v12, 2
9060 ; GFX9-NEXT: v_mov_b32_e32 v13, 2
9061 ; GFX9-NEXT: v_mov_b32_e32 v14, 2
9062 ; GFX9-NEXT: v_mov_b32_e32 v15, 3
9063 ; GFX9-NEXT: v_mov_b32_e32 v16, 3
9064 ; GFX9-NEXT: v_mov_b32_e32 v17, 3
9065 ; GFX9-NEXT: v_mov_b32_e32 v18, 3
9066 ; GFX9-NEXT: v_mov_b32_e32 v19, 3
9067 ; GFX9-NEXT: v_mov_b32_e32 v20, 4
9068 ; GFX9-NEXT: v_mov_b32_e32 v21, 4
9069 ; GFX9-NEXT: v_mov_b32_e32 v22, 4
9070 ; GFX9-NEXT: v_mov_b32_e32 v23, 4
9071 ; GFX9-NEXT: v_mov_b32_e32 v24, 4
9072 ; GFX9-NEXT: v_mov_b32_e32 v25, 5
9073 ; GFX9-NEXT: v_mov_b32_e32 v26, 5
9074 ; GFX9-NEXT: v_mov_b32_e32 v27, 5
9075 ; GFX9-NEXT: v_mov_b32_e32 v28, 5
9076 ; GFX9-NEXT: v_mov_b32_e32 v29, 5
9077 ; GFX9-NEXT: v_mov_b32_e32 v30, 6
9078 ; GFX9-NEXT: v_mov_b32_e32 v31, 7
9079 ; GFX9-NEXT: s_getpc_b64 s[4:5]
9080 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
9081 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
9082 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
9083 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
9084 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9085 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
9086 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9087 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
9088 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
9089 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9090 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
9091 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9092 ; GFX9-NEXT: s_setpc_b64 s[4:5]
9094 ; GFX10-LABEL: stack_8xv5i32:
9095 ; GFX10: ; %bb.0: ; %entry
9096 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9097 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
9098 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
9099 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9100 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9101 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
9102 ; GFX10-NEXT: v_mov_b32_e32 v0, 8
9103 ; GFX10-NEXT: v_mov_b32_e32 v1, 9
9104 ; GFX10-NEXT: v_mov_b32_e32 v2, 10
9105 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
9106 ; GFX10-NEXT: s_mov_b32 s33, s32
9107 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9108 ; GFX10-NEXT: v_mov_b32_e32 v3, 14
9109 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
9110 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
9111 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
9112 ; GFX10-NEXT: v_mov_b32_e32 v0, 11
9113 ; GFX10-NEXT: v_mov_b32_e32 v1, 12
9114 ; GFX10-NEXT: v_mov_b32_e32 v2, 13
9115 ; GFX10-NEXT: v_mov_b32_e32 v4, 15
9116 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
9117 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
9118 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16
9119 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
9120 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24
9121 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28
9122 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
9123 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
9124 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
9125 ; GFX10-NEXT: v_mov_b32_e32 v3, 0
9126 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
9127 ; GFX10-NEXT: v_mov_b32_e32 v5, 1
9128 ; GFX10-NEXT: v_mov_b32_e32 v6, 1
9129 ; GFX10-NEXT: v_mov_b32_e32 v7, 1
9130 ; GFX10-NEXT: v_mov_b32_e32 v8, 1
9131 ; GFX10-NEXT: v_mov_b32_e32 v9, 1
9132 ; GFX10-NEXT: v_mov_b32_e32 v10, 2
9133 ; GFX10-NEXT: v_mov_b32_e32 v11, 2
9134 ; GFX10-NEXT: v_mov_b32_e32 v12, 2
9135 ; GFX10-NEXT: v_mov_b32_e32 v13, 2
9136 ; GFX10-NEXT: v_mov_b32_e32 v14, 2
9137 ; GFX10-NEXT: v_mov_b32_e32 v15, 3
9138 ; GFX10-NEXT: v_mov_b32_e32 v16, 3
9139 ; GFX10-NEXT: v_mov_b32_e32 v17, 3
9140 ; GFX10-NEXT: v_mov_b32_e32 v18, 3
9141 ; GFX10-NEXT: v_mov_b32_e32 v19, 3
9142 ; GFX10-NEXT: v_mov_b32_e32 v20, 4
9143 ; GFX10-NEXT: v_mov_b32_e32 v21, 4
9144 ; GFX10-NEXT: v_mov_b32_e32 v22, 4
9145 ; GFX10-NEXT: v_mov_b32_e32 v23, 4
9146 ; GFX10-NEXT: v_mov_b32_e32 v24, 4
9147 ; GFX10-NEXT: v_mov_b32_e32 v25, 5
9148 ; GFX10-NEXT: v_mov_b32_e32 v26, 5
9149 ; GFX10-NEXT: v_mov_b32_e32 v27, 5
9150 ; GFX10-NEXT: v_mov_b32_e32 v28, 5
9151 ; GFX10-NEXT: v_mov_b32_e32 v29, 5
9152 ; GFX10-NEXT: v_mov_b32_e32 v30, 6
9153 ; GFX10-NEXT: v_mov_b32_e32 v31, 7
9154 ; GFX10-NEXT: s_getpc_b64 s[4:5]
9155 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
9156 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
9157 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
9158 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
9159 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9160 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
9161 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9162 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
9163 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
9164 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9165 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9166 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
9167 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9168 ; GFX10-NEXT: s_setpc_b64 s[4:5]
9170 ; GFX10-SCRATCH-LABEL: stack_8xv5i32:
9171 ; GFX10-SCRATCH: ; %bb.0: ; %entry
9172 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9173 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
9174 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
9175 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9176 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9177 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
9178 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
9179 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12
9180 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13
9181 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14
9182 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15
9183 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 8
9184 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 9
9185 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 10
9186 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 11
9187 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9188 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9189 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9190 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
9191 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s32
9192 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
9193 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
9194 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
9195 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0
9196 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
9197 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1
9198 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1
9199 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1
9200 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1
9201 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1
9202 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2
9203 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2
9204 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2
9205 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2
9206 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2
9207 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 3
9208 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 3
9209 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 3
9210 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 3
9211 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 3
9212 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4
9213 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4
9214 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4
9215 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4
9216 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4
9217 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 5
9218 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 5
9219 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 5
9220 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 5
9221 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 5
9222 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 6
9223 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 7
9224 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
9225 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
9226 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
9227 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9228 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9229 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
9230 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
9231 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9232 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
9233 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
9234 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9235 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9236 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
9237 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9238 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
9240 call amdgpu_gfx void @external_void_func_8xv5i32(
9241 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
9242 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
9243 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
9244 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
9245 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
9246 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
9247 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
9248 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
; Test: call @external_void_func_8xv5f32 with eight constant <5 x float>
; arguments (40 scalar elements in total).
; Per the expected output below, the first 32 elements are materialized in
; v0..v31, and the remaining eight (8.0..15.0, i.e. 0x41000000..0x41700000)
; are written to the outgoing stack area at s32 offsets 0..28 before the call:
;   - GFX9 / GFX10: eight buffer_store_dword instructions.
;   - GFX10-SCRATCH: two scratch_store_dwordx4 instructions.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
9252 define amdgpu_gfx void @stack_8xv5f32() #0 {
9253 ; GFX9-LABEL: stack_8xv5f32:
9254 ; GFX9: ; %bb.0: ; %entry
9255 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9256 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
9257 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9258 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
9259 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
9260 ; GFX9-NEXT: s_mov_b32 s33, s32
9261 ; GFX9-NEXT: s_addk_i32 s32, 0x400
9262 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000
9263 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
9264 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000
9265 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
9266 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000
9267 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
9268 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000
9269 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
9270 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000
9271 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
9272 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000
9273 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
9274 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000
9275 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
9276 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000
9277 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
9278 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
9279 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
9280 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
9281 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
9282 ; GFX9-NEXT: v_mov_b32_e32 v3, 0
9283 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
9284 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0
9285 ; GFX9-NEXT: v_mov_b32_e32 v6, 1.0
9286 ; GFX9-NEXT: v_mov_b32_e32 v7, 1.0
9287 ; GFX9-NEXT: v_mov_b32_e32 v8, 1.0
9288 ; GFX9-NEXT: v_mov_b32_e32 v9, 1.0
9289 ; GFX9-NEXT: v_mov_b32_e32 v10, 2.0
9290 ; GFX9-NEXT: v_mov_b32_e32 v11, 2.0
9291 ; GFX9-NEXT: v_mov_b32_e32 v12, 2.0
9292 ; GFX9-NEXT: v_mov_b32_e32 v13, 2.0
9293 ; GFX9-NEXT: v_mov_b32_e32 v14, 2.0
9294 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000
9295 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000
9296 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000
9297 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000
9298 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000
9299 ; GFX9-NEXT: v_mov_b32_e32 v20, 4.0
9300 ; GFX9-NEXT: v_mov_b32_e32 v21, 4.0
9301 ; GFX9-NEXT: v_mov_b32_e32 v22, 4.0
9302 ; GFX9-NEXT: v_mov_b32_e32 v23, 4.0
9303 ; GFX9-NEXT: v_mov_b32_e32 v24, 4.0
9304 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000
9305 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000
9306 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000
9307 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000
9308 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000
9309 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000
9310 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000
9311 ; GFX9-NEXT: s_getpc_b64 s[4:5]
9312 ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
9313 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
9314 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
9315 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
9316 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
9317 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
9318 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9319 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
9320 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
9321 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9322 ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
9323 ; GFX9-NEXT: s_waitcnt vmcnt(0)
9324 ; GFX9-NEXT: s_setpc_b64 s[4:5]
9326 ; GFX10-LABEL: stack_8xv5f32:
9327 ; GFX10: ; %bb.0: ; %entry
9328 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9329 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
9330 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1
9331 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9332 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9333 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
9334 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000
9335 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000
9336 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000
9337 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
9338 ; GFX10-NEXT: s_mov_b32 s33, s32
9339 ; GFX10-NEXT: s_addk_i32 s32, 0x200
9340 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000
9341 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
9342 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
9343 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
9344 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41300000
9345 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41400000
9346 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41500000
9347 ; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000
9348 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
9349 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
9350 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16
9351 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20
9352 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24
9353 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28
9354 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
9355 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
9356 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
9357 ; GFX10-NEXT: v_mov_b32_e32 v3, 0
9358 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
9359 ; GFX10-NEXT: v_mov_b32_e32 v5, 1.0
9360 ; GFX10-NEXT: v_mov_b32_e32 v6, 1.0
9361 ; GFX10-NEXT: v_mov_b32_e32 v7, 1.0
9362 ; GFX10-NEXT: v_mov_b32_e32 v8, 1.0
9363 ; GFX10-NEXT: v_mov_b32_e32 v9, 1.0
9364 ; GFX10-NEXT: v_mov_b32_e32 v10, 2.0
9365 ; GFX10-NEXT: v_mov_b32_e32 v11, 2.0
9366 ; GFX10-NEXT: v_mov_b32_e32 v12, 2.0
9367 ; GFX10-NEXT: v_mov_b32_e32 v13, 2.0
9368 ; GFX10-NEXT: v_mov_b32_e32 v14, 2.0
9369 ; GFX10-NEXT: v_mov_b32_e32 v15, 0x40400000
9370 ; GFX10-NEXT: v_mov_b32_e32 v16, 0x40400000
9371 ; GFX10-NEXT: v_mov_b32_e32 v17, 0x40400000
9372 ; GFX10-NEXT: v_mov_b32_e32 v18, 0x40400000
9373 ; GFX10-NEXT: v_mov_b32_e32 v19, 0x40400000
9374 ; GFX10-NEXT: v_mov_b32_e32 v20, 4.0
9375 ; GFX10-NEXT: v_mov_b32_e32 v21, 4.0
9376 ; GFX10-NEXT: v_mov_b32_e32 v22, 4.0
9377 ; GFX10-NEXT: v_mov_b32_e32 v23, 4.0
9378 ; GFX10-NEXT: v_mov_b32_e32 v24, 4.0
9379 ; GFX10-NEXT: v_mov_b32_e32 v25, 0x40a00000
9380 ; GFX10-NEXT: v_mov_b32_e32 v26, 0x40a00000
9381 ; GFX10-NEXT: v_mov_b32_e32 v27, 0x40a00000
9382 ; GFX10-NEXT: v_mov_b32_e32 v28, 0x40a00000
9383 ; GFX10-NEXT: v_mov_b32_e32 v29, 0x40a00000
9384 ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000
9385 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000
9386 ; GFX10-NEXT: s_getpc_b64 s[4:5]
9387 ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
9388 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
9389 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
9390 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
9391 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
9392 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
9393 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
9394 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
9395 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1
9396 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9397 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
9398 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
9399 ; GFX10-NEXT: s_waitcnt vmcnt(0)
9400 ; GFX10-NEXT: s_setpc_b64 s[4:5]
9402 ; GFX10-SCRATCH-LABEL: stack_8xv5f32:
9403 ; GFX10-SCRATCH: ; %bb.0: ; %entry
9404 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9405 ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
9406 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
9407 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9408 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9409 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
9410 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
9411 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x41400000
9412 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x41500000
9413 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0x41600000
9414 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x41700000
9415 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0x41000000
9416 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x41100000
9417 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 0x41200000
9418 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 0x41300000
9419 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
9420 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
9421 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
9422 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
9423 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s32
9424 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0
9425 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0
9426 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0
9427 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0
9428 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0
9429 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1.0
9430 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1.0
9431 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1.0
9432 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1.0
9433 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1.0
9434 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2.0
9435 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2.0
9436 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2.0
9437 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2.0
9438 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2.0
9439 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 0x40400000
9440 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0x40400000
9441 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 0x40400000
9442 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 0x40400000
9443 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 0x40400000
9444 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4.0
9445 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4.0
9446 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4.0
9447 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4.0
9448 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4.0
9449 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 0x40a00000
9450 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 0x40a00000
9451 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 0x40a00000
9452 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 0x40a00000
9453 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 0x40a00000
9454 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 0x40c00000
9455 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 0x40e00000
9456 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
9457 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
9458 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
9459 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
9460 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
9461 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
9462 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
9463 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
9464 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
9465 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
9466 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9467 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
9468 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
9469 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
9470 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
9472 call amdgpu_gfx void @external_void_func_8xv5f32(
9473 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
9474 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
9475 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
9476 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
9477 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
9478 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
9479 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
9480 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
; External amdgpu_gfx callees with hidden visibility. These are declarations
; only; no bodies are provided in this file.
; NOTE(review): the two f64 declarations and the 12xv3 variants are not called
; in this part of the file; presumably they are used by tests earlier on.
9484 declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) #0
9485 declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) #0
; Callees taking many small vectors: twelve 3-element or eight 5-element
; vectors each, in i32 and float flavours.
9486 declare hidden amdgpu_gfx void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
9487 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
9488 declare hidden amdgpu_gfx void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
9489 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
9490 declare hidden amdgpu_gfx void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
9491 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
9492 declare hidden amdgpu_gfx void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
9493 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
; Attribute groups: #0 is applied to the declarations above and to the test
; functions; #1 additionally sets noinline.
; NOTE(review): no use of #1 is visible in this part of the file.
9494 attributes #0 = { nounwind }
9495 attributes #1 = { nounwind noinline }