1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
4 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
6 declare hidden amdgpu_gfx void @external_void_func_void() #0
; Calls @external_void_func_void twice, with an empty side-effecting inline asm
; between the two calls. In the expected output for all three targets the
; incoming s30/s31 are written to lanes 2 and 3 of v40 before the first
; s_swappc_b64 (whose destination is s[30:31]) and are read back only after the
; second call, just before the s_setpc_b64 s[30:31] return. s4/s5, which hold
; the call target, are saved in lanes 0 and 1 of the same VGPR, and the copy of
; the incoming s33 goes to lane 4.
8 define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
9 ; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
11 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX9-NEXT: s_mov_b32 s34, s33
13 ; GFX9-NEXT: s_mov_b32 s33, s32
14 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
15 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
17 ; GFX9-NEXT: v_writelane_b32 v40, s34, 4
18 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
19 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
20 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2
21 ; GFX9-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi
22 ; GFX9-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo
23 ; GFX9-NEXT: s_addk_i32 s32, 0x400
24 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
25 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
26 ; GFX9-NEXT: ;;#ASMSTART
27 ; GFX9-NEXT: ;;#ASMEND
28 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
29 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
30 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
31 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
32 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 4
34 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
35 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
36 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
37 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
38 ; GFX9-NEXT: s_mov_b32 s33, s34
39 ; GFX9-NEXT: s_waitcnt vmcnt(0)
40 ; GFX9-NEXT: s_setpc_b64 s[30:31]
42 ; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
44 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; GFX10-NEXT: s_mov_b32 s34, s33
46 ; GFX10-NEXT: s_mov_b32 s33, s32
47 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
48 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
49 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
50 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
51 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4
52 ; GFX10-NEXT: s_addk_i32 s32, 0x200
53 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
54 ; GFX10-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo
55 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
56 ; GFX10-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi
57 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
58 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
59 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
60 ; GFX10-NEXT: ;;#ASMSTART
61 ; GFX10-NEXT: ;;#ASMEND
62 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
63 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
64 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
65 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
66 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
67 ; GFX10-NEXT: v_readlane_b32 s34, v40, 4
68 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
69 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
70 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
71 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
72 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
73 ; GFX10-NEXT: s_mov_b32 s33, s34
74 ; GFX10-NEXT: s_waitcnt vmcnt(0)
75 ; GFX10-NEXT: s_setpc_b64 s[30:31]
77 ; GFX11-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
79 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80 ; GFX11-NEXT: s_mov_b32 s0, s33
81 ; GFX11-NEXT: s_mov_b32 s33, s32
82 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
83 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
84 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
85 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4
86 ; GFX11-NEXT: s_add_i32 s32, s32, 16
87 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
88 ; GFX11-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo
89 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1
90 ; GFX11-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi
91 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2
92 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3
93 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
94 ; GFX11-NEXT: ;;#ASMSTART
95 ; GFX11-NEXT: ;;#ASMEND
96 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
97 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
98 ; GFX11-NEXT: v_readlane_b32 s31, v40, 3
99 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2
100 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1
101 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
102 ; GFX11-NEXT: v_readlane_b32 s0, v40, 4
103 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
104 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
105 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
106 ; GFX11-NEXT: s_add_i32 s32, s32, -16
107 ; GFX11-NEXT: s_mov_b32 s33, s0
108 ; GFX11-NEXT: s_waitcnt vmcnt(0)
109 ; GFX11-NEXT: s_setpc_b64 s[30:31]
110 call amdgpu_gfx void @external_void_func_void()
111 call void asm sideeffect "", ""() #0
112 call amdgpu_gfx void @external_void_func_void()
; Makes no calls in the statements shown; its only work is two "; clobber"
; inline asm statements. The first lists s[30:31] as clobbered and the second
; s[28:29], so despite the function name s30/s31 are clobbered as well.
; The checks expect s28-s31 to be written to lanes 0-3 of v0 on entry and read
; back before the s_setpc_b64 s[30:31] return. v0 itself is spilled and
; reloaded at s32 under s_xor_saveexec, and s33 is not touched.
116 define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
117 ; GFX9-LABEL: void_func_void_clobber_s28_s29:
119 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1
121 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
122 ; GFX9-NEXT: s_mov_b64 exec, s[34:35]
123 ; GFX9-NEXT: v_writelane_b32 v0, s28, 0
124 ; GFX9-NEXT: v_writelane_b32 v0, s29, 1
125 ; GFX9-NEXT: v_writelane_b32 v0, s30, 2
126 ; GFX9-NEXT: v_writelane_b32 v0, s31, 3
127 ; GFX9-NEXT: ;;#ASMSTART
128 ; GFX9-NEXT: ; clobber
129 ; GFX9-NEXT: ;;#ASMEND
130 ; GFX9-NEXT: ;;#ASMSTART
131 ; GFX9-NEXT: ; clobber
132 ; GFX9-NEXT: ;;#ASMEND
133 ; GFX9-NEXT: v_readlane_b32 s31, v0, 3
134 ; GFX9-NEXT: v_readlane_b32 s30, v0, 2
135 ; GFX9-NEXT: v_readlane_b32 s29, v0, 1
136 ; GFX9-NEXT: v_readlane_b32 s28, v0, 0
137 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1
138 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
139 ; GFX9-NEXT: s_mov_b64 exec, s[34:35]
140 ; GFX9-NEXT: s_waitcnt vmcnt(0)
141 ; GFX9-NEXT: s_setpc_b64 s[30:31]
143 ; GFX10-LABEL: void_func_void_clobber_s28_s29:
145 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
147 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
148 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
149 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
150 ; GFX10-NEXT: v_writelane_b32 v0, s28, 0
151 ; GFX10-NEXT: v_writelane_b32 v0, s29, 1
152 ; GFX10-NEXT: v_writelane_b32 v0, s30, 2
153 ; GFX10-NEXT: v_writelane_b32 v0, s31, 3
154 ; GFX10-NEXT: ;;#ASMSTART
155 ; GFX10-NEXT: ; clobber
156 ; GFX10-NEXT: ;;#ASMEND
157 ; GFX10-NEXT: ;;#ASMSTART
158 ; GFX10-NEXT: ; clobber
159 ; GFX10-NEXT: ;;#ASMEND
160 ; GFX10-NEXT: v_readlane_b32 s31, v0, 3
161 ; GFX10-NEXT: v_readlane_b32 s30, v0, 2
162 ; GFX10-NEXT: v_readlane_b32 s29, v0, 1
163 ; GFX10-NEXT: v_readlane_b32 s28, v0, 0
164 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
165 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
166 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
167 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
168 ; GFX10-NEXT: s_waitcnt vmcnt(0)
169 ; GFX10-NEXT: s_setpc_b64 s[30:31]
171 ; GFX11-LABEL: void_func_void_clobber_s28_s29:
173 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
175 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
176 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
177 ; GFX11-NEXT: v_writelane_b32 v0, s28, 0
178 ; GFX11-NEXT: v_writelane_b32 v0, s29, 1
179 ; GFX11-NEXT: v_writelane_b32 v0, s30, 2
180 ; GFX11-NEXT: v_writelane_b32 v0, s31, 3
181 ; GFX11-NEXT: ;;#ASMSTART
182 ; GFX11-NEXT: ; clobber
183 ; GFX11-NEXT: ;;#ASMEND
184 ; GFX11-NEXT: ;;#ASMSTART
185 ; GFX11-NEXT: ; clobber
186 ; GFX11-NEXT: ;;#ASMEND
187 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
188 ; GFX11-NEXT: v_readlane_b32 s31, v0, 3
189 ; GFX11-NEXT: v_readlane_b32 s30, v0, 2
190 ; GFX11-NEXT: v_readlane_b32 s29, v0, 1
191 ; GFX11-NEXT: v_readlane_b32 s28, v0, 0
192 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
193 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
194 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
195 ; GFX11-NEXT: s_waitcnt vmcnt(0)
196 ; GFX11-NEXT: s_setpc_b64 s[30:31]
197 call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
198 call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
; An inline asm defines a value in s31, @external_void_func_void is called, and
; a second inline asm consumes that value in s31 again. The call's s_swappc_b64
; has s[30:31] as its destination, so the checks expect the value to be copied
; to s4 before the call and back into s31 afterwards. The incoming s4, s30 and
; s31 are kept in lanes 0-2 of v40 and restored before the return.
; %out is not referenced by the statements shown here.
202 define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
203 ; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
205 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX9-NEXT: s_mov_b32 s34, s33
207 ; GFX9-NEXT: s_mov_b32 s33, s32
208 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
209 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
210 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
211 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
212 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
213 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
214 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
215 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
216 ; GFX9-NEXT: s_addk_i32 s32, 0x400
217 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
218 ; GFX9-NEXT: ;;#ASMSTART
219 ; GFX9-NEXT: ; def s31
220 ; GFX9-NEXT: ;;#ASMEND
221 ; GFX9-NEXT: s_mov_b32 s4, s31
222 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
223 ; GFX9-NEXT: s_mov_b32 s31, s4
224 ; GFX9-NEXT: ;;#ASMSTART
225 ; GFX9-NEXT: ; use s31
226 ; GFX9-NEXT: ;;#ASMEND
227 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
228 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
229 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
230 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
231 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
232 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
233 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
234 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
235 ; GFX9-NEXT: s_mov_b32 s33, s34
236 ; GFX9-NEXT: s_waitcnt vmcnt(0)
237 ; GFX9-NEXT: s_setpc_b64 s[30:31]
239 ; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
241 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX10-NEXT: s_mov_b32 s34, s33
243 ; GFX10-NEXT: s_mov_b32 s33, s32
244 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
245 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
246 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
247 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
248 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
249 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
250 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
251 ; GFX10-NEXT: s_addk_i32 s32, 0x200
252 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
253 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
254 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
255 ; GFX10-NEXT: ;;#ASMSTART
256 ; GFX10-NEXT: ; def s31
257 ; GFX10-NEXT: ;;#ASMEND
258 ; GFX10-NEXT: s_mov_b32 s4, s31
259 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
260 ; GFX10-NEXT: s_mov_b32 s31, s4
261 ; GFX10-NEXT: ;;#ASMSTART
262 ; GFX10-NEXT: ; use s31
263 ; GFX10-NEXT: ;;#ASMEND
264 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
265 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
266 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
267 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
268 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
269 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
270 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
271 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
272 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
273 ; GFX10-NEXT: s_mov_b32 s33, s34
274 ; GFX10-NEXT: s_waitcnt vmcnt(0)
275 ; GFX10-NEXT: s_setpc_b64 s[30:31]
277 ; GFX11-LABEL: test_call_void_func_void_mayclobber_s31:
279 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX11-NEXT: s_mov_b32 s0, s33
281 ; GFX11-NEXT: s_mov_b32 s33, s32
282 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
283 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
284 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
285 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
286 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
287 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
288 ; GFX11-NEXT: s_add_i32 s32, s32, 16
289 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
290 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
291 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
292 ; GFX11-NEXT: ;;#ASMSTART
293 ; GFX11-NEXT: ; def s31
294 ; GFX11-NEXT: ;;#ASMEND
295 ; GFX11-NEXT: s_mov_b32 s4, s31
296 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
297 ; GFX11-NEXT: s_mov_b32 s31, s4
298 ; GFX11-NEXT: ;;#ASMSTART
299 ; GFX11-NEXT: ; use s31
300 ; GFX11-NEXT: ;;#ASMEND
301 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
302 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
303 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
304 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
305 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
306 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
307 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
308 ; GFX11-NEXT: s_add_i32 s32, s32, -16
309 ; GFX11-NEXT: s_mov_b32 s33, s0
310 ; GFX11-NEXT: s_waitcnt vmcnt(0)
311 ; GFX11-NEXT: s_setpc_b64 s[30:31]
312 %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
313 call amdgpu_gfx void @external_void_func_void()
314 call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
; VGPR variant of the def / call / use pattern: an inline asm defines a value
; in v31, @external_void_func_void is called, and a second inline asm consumes
; the value in v31. The checks expect the value to be copied into v41 before
; the call and back into v31 afterwards. v41 is spilled to and reloaded from
; the stack slot at s33 by this function, while v40 (slot at s33 offset:4)
; carries s30/s31 in lanes 0-1 and the copy of the incoming s33 in lane 2.
; %out is not referenced by the statements shown here.
318 define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
319 ; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
321 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX9-NEXT: s_mov_b32 s34, s33
323 ; GFX9-NEXT: s_mov_b32 s33, s32
324 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
325 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
326 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
327 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
328 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
329 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
330 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
331 ; GFX9-NEXT: s_addk_i32 s32, 0x400
332 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
333 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
334 ; GFX9-NEXT: ;;#ASMSTART
335 ; GFX9-NEXT: ; def v31
336 ; GFX9-NEXT: ;;#ASMEND
337 ; GFX9-NEXT: v_mov_b32_e32 v41, v31
338 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
339 ; GFX9-NEXT: v_mov_b32_e32 v31, v41
340 ; GFX9-NEXT: ;;#ASMSTART
341 ; GFX9-NEXT: ; use v31
342 ; GFX9-NEXT: ;;#ASMEND
343 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
344 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
345 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
346 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
347 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
348 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
349 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
350 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
351 ; GFX9-NEXT: s_mov_b32 s33, s34
352 ; GFX9-NEXT: s_waitcnt vmcnt(0)
353 ; GFX9-NEXT: s_setpc_b64 s[30:31]
355 ; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
357 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358 ; GFX10-NEXT: s_mov_b32 s34, s33
359 ; GFX10-NEXT: s_mov_b32 s33, s32
360 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
361 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
362 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
363 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
364 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
365 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
366 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
367 ; GFX10-NEXT: s_addk_i32 s32, 0x200
368 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
369 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
370 ; GFX10-NEXT: ;;#ASMSTART
371 ; GFX10-NEXT: ; def v31
372 ; GFX10-NEXT: ;;#ASMEND
373 ; GFX10-NEXT: v_mov_b32_e32 v41, v31
374 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
375 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
376 ; GFX10-NEXT: v_mov_b32_e32 v31, v41
377 ; GFX10-NEXT: ;;#ASMSTART
378 ; GFX10-NEXT: ; use v31
379 ; GFX10-NEXT: ;;#ASMEND
380 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
381 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
382 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
383 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
384 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
385 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
386 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
387 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
388 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
389 ; GFX10-NEXT: s_mov_b32 s33, s34
390 ; GFX10-NEXT: s_waitcnt vmcnt(0)
391 ; GFX10-NEXT: s_setpc_b64 s[30:31]
393 ; GFX11-LABEL: test_call_void_func_void_mayclobber_v31:
395 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; GFX11-NEXT: s_mov_b32 s0, s33
397 ; GFX11-NEXT: s_mov_b32 s33, s32
398 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
399 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
400 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
401 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
402 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
403 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
404 ; GFX11-NEXT: s_add_i32 s32, s32, 16
405 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
406 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
407 ; GFX11-NEXT: ;;#ASMSTART
408 ; GFX11-NEXT: ; def v31
409 ; GFX11-NEXT: ;;#ASMEND
410 ; GFX11-NEXT: v_mov_b32_e32 v41, v31
411 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
412 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
413 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
414 ; GFX11-NEXT: v_mov_b32_e32 v31, v41
415 ; GFX11-NEXT: ;;#ASMSTART
416 ; GFX11-NEXT: ; use v31
417 ; GFX11-NEXT: ;;#ASMEND
418 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
419 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
420 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
421 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
422 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
423 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
424 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
425 ; GFX11-NEXT: s_add_i32 s32, s32, -16
426 ; GFX11-NEXT: s_mov_b32 s33, s0
427 ; GFX11-NEXT: s_waitcnt vmcnt(0)
428 ; GFX11-NEXT: s_setpc_b64 s[30:31]
429 %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
430 call amdgpu_gfx void @external_void_func_void()
431 call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
; Same def / call / use pattern with s33: an inline asm defines a value in s33,
; @external_void_func_void is called, and a second inline asm consumes the
; value in s33. The checks expect the asm-defined s33 to be copied to s4 before
; the call and copied back before the "use" asm; the incoming s4, s30 and s31
; are kept in lanes 0-2 of v40.
; NOTE(review): s33 is also the register the prologue loads from s32 and the
; register the epilogue's v40 reload addresses through, and that reload comes
; after the copy back from s4. Presumably this pins current behaviour for a
; reserved register rather than a desired one -- confirm before relying on it.
; %out is not referenced by the statements shown here.
436 define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
437 ; GFX9-LABEL: test_call_void_func_void_preserves_s33:
439 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440 ; GFX9-NEXT: s_mov_b32 s34, s33
441 ; GFX9-NEXT: s_mov_b32 s33, s32
442 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
443 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
444 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
445 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
446 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
447 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
448 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
449 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
450 ; GFX9-NEXT: s_addk_i32 s32, 0x400
451 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
452 ; GFX9-NEXT: ;;#ASMSTART
453 ; GFX9-NEXT: ; def s33
454 ; GFX9-NEXT: ;;#ASMEND
455 ; GFX9-NEXT: s_mov_b32 s4, s33
456 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
457 ; GFX9-NEXT: s_mov_b32 s33, s4
458 ; GFX9-NEXT: ;;#ASMSTART
459 ; GFX9-NEXT: ; use s33
460 ; GFX9-NEXT: ;;#ASMEND
461 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
462 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
463 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
464 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
465 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
466 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
467 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
468 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
469 ; GFX9-NEXT: s_mov_b32 s33, s34
470 ; GFX9-NEXT: s_waitcnt vmcnt(0)
471 ; GFX9-NEXT: s_setpc_b64 s[30:31]
473 ; GFX10-LABEL: test_call_void_func_void_preserves_s33:
475 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476 ; GFX10-NEXT: s_mov_b32 s34, s33
477 ; GFX10-NEXT: s_mov_b32 s33, s32
478 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
479 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
480 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
481 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
482 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
483 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
484 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
485 ; GFX10-NEXT: s_addk_i32 s32, 0x200
486 ; GFX10-NEXT: ;;#ASMSTART
487 ; GFX10-NEXT: ; def s33
488 ; GFX10-NEXT: ;;#ASMEND
489 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
490 ; GFX10-NEXT: s_mov_b32 s4, s33
491 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
492 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
493 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
494 ; GFX10-NEXT: s_mov_b32 s33, s4
495 ; GFX10-NEXT: ;;#ASMSTART
496 ; GFX10-NEXT: ; use s33
497 ; GFX10-NEXT: ;;#ASMEND
498 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
499 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
500 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
501 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
502 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
503 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
504 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
505 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
506 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
507 ; GFX10-NEXT: s_mov_b32 s33, s34
508 ; GFX10-NEXT: s_waitcnt vmcnt(0)
509 ; GFX10-NEXT: s_setpc_b64 s[30:31]
511 ; GFX11-LABEL: test_call_void_func_void_preserves_s33:
513 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
514 ; GFX11-NEXT: s_mov_b32 s0, s33
515 ; GFX11-NEXT: s_mov_b32 s33, s32
516 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
517 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
518 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
519 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
520 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
521 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
522 ; GFX11-NEXT: s_add_i32 s32, s32, 16
523 ; GFX11-NEXT: ;;#ASMSTART
524 ; GFX11-NEXT: ; def s33
525 ; GFX11-NEXT: ;;#ASMEND
526 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
527 ; GFX11-NEXT: s_mov_b32 s4, s33
528 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
529 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
530 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
531 ; GFX11-NEXT: s_mov_b32 s33, s4
532 ; GFX11-NEXT: ;;#ASMSTART
533 ; GFX11-NEXT: ; use s33
534 ; GFX11-NEXT: ;;#ASMEND
535 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
536 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
537 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
538 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
539 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
540 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
541 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
542 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
543 ; GFX11-NEXT: s_add_i32 s32, s32, -16
544 ; GFX11-NEXT: s_mov_b32 s33, s0
545 ; GFX11-NEXT: s_waitcnt vmcnt(0)
546 ; GFX11-NEXT: s_setpc_b64 s[30:31]
547 %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
548 call amdgpu_gfx void @external_void_func_void()
549 call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
; Same def / call / use pattern with s34: an inline asm defines a value in s34,
; @external_void_func_void is called, and a second inline asm consumes the
; value in s34. The checks expect the value to be copied to s4 before the call
; and back into s34 afterwards, with the incoming s4, s30 and s31 kept in lanes
; 0-2 of v40. In the gfx900 and gfx1010 output s34 is additionally used by the
; prologue to hold the incoming s33 (saved to lane 3 of v40) and as the low
; half of the s[34:35] call target; the gfx1100 output uses s0/s1 for those.
; %out is not referenced by the statements shown here.
553 define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
554 ; GFX9-LABEL: test_call_void_func_void_preserves_s34:
556 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
557 ; GFX9-NEXT: s_mov_b32 s34, s33
558 ; GFX9-NEXT: s_mov_b32 s33, s32
559 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
560 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
561 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
562 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
563 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
564 ; GFX9-NEXT: ;;#ASMSTART
565 ; GFX9-NEXT: ; def s34
566 ; GFX9-NEXT: ;;#ASMEND
567 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
568 ; GFX9-NEXT: s_mov_b32 s4, s34
569 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
570 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
571 ; GFX9-NEXT: s_addk_i32 s32, 0x400
572 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
573 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
574 ; GFX9-NEXT: s_mov_b32 s34, s4
575 ; GFX9-NEXT: ;;#ASMSTART
576 ; GFX9-NEXT: ; use s34
577 ; GFX9-NEXT: ;;#ASMEND
578 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
579 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
580 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
581 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
582 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
583 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
584 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
585 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
586 ; GFX9-NEXT: s_mov_b32 s33, s34
587 ; GFX9-NEXT: s_waitcnt vmcnt(0)
588 ; GFX9-NEXT: s_setpc_b64 s[30:31]
590 ; GFX10-LABEL: test_call_void_func_void_preserves_s34:
592 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593 ; GFX10-NEXT: s_mov_b32 s34, s33
594 ; GFX10-NEXT: s_mov_b32 s33, s32
595 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
596 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
597 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
598 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
599 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
600 ; GFX10-NEXT: ;;#ASMSTART
601 ; GFX10-NEXT: ; def s34
602 ; GFX10-NEXT: ;;#ASMEND
603 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
604 ; GFX10-NEXT: s_addk_i32 s32, 0x200
605 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
606 ; GFX10-NEXT: s_mov_b32 s4, s34
607 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
608 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
609 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
610 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
611 ; GFX10-NEXT: s_mov_b32 s34, s4
612 ; GFX10-NEXT: ;;#ASMSTART
613 ; GFX10-NEXT: ; use s34
614 ; GFX10-NEXT: ;;#ASMEND
615 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
616 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
617 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
618 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
619 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
620 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
621 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
622 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
623 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
624 ; GFX10-NEXT: s_mov_b32 s33, s34
625 ; GFX10-NEXT: s_waitcnt vmcnt(0)
626 ; GFX10-NEXT: s_setpc_b64 s[30:31]
628 ; GFX11-LABEL: test_call_void_func_void_preserves_s34:
630 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631 ; GFX11-NEXT: s_mov_b32 s0, s33
632 ; GFX11-NEXT: s_mov_b32 s33, s32
633 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
634 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
635 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
636 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
637 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
638 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
639 ; GFX11-NEXT: s_add_i32 s32, s32, 16
640 ; GFX11-NEXT: ;;#ASMSTART
641 ; GFX11-NEXT: ; def s34
642 ; GFX11-NEXT: ;;#ASMEND
643 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
644 ; GFX11-NEXT: s_mov_b32 s4, s34
645 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
646 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
647 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
648 ; GFX11-NEXT: s_mov_b32 s34, s4
649 ; GFX11-NEXT: ;;#ASMSTART
650 ; GFX11-NEXT: ; use s34
651 ; GFX11-NEXT: ;;#ASMEND
652 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
653 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
654 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
655 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
656 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
657 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
658 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
659 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
660 ; GFX11-NEXT: s_add_i32 s32, s32, -16
661 ; GFX11-NEXT: s_mov_b32 s33, s0
662 ; GFX11-NEXT: s_waitcnt vmcnt(0)
663 ; GFX11-NEXT: s_setpc_b64 s[30:31]
664 %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
665 call amdgpu_gfx void @external_void_func_void()
666 call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
; Def / call / use pattern with v40: an inline asm defines a value in v40,
; @external_void_func_void is called, and a second inline asm consumes the
; value in v40. Unlike the v31 case, the checks show no copy around the
; s_swappc_b64: the value stays in v40 across the call. This function itself
; spills the incoming v40 to the slot at s33 and reloads it before returning,
; and the SGPR lanes (s30, s31 and the copy of the incoming s33) go into v41,
; whose own spill slot is at s33 offset:4.
; %out is not referenced by the statements shown here.
670 define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
671 ; GFX9-LABEL: test_call_void_func_void_preserves_v40:
673 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674 ; GFX9-NEXT: s_mov_b32 s34, s33
675 ; GFX9-NEXT: s_mov_b32 s33, s32
676 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
677 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
678 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
679 ; GFX9-NEXT: v_writelane_b32 v41, s34, 2
680 ; GFX9-NEXT: v_writelane_b32 v41, s30, 0
681 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
682 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
683 ; GFX9-NEXT: s_addk_i32 s32, 0x400
684 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
685 ; GFX9-NEXT: v_writelane_b32 v41, s31, 1
686 ; GFX9-NEXT: ;;#ASMSTART
687 ; GFX9-NEXT: ; def v40
688 ; GFX9-NEXT: ;;#ASMEND
689 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
690 ; GFX9-NEXT: ;;#ASMSTART
691 ; GFX9-NEXT: ; use v40
692 ; GFX9-NEXT: ;;#ASMEND
693 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
694 ; GFX9-NEXT: v_readlane_b32 s31, v41, 1
695 ; GFX9-NEXT: v_readlane_b32 s30, v41, 0
696 ; GFX9-NEXT: v_readlane_b32 s34, v41, 2
697 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
698 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
699 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
700 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
701 ; GFX9-NEXT: s_mov_b32 s33, s34
702 ; GFX9-NEXT: s_waitcnt vmcnt(0)
703 ; GFX9-NEXT: s_setpc_b64 s[30:31]
705 ; GFX10-LABEL: test_call_void_func_void_preserves_v40:
707 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708 ; GFX10-NEXT: s_mov_b32 s34, s33
709 ; GFX10-NEXT: s_mov_b32 s33, s32
710 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
711 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
712 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
713 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
714 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2
715 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
716 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
717 ; GFX10-NEXT: s_addk_i32 s32, 0x200
718 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
719 ; GFX10-NEXT: v_writelane_b32 v41, s30, 0
720 ; GFX10-NEXT: ;;#ASMSTART
721 ; GFX10-NEXT: ; def v40
722 ; GFX10-NEXT: ;;#ASMEND
723 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1
724 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
725 ; GFX10-NEXT: ;;#ASMSTART
726 ; GFX10-NEXT: ; use v40
727 ; GFX10-NEXT: ;;#ASMEND
728 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
729 ; GFX10-NEXT: v_readlane_b32 s31, v41, 1
730 ; GFX10-NEXT: v_readlane_b32 s30, v41, 0
731 ; GFX10-NEXT: v_readlane_b32 s34, v41, 2
732 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
733 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
734 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
735 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
736 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
737 ; GFX10-NEXT: s_mov_b32 s33, s34
738 ; GFX10-NEXT: s_waitcnt vmcnt(0)
739 ; GFX10-NEXT: s_setpc_b64 s[30:31]
741 ; GFX11-LABEL: test_call_void_func_void_preserves_v40:
743 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744 ; GFX11-NEXT: s_mov_b32 s0, s33
745 ; GFX11-NEXT: s_mov_b32 s33, s32
746 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
747 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
748 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
749 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2
750 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
751 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
752 ; GFX11-NEXT: s_add_i32 s32, s32, 16
753 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
754 ; GFX11-NEXT: v_writelane_b32 v41, s30, 0
755 ; GFX11-NEXT: ;;#ASMSTART
756 ; GFX11-NEXT: ; def v40
757 ; GFX11-NEXT: ;;#ASMEND
758 ; GFX11-NEXT: v_writelane_b32 v41, s31, 1
759 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
760 ; GFX11-NEXT: ;;#ASMSTART
761 ; GFX11-NEXT: ; use v40
762 ; GFX11-NEXT: ;;#ASMEND
763 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
764 ; GFX11-NEXT: v_readlane_b32 s31, v41, 1
765 ; GFX11-NEXT: v_readlane_b32 s30, v41, 0
766 ; GFX11-NEXT: v_readlane_b32 s0, v41, 2
767 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
768 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
769 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
770 ; GFX11-NEXT: s_add_i32 s32, s32, -16
771 ; GFX11-NEXT: s_mov_b32 s33, s0
772 ; GFX11-NEXT: s_waitcnt vmcnt(0)
773 ; GFX11-NEXT: s_setpc_b64 s[30:31]
774 %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
775 call amdgpu_gfx void @external_void_func_void()
776 call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
; Helper callee, defined without an explicit calling-convention keyword, whose
; only visible body statement is an inline asm that lists s33 as clobbered.
; The expected code for all three targets keeps s33 intact across the asm by
; writing it to lane 0 of v0 and reading it back afterwards. v0 itself is
; stored to scratch at s32 before the asm and reloaded before returning, with
; exec inverted around each access via s_xor_saveexec.
; gfx1100 uses scratch_store/scratch_load instead of buffer_store/buffer_load
; and expects an s_delay_alu before the readlane.
; Called by @test_call_void_func_void_clobber_s33.
780 define hidden void @void_func_void_clobber_s33() #1 {
781 ; GFX9-LABEL: void_func_void_clobber_s33:
783 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
785 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
786 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
787 ; GFX9-NEXT: v_writelane_b32 v0, s33, 0
788 ; GFX9-NEXT: ;;#ASMSTART
789 ; GFX9-NEXT: ; clobber
790 ; GFX9-NEXT: ;;#ASMEND
791 ; GFX9-NEXT: v_readlane_b32 s33, v0, 0
792 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
793 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
794 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
795 ; GFX9-NEXT: s_waitcnt vmcnt(0)
796 ; GFX9-NEXT: s_setpc_b64 s[30:31]
798 ; GFX10-LABEL: void_func_void_clobber_s33:
800 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
802 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
803 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
804 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
805 ; GFX10-NEXT: v_writelane_b32 v0, s33, 0
806 ; GFX10-NEXT: ;;#ASMSTART
807 ; GFX10-NEXT: ; clobber
808 ; GFX10-NEXT: ;;#ASMEND
809 ; GFX10-NEXT: v_readlane_b32 s33, v0, 0
810 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
811 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
812 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
813 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
814 ; GFX10-NEXT: s_waitcnt vmcnt(0)
815 ; GFX10-NEXT: s_setpc_b64 s[30:31]
817 ; GFX11-LABEL: void_func_void_clobber_s33:
819 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
821 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
822 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
823 ; GFX11-NEXT: v_writelane_b32 v0, s33, 0
824 ; GFX11-NEXT: ;;#ASMSTART
825 ; GFX11-NEXT: ; clobber
826 ; GFX11-NEXT: ;;#ASMEND
827 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
828 ; GFX11-NEXT: v_readlane_b32 s33, v0, 0
829 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
830 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
831 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
832 ; GFX11-NEXT: s_waitcnt vmcnt(0)
833 ; GFX11-NEXT: s_setpc_b64 s[30:31]
834 call void asm sideeffect "; clobber", "~{s33}"() #0
; Helper callee, defined without an explicit calling-convention keyword, whose
; only visible body statement is an inline asm that lists s34 as clobbered.
; The expected code for all three targets keeps s34 intact across the asm by
; writing it to lane 0 of v0 and reading it back afterwards. v0 itself is
; stored to scratch at s32 before the asm and reloaded before returning, with
; exec inverted around each access via s_xor_saveexec.
; gfx1100 uses scratch_store/scratch_load instead of buffer_store/buffer_load
; and expects an s_delay_alu before the readlane.
; Called by @test_call_void_func_void_clobber_s34.
838 define hidden void @void_func_void_clobber_s34() #1 {
839 ; GFX9-LABEL: void_func_void_clobber_s34:
841 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
843 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
844 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
845 ; GFX9-NEXT: v_writelane_b32 v0, s34, 0
846 ; GFX9-NEXT: ;;#ASMSTART
847 ; GFX9-NEXT: ; clobber
848 ; GFX9-NEXT: ;;#ASMEND
849 ; GFX9-NEXT: v_readlane_b32 s34, v0, 0
850 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
851 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
852 ; GFX9-NEXT: s_mov_b64 exec, s[4:5]
853 ; GFX9-NEXT: s_waitcnt vmcnt(0)
854 ; GFX9-NEXT: s_setpc_b64 s[30:31]
856 ; GFX10-LABEL: void_func_void_clobber_s34:
858 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
860 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
861 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
862 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
863 ; GFX10-NEXT: v_writelane_b32 v0, s34, 0
864 ; GFX10-NEXT: ;;#ASMSTART
865 ; GFX10-NEXT: ; clobber
866 ; GFX10-NEXT: ;;#ASMEND
867 ; GFX10-NEXT: v_readlane_b32 s34, v0, 0
868 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
869 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
870 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
871 ; GFX10-NEXT: s_mov_b32 exec_lo, s4
872 ; GFX10-NEXT: s_waitcnt vmcnt(0)
873 ; GFX10-NEXT: s_setpc_b64 s[30:31]
875 ; GFX11-LABEL: void_func_void_clobber_s34:
877 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
878 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
879 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
880 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
881 ; GFX11-NEXT: v_writelane_b32 v0, s34, 0
882 ; GFX11-NEXT: ;;#ASMSTART
883 ; GFX11-NEXT: ; clobber
884 ; GFX11-NEXT: ;;#ASMEND
885 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
886 ; GFX11-NEXT: v_readlane_b32 s34, v0, 0
887 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
888 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
889 ; GFX11-NEXT: s_mov_b32 exec_lo, s0
890 ; GFX11-NEXT: s_waitcnt vmcnt(0)
891 ; GFX11-NEXT: s_setpc_b64 s[30:31]
892 call void asm sideeffect "; clobber", "~{s34}"() #0
; amdgpu_gfx caller that makes a single call to @void_func_void_clobber_s33.
; Expected prologue/epilogue, per the checks below:
;   - the incoming s33 is copied to a temporary (s34 on gfx900/gfx1010, s0 on
;     gfx1100), s33 is set to s32, and the copy is kept in lane 2 of v40 until
;     it is moved back into s33 before returning;
;   - v40 is stored to scratch at s33 under s_or_saveexec and reloaded the same
;     way in the epilogue;
;   - the return address s30/s31 is kept in lanes 0/1 of v40 across the call;
;   - s32 is advanced by 0x400 (gfx900), 0x200 (gfx1010) or 16 (gfx1100) and
;     moved back by the same amount in the epilogue.
; NOTE(review): the call site is marked amdgpu_gfx while the callee is defined
; without a calling-convention keyword -- presumably intentional for this
; test; confirm before changing either side.
896 define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
897 ; GFX9-LABEL: test_call_void_func_void_clobber_s33:
899 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900 ; GFX9-NEXT: s_mov_b32 s34, s33
901 ; GFX9-NEXT: s_mov_b32 s33, s32
902 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
903 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
904 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
905 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
906 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
907 ; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi
908 ; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo
909 ; GFX9-NEXT: s_addk_i32 s32, 0x400
910 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
911 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
912 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
913 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
914 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
915 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
916 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
917 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
918 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
919 ; GFX9-NEXT: s_mov_b32 s33, s34
920 ; GFX9-NEXT: s_waitcnt vmcnt(0)
921 ; GFX9-NEXT: s_setpc_b64 s[30:31]
923 ; GFX10-LABEL: test_call_void_func_void_clobber_s33:
925 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926 ; GFX10-NEXT: s_mov_b32 s34, s33
927 ; GFX10-NEXT: s_mov_b32 s33, s32
928 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
929 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
930 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
931 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
932 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
933 ; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi
934 ; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo
935 ; GFX10-NEXT: s_addk_i32 s32, 0x200
936 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
937 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
938 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
939 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
940 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
941 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
942 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
943 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
944 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
945 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
946 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
947 ; GFX10-NEXT: s_mov_b32 s33, s34
948 ; GFX10-NEXT: s_waitcnt vmcnt(0)
949 ; GFX10-NEXT: s_setpc_b64 s[30:31]
951 ; GFX11-LABEL: test_call_void_func_void_clobber_s33:
953 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; GFX11-NEXT: s_mov_b32 s0, s33
955 ; GFX11-NEXT: s_mov_b32 s33, s32
956 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
957 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
958 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
959 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
960 ; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi
961 ; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo
962 ; GFX11-NEXT: s_add_i32 s32, s32, 16
963 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
964 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
965 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
966 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
967 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
968 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
969 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
970 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
971 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
972 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
973 ; GFX11-NEXT: s_add_i32 s32, s32, -16
974 ; GFX11-NEXT: s_mov_b32 s33, s0
975 ; GFX11-NEXT: s_waitcnt vmcnt(0)
976 ; GFX11-NEXT: s_setpc_b64 s[30:31]
977 call amdgpu_gfx void @void_func_void_clobber_s33()
; amdgpu_gfx caller that makes a single call to @void_func_void_clobber_s34.
; Expected prologue/epilogue, per the checks below:
;   - the incoming s33 is copied to a temporary (s34 on gfx900/gfx1010, s0 on
;     gfx1100), s33 is set to s32, and the copy is kept in lane 2 of v40 until
;     it is moved back into s33 before returning;
;   - v40 is stored to scratch at s33 under s_or_saveexec and reloaded the same
;     way in the epilogue;
;   - the return address s30/s31 is kept in lanes 0/1 of v40 across the call;
;   - s32 is advanced by 0x400 (gfx900), 0x200 (gfx1010) or 16 (gfx1100) and
;     moved back by the same amount in the epilogue.
; On gfx900/gfx1010 the register the callee declares clobbered (s34) is also
; used here for the saved-s33 copy and the low half of the call target; the
; copy is re-read from lane 2 of v40 after the call rather than assumed live.
; NOTE(review): the call site is marked amdgpu_gfx while the callee is defined
; without a calling-convention keyword -- presumably intentional for this
; test; confirm before changing either side.
981 define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
982 ; GFX9-LABEL: test_call_void_func_void_clobber_s34:
984 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX9-NEXT: s_mov_b32 s34, s33
986 ; GFX9-NEXT: s_mov_b32 s33, s32
987 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
988 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
989 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
990 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2
991 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
992 ; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi
993 ; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo
994 ; GFX9-NEXT: s_addk_i32 s32, 0x400
995 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
996 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
997 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
998 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
999 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2
1000 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1001 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1002 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1003 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1004 ; GFX9-NEXT: s_mov_b32 s33, s34
1005 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1006 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1008 ; GFX10-LABEL: test_call_void_func_void_clobber_s34:
1010 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011 ; GFX10-NEXT: s_mov_b32 s34, s33
1012 ; GFX10-NEXT: s_mov_b32 s33, s32
1013 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1014 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1015 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1016 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1017 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2
1018 ; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi
1019 ; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo
1020 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1021 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
1022 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
1023 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1024 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
1025 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
1026 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2
1027 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1028 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1029 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1030 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1031 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1032 ; GFX10-NEXT: s_mov_b32 s33, s34
1033 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1034 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1036 ; GFX11-LABEL: test_call_void_func_void_clobber_s34:
1038 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039 ; GFX11-NEXT: s_mov_b32 s0, s33
1040 ; GFX11-NEXT: s_mov_b32 s33, s32
1041 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1042 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1043 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1044 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2
1045 ; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi
1046 ; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo
1047 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1048 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0
1049 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
1050 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1051 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1052 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1
1053 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0
1054 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2
1055 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1056 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1057 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1058 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1059 ; GFX11-NEXT: s_mov_b32 s33, s0
1060 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1061 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1062 call amdgpu_gfx void @void_func_void_clobber_s34()
; Keeps an SGPR value live across a call to @external_void_func_void.
; The IR defines a value in s40 through an inline-asm output constraint, calls
; the external function, then feeds the value to a second inline asm through
; an unconstrained "s" operand.
; Expected code, per the checks below: the value is copied from s40 into s4
; before the call and the second asm reads s4 afterwards. The incoming s4 is
; kept in lane 0 of v40 and read back in the epilogue, next to the return
; address (lanes 1/2) and the saved copy of s33 (lane 3).
; NOTE(review): despite the "_kernel" suffix this is defined as an amdgpu_gfx
; function, not a kernel entry point.
1066 define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
1067 ; GFX9-LABEL: callee_saved_sgpr_kernel:
1069 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1070 ; GFX9-NEXT: s_mov_b32 s34, s33
1071 ; GFX9-NEXT: s_mov_b32 s33, s32
1072 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1073 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1074 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1075 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
1076 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
1077 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
1078 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
1079 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
1080 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1081 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
1082 ; GFX9-NEXT: ;;#ASMSTART
1083 ; GFX9-NEXT: ; def s40
1084 ; GFX9-NEXT: ;;#ASMEND
1085 ; GFX9-NEXT: s_mov_b32 s4, s40
1086 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1087 ; GFX9-NEXT: ;;#ASMSTART
1088 ; GFX9-NEXT: ; use s4
1089 ; GFX9-NEXT: ;;#ASMEND
1090 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
1091 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
1092 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1093 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
1094 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1095 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1096 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1097 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1098 ; GFX9-NEXT: s_mov_b32 s33, s34
1099 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1100 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1102 ; GFX10-LABEL: callee_saved_sgpr_kernel:
1104 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105 ; GFX10-NEXT: s_mov_b32 s34, s33
1106 ; GFX10-NEXT: s_mov_b32 s33, s32
1107 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1108 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1109 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1110 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1111 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
1112 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
1113 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
1114 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1115 ; GFX10-NEXT: ;;#ASMSTART
1116 ; GFX10-NEXT: ; def s40
1117 ; GFX10-NEXT: ;;#ASMEND
1118 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
1119 ; GFX10-NEXT: s_mov_b32 s4, s40
1120 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
1121 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
1122 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1123 ; GFX10-NEXT: ;;#ASMSTART
1124 ; GFX10-NEXT: ; use s4
1125 ; GFX10-NEXT: ;;#ASMEND
1126 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
1127 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
1128 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1129 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
1130 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1131 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1132 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1133 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1134 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1135 ; GFX10-NEXT: s_mov_b32 s33, s34
1136 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1137 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1139 ; GFX11-LABEL: callee_saved_sgpr_kernel:
1141 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142 ; GFX11-NEXT: s_mov_b32 s0, s33
1143 ; GFX11-NEXT: s_mov_b32 s33, s32
1144 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1145 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1146 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1147 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
1148 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
1149 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
1150 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1151 ; GFX11-NEXT: ;;#ASMSTART
1152 ; GFX11-NEXT: ; def s40
1153 ; GFX11-NEXT: ;;#ASMEND
1154 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
1155 ; GFX11-NEXT: s_mov_b32 s4, s40
1156 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
1157 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
1158 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1159 ; GFX11-NEXT: ;;#ASMSTART
1160 ; GFX11-NEXT: ; use s4
1161 ; GFX11-NEXT: ;;#ASMEND
1162 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1163 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
1164 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
1165 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
1166 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
1167 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1168 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1169 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1170 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1171 ; GFX11-NEXT: s_mov_b32 s33, s0
1172 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1173 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1174 %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
1175 call amdgpu_gfx void @external_void_func_void()
1176 call void asm sideeffect "; use $0", "s"(i32 %s40) #0
; Keeps both an SGPR value and a VGPR value live across a call to
; @external_void_func_void.
; The IR defines one value in s40 and one in v32 through inline-asm output
; constraints, calls the external function, then feeds each value to an
; inline asm through an unconstrained "s" / "v" operand.
; Expected code, per the checks below:
;   - the SGPR value is copied from s40 into s4 before the call and read from
;     s4 afterwards; the incoming s4 is kept in lane 0 of v40;
;   - the VGPR value is copied from v32 into v41 before the call and read from
;     v41 afterwards; the incoming v41 is stored to scratch at s33 (offset 0)
;     and reloaded in the epilogue;
;   - v40 also holds the return address (lanes 1/2) and the saved copy of s33
;     (lane 3), and is itself stored at s33 offset 4 under s_or_saveexec.
; NOTE(review): despite the "_kernel" suffix this is defined as an amdgpu_gfx
; function, not a kernel entry point.
1180 define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
1181 ; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel:
1183 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184 ; GFX9-NEXT: s_mov_b32 s34, s33
1185 ; GFX9-NEXT: s_mov_b32 s33, s32
1186 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1187 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1188 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1189 ; GFX9-NEXT: v_writelane_b32 v40, s34, 3
1190 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
1191 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
1192 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
1193 ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
1194 ; GFX9-NEXT: s_addk_i32 s32, 0x400
1195 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1196 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
1197 ; GFX9-NEXT: ;;#ASMSTART
1198 ; GFX9-NEXT: ; def s40
1199 ; GFX9-NEXT: ;;#ASMEND
1200 ; GFX9-NEXT: s_mov_b32 s4, s40
1201 ; GFX9-NEXT: ;;#ASMSTART
1202 ; GFX9-NEXT: ; def v32
1203 ; GFX9-NEXT: ;;#ASMEND
1204 ; GFX9-NEXT: v_mov_b32_e32 v41, v32
1205 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
1206 ; GFX9-NEXT: ;;#ASMSTART
1207 ; GFX9-NEXT: ; use s4
1208 ; GFX9-NEXT: ;;#ASMEND
1209 ; GFX9-NEXT: ;;#ASMSTART
1210 ; GFX9-NEXT: ; use v41
1211 ; GFX9-NEXT: ;;#ASMEND
1212 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1213 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
1214 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
1215 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
1216 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3
1217 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
1218 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1219 ; GFX9-NEXT: s_mov_b64 exec, s[36:37]
1220 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
1221 ; GFX9-NEXT: s_mov_b32 s33, s34
1222 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1223 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1225 ; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel:
1227 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228 ; GFX10-NEXT: s_mov_b32 s34, s33
1229 ; GFX10-NEXT: s_mov_b32 s33, s32
1230 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1231 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
1232 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1233 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1234 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3
1235 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
1236 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
1237 ; GFX10-NEXT: s_addk_i32 s32, 0x200
1238 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1239 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
1240 ; GFX10-NEXT: ;;#ASMSTART
1241 ; GFX10-NEXT: ; def s40
1242 ; GFX10-NEXT: ;;#ASMEND
1243 ; GFX10-NEXT: s_mov_b32 s4, s40
1244 ; GFX10-NEXT: ;;#ASMSTART
1245 ; GFX10-NEXT: ; def v32
1246 ; GFX10-NEXT: ;;#ASMEND
1247 ; GFX10-NEXT: v_mov_b32_e32 v41, v32
1248 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
1249 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
1250 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
1251 ; GFX10-NEXT: ;;#ASMSTART
1252 ; GFX10-NEXT: ; use s4
1253 ; GFX10-NEXT: ;;#ASMEND
1254 ; GFX10-NEXT: ;;#ASMSTART
1255 ; GFX10-NEXT: ; use v41
1256 ; GFX10-NEXT: ;;#ASMEND
1257 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1258 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
1259 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
1260 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
1261 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3
1262 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1
1263 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
1264 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
1265 ; GFX10-NEXT: s_mov_b32 exec_lo, s35
1266 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
1267 ; GFX10-NEXT: s_mov_b32 s33, s34
1268 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1269 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1271 ; GFX11-LABEL: callee_saved_sgpr_vgpr_kernel:
1273 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1274 ; GFX11-NEXT: s_mov_b32 s0, s33
1275 ; GFX11-NEXT: s_mov_b32 s33, s32
1276 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1277 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
1278 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1279 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3
1280 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
1281 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
1282 ; GFX11-NEXT: s_add_i32 s32, s32, 16
1283 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
1284 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0
1285 ; GFX11-NEXT: ;;#ASMSTART
1286 ; GFX11-NEXT: ; def s40
1287 ; GFX11-NEXT: ;;#ASMEND
1288 ; GFX11-NEXT: s_mov_b32 s4, s40
1289 ; GFX11-NEXT: ;;#ASMSTART
1290 ; GFX11-NEXT: ; def v32
1291 ; GFX11-NEXT: ;;#ASMEND
1292 ; GFX11-NEXT: v_mov_b32_e32 v41, v32
1293 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1
1294 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2
1295 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
1296 ; GFX11-NEXT: ;;#ASMSTART
1297 ; GFX11-NEXT: ; use s4
1298 ; GFX11-NEXT: ;;#ASMEND
1299 ; GFX11-NEXT: ;;#ASMSTART
1300 ; GFX11-NEXT: ; use v41
1301 ; GFX11-NEXT: ;;#ASMEND
1302 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
1303 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2
1304 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1
1305 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0
1306 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3
1307 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1
1308 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
1309 ; GFX11-NEXT: s_mov_b32 exec_lo, s1
1310 ; GFX11-NEXT: s_add_i32 s32, s32, -16
1311 ; GFX11-NEXT: s_mov_b32 s33, s0
1312 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1313 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1314 %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
1315 %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
1316 call amdgpu_gfx void @external_void_func_void()
1317 call void asm sideeffect "; use $0", "s"(i32 %s40) #0
1318 call void asm sideeffect "; use $0", "v"(i32 %v32) #0
; Attribute groups referenced by the functions and call sites in this file.
; #0 (nounwind) is attached to the test_call_* callers and to the inline-asm
; call sites shown here; #1 additionally carries noinline and is attached to
; the void_func_void_clobber_* helpers and the callee_saved_* functions.
1322 attributes #0 = { nounwind }
1323 attributes #1 = { nounwind noinline }