1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O0 -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
4 ; FIXME: We should disable the SDWA peephole because dead-code elimination, which
5 ; runs after the peephole, ruins this test (different register numbers).
7 ; Spill all SGPRs so multiple VGPRs are required for spilling all of them.
9 ; Ideally we only need 2 VGPRs for all spilling. The VGPRs are
10 ; allocated per-frame index, so it's possible to end up with more.
11 define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) #0 {
12 ; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
14 ; GCN-NEXT: s_mov_b32 s92, SCRATCH_RSRC_DWORD0
15 ; GCN-NEXT: s_mov_b32 s93, SCRATCH_RSRC_DWORD1
16 ; GCN-NEXT: s_mov_b32 s94, -1
17 ; GCN-NEXT: s_mov_b32 s95, 0xe8f000
18 ; GCN-NEXT: s_add_u32 s92, s92, s11
19 ; GCN-NEXT: s_addc_u32 s93, s93, 0
20 ; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
21 ; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
22 ; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
23 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
24 ; GCN-NEXT: ;;#ASMSTART
25 ; GCN-NEXT: ; def s[4:11]
27 ; GCN-NEXT: v_writelane_b32 v2, s4, 0
28 ; GCN-NEXT: v_writelane_b32 v2, s5, 1
29 ; GCN-NEXT: v_writelane_b32 v2, s6, 2
30 ; GCN-NEXT: v_writelane_b32 v2, s7, 3
31 ; GCN-NEXT: v_writelane_b32 v2, s8, 4
32 ; GCN-NEXT: v_writelane_b32 v2, s9, 5
33 ; GCN-NEXT: v_writelane_b32 v2, s10, 6
34 ; GCN-NEXT: v_writelane_b32 v2, s11, 7
35 ; GCN-NEXT: ;;#ASMSTART
36 ; GCN-NEXT: ; def s[4:11]
38 ; GCN-NEXT: v_writelane_b32 v2, s4, 8
39 ; GCN-NEXT: v_writelane_b32 v2, s5, 9
40 ; GCN-NEXT: v_writelane_b32 v2, s6, 10
41 ; GCN-NEXT: v_writelane_b32 v2, s7, 11
42 ; GCN-NEXT: v_writelane_b32 v2, s8, 12
43 ; GCN-NEXT: v_writelane_b32 v2, s9, 13
44 ; GCN-NEXT: v_writelane_b32 v2, s10, 14
45 ; GCN-NEXT: v_writelane_b32 v2, s11, 15
46 ; GCN-NEXT: ;;#ASMSTART
47 ; GCN-NEXT: ; def s[4:11]
49 ; GCN-NEXT: v_writelane_b32 v2, s4, 16
50 ; GCN-NEXT: v_writelane_b32 v2, s5, 17
51 ; GCN-NEXT: v_writelane_b32 v2, s6, 18
52 ; GCN-NEXT: v_writelane_b32 v2, s7, 19
53 ; GCN-NEXT: v_writelane_b32 v2, s8, 20
54 ; GCN-NEXT: v_writelane_b32 v2, s9, 21
55 ; GCN-NEXT: v_writelane_b32 v2, s10, 22
56 ; GCN-NEXT: v_writelane_b32 v2, s11, 23
57 ; GCN-NEXT: ;;#ASMSTART
58 ; GCN-NEXT: ; def s[4:11]
60 ; GCN-NEXT: v_writelane_b32 v2, s4, 24
61 ; GCN-NEXT: v_writelane_b32 v2, s5, 25
62 ; GCN-NEXT: v_writelane_b32 v2, s6, 26
63 ; GCN-NEXT: v_writelane_b32 v2, s7, 27
64 ; GCN-NEXT: v_writelane_b32 v2, s8, 28
65 ; GCN-NEXT: v_writelane_b32 v2, s9, 29
66 ; GCN-NEXT: v_writelane_b32 v2, s10, 30
67 ; GCN-NEXT: v_writelane_b32 v2, s11, 31
68 ; GCN-NEXT: ;;#ASMSTART
69 ; GCN-NEXT: ; def s[4:11]
71 ; GCN-NEXT: v_writelane_b32 v2, s4, 32
72 ; GCN-NEXT: v_writelane_b32 v2, s5, 33
73 ; GCN-NEXT: v_writelane_b32 v2, s6, 34
74 ; GCN-NEXT: v_writelane_b32 v2, s7, 35
75 ; GCN-NEXT: v_writelane_b32 v2, s8, 36
76 ; GCN-NEXT: v_writelane_b32 v2, s9, 37
77 ; GCN-NEXT: v_writelane_b32 v2, s10, 38
78 ; GCN-NEXT: v_writelane_b32 v2, s11, 39
79 ; GCN-NEXT: ;;#ASMSTART
80 ; GCN-NEXT: ; def s[4:11]
82 ; GCN-NEXT: v_writelane_b32 v2, s4, 40
83 ; GCN-NEXT: v_writelane_b32 v2, s5, 41
84 ; GCN-NEXT: v_writelane_b32 v2, s6, 42
85 ; GCN-NEXT: v_writelane_b32 v2, s7, 43
86 ; GCN-NEXT: v_writelane_b32 v2, s8, 44
87 ; GCN-NEXT: v_writelane_b32 v2, s9, 45
88 ; GCN-NEXT: v_writelane_b32 v2, s10, 46
89 ; GCN-NEXT: v_writelane_b32 v2, s11, 47
90 ; GCN-NEXT: ;;#ASMSTART
91 ; GCN-NEXT: ; def s[4:11]
93 ; GCN-NEXT: v_writelane_b32 v2, s4, 48
94 ; GCN-NEXT: v_writelane_b32 v2, s5, 49
95 ; GCN-NEXT: v_writelane_b32 v2, s6, 50
96 ; GCN-NEXT: v_writelane_b32 v2, s7, 51
97 ; GCN-NEXT: v_writelane_b32 v2, s8, 52
98 ; GCN-NEXT: v_writelane_b32 v2, s9, 53
99 ; GCN-NEXT: v_writelane_b32 v2, s10, 54
100 ; GCN-NEXT: v_writelane_b32 v2, s11, 55
101 ; GCN-NEXT: ;;#ASMSTART
102 ; GCN-NEXT: ; def s[4:11]
103 ; GCN-NEXT: ;;#ASMEND
104 ; GCN-NEXT: v_writelane_b32 v2, s4, 56
105 ; GCN-NEXT: v_writelane_b32 v2, s5, 57
106 ; GCN-NEXT: v_writelane_b32 v2, s6, 58
107 ; GCN-NEXT: v_writelane_b32 v2, s7, 59
108 ; GCN-NEXT: v_writelane_b32 v2, s8, 60
109 ; GCN-NEXT: v_writelane_b32 v2, s9, 61
110 ; GCN-NEXT: v_writelane_b32 v2, s10, 62
111 ; GCN-NEXT: v_writelane_b32 v2, s11, 63
112 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
113 ; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 offset:12 ; 4-byte Folded Spill
114 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
115 ; GCN-NEXT: ;;#ASMSTART
116 ; GCN-NEXT: ; def s[4:11]
117 ; GCN-NEXT: ;;#ASMEND
118 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
119 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
120 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
121 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
122 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
123 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
124 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
125 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
126 ; GCN-NEXT: ;;#ASMSTART
127 ; GCN-NEXT: ; def s[4:11]
128 ; GCN-NEXT: ;;#ASMEND
129 ; GCN-NEXT: v_writelane_b32 v1, s4, 8
130 ; GCN-NEXT: v_writelane_b32 v1, s5, 9
131 ; GCN-NEXT: v_writelane_b32 v1, s6, 10
132 ; GCN-NEXT: v_writelane_b32 v1, s7, 11
133 ; GCN-NEXT: v_writelane_b32 v1, s8, 12
134 ; GCN-NEXT: v_writelane_b32 v1, s9, 13
135 ; GCN-NEXT: v_writelane_b32 v1, s10, 14
136 ; GCN-NEXT: v_writelane_b32 v1, s11, 15
137 ; GCN-NEXT: ;;#ASMSTART
138 ; GCN-NEXT: ; def s[4:11]
139 ; GCN-NEXT: ;;#ASMEND
140 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
141 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
142 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
143 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
144 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
145 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
146 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
147 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
148 ; GCN-NEXT: ;;#ASMSTART
149 ; GCN-NEXT: ; def s[4:11]
150 ; GCN-NEXT: ;;#ASMEND
151 ; GCN-NEXT: v_writelane_b32 v1, s4, 24
152 ; GCN-NEXT: v_writelane_b32 v1, s5, 25
153 ; GCN-NEXT: v_writelane_b32 v1, s6, 26
154 ; GCN-NEXT: v_writelane_b32 v1, s7, 27
155 ; GCN-NEXT: v_writelane_b32 v1, s8, 28
156 ; GCN-NEXT: v_writelane_b32 v1, s9, 29
157 ; GCN-NEXT: v_writelane_b32 v1, s10, 30
158 ; GCN-NEXT: v_writelane_b32 v1, s11, 31
159 ; GCN-NEXT: ;;#ASMSTART
160 ; GCN-NEXT: ; def s[4:11]
161 ; GCN-NEXT: ;;#ASMEND
162 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
163 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
164 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
165 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
166 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
167 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
168 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
169 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
170 ; GCN-NEXT: ;;#ASMSTART
171 ; GCN-NEXT: ; def s[4:11]
172 ; GCN-NEXT: ;;#ASMEND
173 ; GCN-NEXT: v_writelane_b32 v1, s4, 40
174 ; GCN-NEXT: v_writelane_b32 v1, s5, 41
175 ; GCN-NEXT: v_writelane_b32 v1, s6, 42
176 ; GCN-NEXT: v_writelane_b32 v1, s7, 43
177 ; GCN-NEXT: v_writelane_b32 v1, s8, 44
178 ; GCN-NEXT: v_writelane_b32 v1, s9, 45
179 ; GCN-NEXT: v_writelane_b32 v1, s10, 46
180 ; GCN-NEXT: v_writelane_b32 v1, s11, 47
181 ; GCN-NEXT: ;;#ASMSTART
182 ; GCN-NEXT: ; def s[4:11]
183 ; GCN-NEXT: ;;#ASMEND
184 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
185 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
186 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
187 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
188 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
189 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
190 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
191 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
192 ; GCN-NEXT: ;;#ASMSTART
193 ; GCN-NEXT: ; def s[4:11]
194 ; GCN-NEXT: ;;#ASMEND
195 ; GCN-NEXT: v_writelane_b32 v1, s4, 56
196 ; GCN-NEXT: v_writelane_b32 v1, s5, 57
197 ; GCN-NEXT: v_writelane_b32 v1, s6, 58
198 ; GCN-NEXT: v_writelane_b32 v1, s7, 59
199 ; GCN-NEXT: v_writelane_b32 v1, s8, 60
200 ; GCN-NEXT: v_writelane_b32 v1, s9, 61
201 ; GCN-NEXT: v_writelane_b32 v1, s10, 62
202 ; GCN-NEXT: v_writelane_b32 v1, s11, 63
203 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
204 ; GCN-NEXT: buffer_store_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Spill
205 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
206 ; GCN-NEXT: ;;#ASMSTART
207 ; GCN-NEXT: ; def s[4:11]
208 ; GCN-NEXT: ;;#ASMEND
209 ; GCN-NEXT: v_writelane_b32 v0, s4, 0
210 ; GCN-NEXT: v_writelane_b32 v0, s5, 1
211 ; GCN-NEXT: v_writelane_b32 v0, s6, 2
212 ; GCN-NEXT: v_writelane_b32 v0, s7, 3
213 ; GCN-NEXT: v_writelane_b32 v0, s8, 4
214 ; GCN-NEXT: v_writelane_b32 v0, s9, 5
215 ; GCN-NEXT: v_writelane_b32 v0, s10, 6
216 ; GCN-NEXT: v_writelane_b32 v0, s11, 7
217 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
218 ; GCN-NEXT: buffer_store_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill
219 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
220 ; GCN-NEXT: s_mov_b32 s1, 0
221 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
222 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
223 ; GCN-NEXT: s_cbranch_scc1 .LBB0_2
224 ; GCN-NEXT: ; %bb.1: ; %bb0
225 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
226 ; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
227 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
228 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
229 ; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload
230 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
231 ; GCN-NEXT: s_waitcnt vmcnt(1)
232 ; GCN-NEXT: v_readlane_b32 s8, v2, 56
233 ; GCN-NEXT: v_readlane_b32 s9, v2, 57
234 ; GCN-NEXT: v_readlane_b32 s10, v2, 58
235 ; GCN-NEXT: v_readlane_b32 s11, v2, 59
236 ; GCN-NEXT: v_readlane_b32 s12, v2, 60
237 ; GCN-NEXT: v_readlane_b32 s13, v2, 61
238 ; GCN-NEXT: v_readlane_b32 s14, v2, 62
239 ; GCN-NEXT: v_readlane_b32 s15, v2, 63
240 ; GCN-NEXT: v_readlane_b32 s16, v2, 48
241 ; GCN-NEXT: v_readlane_b32 s17, v2, 49
242 ; GCN-NEXT: v_readlane_b32 s18, v2, 50
243 ; GCN-NEXT: v_readlane_b32 s19, v2, 51
244 ; GCN-NEXT: v_readlane_b32 s20, v2, 52
245 ; GCN-NEXT: v_readlane_b32 s21, v2, 53
246 ; GCN-NEXT: v_readlane_b32 s22, v2, 54
247 ; GCN-NEXT: v_readlane_b32 s23, v2, 55
248 ; GCN-NEXT: v_readlane_b32 s24, v2, 40
249 ; GCN-NEXT: v_readlane_b32 s25, v2, 41
250 ; GCN-NEXT: v_readlane_b32 s26, v2, 42
251 ; GCN-NEXT: v_readlane_b32 s27, v2, 43
252 ; GCN-NEXT: v_readlane_b32 s28, v2, 44
253 ; GCN-NEXT: v_readlane_b32 s29, v2, 45
254 ; GCN-NEXT: v_readlane_b32 s30, v2, 46
255 ; GCN-NEXT: v_readlane_b32 s31, v2, 47
256 ; GCN-NEXT: v_readlane_b32 s36, v2, 32
257 ; GCN-NEXT: v_readlane_b32 s37, v2, 33
258 ; GCN-NEXT: v_readlane_b32 s38, v2, 34
259 ; GCN-NEXT: v_readlane_b32 s39, v2, 35
260 ; GCN-NEXT: v_readlane_b32 s40, v2, 36
261 ; GCN-NEXT: v_readlane_b32 s41, v2, 37
262 ; GCN-NEXT: v_readlane_b32 s42, v2, 38
263 ; GCN-NEXT: v_readlane_b32 s43, v2, 39
264 ; GCN-NEXT: v_readlane_b32 s44, v2, 24
265 ; GCN-NEXT: v_readlane_b32 s45, v2, 25
266 ; GCN-NEXT: v_readlane_b32 s46, v2, 26
267 ; GCN-NEXT: v_readlane_b32 s47, v2, 27
268 ; GCN-NEXT: v_readlane_b32 s48, v2, 28
269 ; GCN-NEXT: v_readlane_b32 s49, v2, 29
270 ; GCN-NEXT: v_readlane_b32 s50, v2, 30
271 ; GCN-NEXT: v_readlane_b32 s51, v2, 31
272 ; GCN-NEXT: v_readlane_b32 s52, v2, 16
273 ; GCN-NEXT: v_readlane_b32 s53, v2, 17
274 ; GCN-NEXT: v_readlane_b32 s54, v2, 18
275 ; GCN-NEXT: v_readlane_b32 s55, v2, 19
276 ; GCN-NEXT: v_readlane_b32 s56, v2, 20
277 ; GCN-NEXT: v_readlane_b32 s57, v2, 21
278 ; GCN-NEXT: v_readlane_b32 s58, v2, 22
279 ; GCN-NEXT: v_readlane_b32 s59, v2, 23
280 ; GCN-NEXT: v_readlane_b32 s60, v2, 8
281 ; GCN-NEXT: v_readlane_b32 s61, v2, 9
282 ; GCN-NEXT: v_readlane_b32 s62, v2, 10
283 ; GCN-NEXT: v_readlane_b32 s63, v2, 11
284 ; GCN-NEXT: v_readlane_b32 s64, v2, 12
285 ; GCN-NEXT: v_readlane_b32 s65, v2, 13
286 ; GCN-NEXT: v_readlane_b32 s66, v2, 14
287 ; GCN-NEXT: v_readlane_b32 s67, v2, 15
288 ; GCN-NEXT: v_readlane_b32 s68, v2, 0
289 ; GCN-NEXT: v_readlane_b32 s69, v2, 1
290 ; GCN-NEXT: v_readlane_b32 s70, v2, 2
291 ; GCN-NEXT: v_readlane_b32 s71, v2, 3
292 ; GCN-NEXT: v_readlane_b32 s72, v2, 4
293 ; GCN-NEXT: v_readlane_b32 s73, v2, 5
294 ; GCN-NEXT: v_readlane_b32 s74, v2, 6
295 ; GCN-NEXT: v_readlane_b32 s75, v2, 7
296 ; GCN-NEXT: s_waitcnt vmcnt(0)
297 ; GCN-NEXT: v_readlane_b32 s76, v1, 56
298 ; GCN-NEXT: v_readlane_b32 s77, v1, 57
299 ; GCN-NEXT: v_readlane_b32 s78, v1, 58
300 ; GCN-NEXT: v_readlane_b32 s79, v1, 59
301 ; GCN-NEXT: v_readlane_b32 s80, v1, 60
302 ; GCN-NEXT: v_readlane_b32 s81, v1, 61
303 ; GCN-NEXT: v_readlane_b32 s82, v1, 62
304 ; GCN-NEXT: v_readlane_b32 s83, v1, 63
305 ; GCN-NEXT: v_readlane_b32 s84, v1, 48
306 ; GCN-NEXT: v_readlane_b32 s85, v1, 49
307 ; GCN-NEXT: v_readlane_b32 s86, v1, 50
308 ; GCN-NEXT: v_readlane_b32 s87, v1, 51
309 ; GCN-NEXT: v_readlane_b32 s88, v1, 52
310 ; GCN-NEXT: v_readlane_b32 s89, v1, 53
311 ; GCN-NEXT: v_readlane_b32 s90, v1, 54
312 ; GCN-NEXT: v_readlane_b32 s91, v1, 55
313 ; GCN-NEXT: v_readlane_b32 s0, v1, 0
314 ; GCN-NEXT: v_readlane_b32 s1, v1, 1
315 ; GCN-NEXT: v_readlane_b32 s2, v1, 2
316 ; GCN-NEXT: v_readlane_b32 s3, v1, 3
317 ; GCN-NEXT: v_readlane_b32 s4, v1, 4
318 ; GCN-NEXT: v_readlane_b32 s5, v1, 5
319 ; GCN-NEXT: v_readlane_b32 s6, v1, 6
320 ; GCN-NEXT: v_readlane_b32 s7, v1, 7
321 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
322 ; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
323 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
324 ; GCN-NEXT: ;;#ASMSTART
325 ; GCN-NEXT: ; use s[0:7]
326 ; GCN-NEXT: ;;#ASMEND
327 ; GCN-NEXT: v_readlane_b32 s0, v1, 8
328 ; GCN-NEXT: v_readlane_b32 s1, v1, 9
329 ; GCN-NEXT: v_readlane_b32 s2, v1, 10
330 ; GCN-NEXT: v_readlane_b32 s3, v1, 11
331 ; GCN-NEXT: v_readlane_b32 s4, v1, 12
332 ; GCN-NEXT: v_readlane_b32 s5, v1, 13
333 ; GCN-NEXT: v_readlane_b32 s6, v1, 14
334 ; GCN-NEXT: v_readlane_b32 s7, v1, 15
335 ; GCN-NEXT: ;;#ASMSTART
336 ; GCN-NEXT: ; use s[0:7]
337 ; GCN-NEXT: ;;#ASMEND
338 ; GCN-NEXT: v_readlane_b32 s0, v1, 16
339 ; GCN-NEXT: v_readlane_b32 s1, v1, 17
340 ; GCN-NEXT: v_readlane_b32 s2, v1, 18
341 ; GCN-NEXT: v_readlane_b32 s3, v1, 19
342 ; GCN-NEXT: v_readlane_b32 s4, v1, 20
343 ; GCN-NEXT: v_readlane_b32 s5, v1, 21
344 ; GCN-NEXT: v_readlane_b32 s6, v1, 22
345 ; GCN-NEXT: v_readlane_b32 s7, v1, 23
346 ; GCN-NEXT: ;;#ASMSTART
347 ; GCN-NEXT: ; use s[0:7]
348 ; GCN-NEXT: ;;#ASMEND
349 ; GCN-NEXT: v_readlane_b32 s0, v1, 24
350 ; GCN-NEXT: v_readlane_b32 s1, v1, 25
351 ; GCN-NEXT: v_readlane_b32 s2, v1, 26
352 ; GCN-NEXT: v_readlane_b32 s3, v1, 27
353 ; GCN-NEXT: v_readlane_b32 s4, v1, 28
354 ; GCN-NEXT: v_readlane_b32 s5, v1, 29
355 ; GCN-NEXT: v_readlane_b32 s6, v1, 30
356 ; GCN-NEXT: v_readlane_b32 s7, v1, 31
357 ; GCN-NEXT: ;;#ASMSTART
358 ; GCN-NEXT: ; use s[0:7]
359 ; GCN-NEXT: ;;#ASMEND
360 ; GCN-NEXT: v_readlane_b32 s0, v1, 32
361 ; GCN-NEXT: v_readlane_b32 s1, v1, 33
362 ; GCN-NEXT: v_readlane_b32 s2, v1, 34
363 ; GCN-NEXT: v_readlane_b32 s3, v1, 35
364 ; GCN-NEXT: v_readlane_b32 s4, v1, 36
365 ; GCN-NEXT: v_readlane_b32 s5, v1, 37
366 ; GCN-NEXT: v_readlane_b32 s6, v1, 38
367 ; GCN-NEXT: v_readlane_b32 s7, v1, 39
368 ; GCN-NEXT: ;;#ASMSTART
369 ; GCN-NEXT: ; use s[0:7]
370 ; GCN-NEXT: ;;#ASMEND
371 ; GCN-NEXT: v_readlane_b32 s0, v1, 40
372 ; GCN-NEXT: v_readlane_b32 s1, v1, 41
373 ; GCN-NEXT: v_readlane_b32 s2, v1, 42
374 ; GCN-NEXT: v_readlane_b32 s3, v1, 43
375 ; GCN-NEXT: v_readlane_b32 s4, v1, 44
376 ; GCN-NEXT: v_readlane_b32 s5, v1, 45
377 ; GCN-NEXT: v_readlane_b32 s6, v1, 46
378 ; GCN-NEXT: v_readlane_b32 s7, v1, 47
379 ; GCN-NEXT: ;;#ASMSTART
380 ; GCN-NEXT: ; use s[0:7]
381 ; GCN-NEXT: ;;#ASMEND
382 ; GCN-NEXT: s_waitcnt vmcnt(0)
383 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
384 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
385 ; GCN-NEXT: v_readlane_b32 s2, v0, 2
386 ; GCN-NEXT: v_readlane_b32 s3, v0, 3
387 ; GCN-NEXT: v_readlane_b32 s4, v0, 4
388 ; GCN-NEXT: v_readlane_b32 s5, v0, 5
389 ; GCN-NEXT: v_readlane_b32 s6, v0, 6
390 ; GCN-NEXT: v_readlane_b32 s7, v0, 7
391 ; GCN-NEXT: ;;#ASMSTART
392 ; GCN-NEXT: ; use s[84:91]
393 ; GCN-NEXT: ;;#ASMEND
394 ; GCN-NEXT: ;;#ASMSTART
395 ; GCN-NEXT: ; use s[76:83]
396 ; GCN-NEXT: ;;#ASMEND
397 ; GCN-NEXT: ;;#ASMSTART
398 ; GCN-NEXT: ; use s[68:75]
399 ; GCN-NEXT: ;;#ASMEND
400 ; GCN-NEXT: ;;#ASMSTART
401 ; GCN-NEXT: ; use s[60:67]
402 ; GCN-NEXT: ;;#ASMEND
403 ; GCN-NEXT: ;;#ASMSTART
404 ; GCN-NEXT: ; use s[52:59]
405 ; GCN-NEXT: ;;#ASMEND
406 ; GCN-NEXT: ;;#ASMSTART
407 ; GCN-NEXT: ; use s[44:51]
408 ; GCN-NEXT: ;;#ASMEND
409 ; GCN-NEXT: ;;#ASMSTART
410 ; GCN-NEXT: ; use s[36:43]
411 ; GCN-NEXT: ;;#ASMEND
412 ; GCN-NEXT: ;;#ASMSTART
413 ; GCN-NEXT: ; use s[24:31]
414 ; GCN-NEXT: ;;#ASMEND
415 ; GCN-NEXT: ;;#ASMSTART
416 ; GCN-NEXT: ; use s[16:23]
417 ; GCN-NEXT: ;;#ASMEND
418 ; GCN-NEXT: ;;#ASMSTART
419 ; GCN-NEXT: ; use s[8:15]
420 ; GCN-NEXT: ;;#ASMEND
421 ; GCN-NEXT: ;;#ASMSTART
422 ; GCN-NEXT: ; use s[0:7]
423 ; GCN-NEXT: ;;#ASMEND
424 ; GCN-NEXT: .LBB0_2: ; %ret
425 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
426 ; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
427 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
428 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
429 ; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
430 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
431 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
432 ; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload
433 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
434 ; GCN-NEXT: ; kill: killed $vgpr2
435 ; GCN-NEXT: ; kill: killed $vgpr1
436 ; GCN-NEXT: ; kill: killed $vgpr0
438 %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
439 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
440 %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
441 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
442 %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
443 %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
444 %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
445 %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
446 %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
447 %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
448 %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
449 %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
450 %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
451 %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
452 %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
453 %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
454 %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
455 %cmp = icmp eq i32 %in, 0
456 br i1 %cmp, label %bb0, label %ret
459 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
460 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) #0
461 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0
462 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
463 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0
464 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0
465 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0
466 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0
467 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0
468 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0
469 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0
470 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0
471 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0
472 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0
473 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0
474 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0
475 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0
482 ; Some of the lanes of an SGPR spill are in one VGPR and some forced
483 ; into the next available VGPR.
484 define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) #1 {
485 ; GCN-LABEL: split_sgpr_spill_2_vgprs:
487 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
488 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
489 ; GCN-NEXT: s_mov_b32 s54, -1
490 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
491 ; GCN-NEXT: s_add_u32 s52, s52, s11
492 ; GCN-NEXT: s_addc_u32 s53, s53, 0
493 ; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
494 ; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
495 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
496 ; GCN-NEXT: ;;#ASMSTART
497 ; GCN-NEXT: ; def s[4:19]
498 ; GCN-NEXT: ;;#ASMEND
499 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
500 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
501 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
502 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
503 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
504 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
505 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
506 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
507 ; GCN-NEXT: v_writelane_b32 v1, s12, 8
508 ; GCN-NEXT: v_writelane_b32 v1, s13, 9
509 ; GCN-NEXT: v_writelane_b32 v1, s14, 10
510 ; GCN-NEXT: v_writelane_b32 v1, s15, 11
511 ; GCN-NEXT: v_writelane_b32 v1, s16, 12
512 ; GCN-NEXT: v_writelane_b32 v1, s17, 13
513 ; GCN-NEXT: v_writelane_b32 v1, s18, 14
514 ; GCN-NEXT: v_writelane_b32 v1, s19, 15
515 ; GCN-NEXT: ;;#ASMSTART
516 ; GCN-NEXT: ; def s[4:19]
517 ; GCN-NEXT: ;;#ASMEND
518 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
519 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
520 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
521 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
522 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
523 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
524 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
525 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
526 ; GCN-NEXT: v_writelane_b32 v1, s12, 24
527 ; GCN-NEXT: v_writelane_b32 v1, s13, 25
528 ; GCN-NEXT: v_writelane_b32 v1, s14, 26
529 ; GCN-NEXT: v_writelane_b32 v1, s15, 27
530 ; GCN-NEXT: v_writelane_b32 v1, s16, 28
531 ; GCN-NEXT: v_writelane_b32 v1, s17, 29
532 ; GCN-NEXT: v_writelane_b32 v1, s18, 30
533 ; GCN-NEXT: v_writelane_b32 v1, s19, 31
534 ; GCN-NEXT: ;;#ASMSTART
535 ; GCN-NEXT: ; def s[4:19]
536 ; GCN-NEXT: ;;#ASMEND
537 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
538 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
539 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
540 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
541 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
542 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
543 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
544 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
545 ; GCN-NEXT: v_writelane_b32 v1, s12, 40
546 ; GCN-NEXT: v_writelane_b32 v1, s13, 41
547 ; GCN-NEXT: v_writelane_b32 v1, s14, 42
548 ; GCN-NEXT: v_writelane_b32 v1, s15, 43
549 ; GCN-NEXT: v_writelane_b32 v1, s16, 44
550 ; GCN-NEXT: v_writelane_b32 v1, s17, 45
551 ; GCN-NEXT: v_writelane_b32 v1, s18, 46
552 ; GCN-NEXT: v_writelane_b32 v1, s19, 47
553 ; GCN-NEXT: ;;#ASMSTART
554 ; GCN-NEXT: ; def s[4:19]
555 ; GCN-NEXT: ;;#ASMEND
556 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
557 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
558 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
559 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
560 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
561 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
562 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
563 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
564 ; GCN-NEXT: v_writelane_b32 v1, s12, 56
565 ; GCN-NEXT: v_writelane_b32 v1, s13, 57
566 ; GCN-NEXT: v_writelane_b32 v1, s14, 58
567 ; GCN-NEXT: v_writelane_b32 v1, s15, 59
568 ; GCN-NEXT: v_writelane_b32 v1, s16, 60
569 ; GCN-NEXT: v_writelane_b32 v1, s17, 61
570 ; GCN-NEXT: v_writelane_b32 v1, s18, 62
571 ; GCN-NEXT: v_writelane_b32 v1, s19, 63
572 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
573 ; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill
574 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
575 ; GCN-NEXT: ;;#ASMSTART
576 ; GCN-NEXT: ; def s[4:11]
577 ; GCN-NEXT: ;;#ASMEND
578 ; GCN-NEXT: v_writelane_b32 v0, s4, 0
579 ; GCN-NEXT: v_writelane_b32 v0, s5, 1
580 ; GCN-NEXT: v_writelane_b32 v0, s6, 2
581 ; GCN-NEXT: v_writelane_b32 v0, s7, 3
582 ; GCN-NEXT: v_writelane_b32 v0, s8, 4
583 ; GCN-NEXT: v_writelane_b32 v0, s9, 5
584 ; GCN-NEXT: v_writelane_b32 v0, s10, 6
585 ; GCN-NEXT: v_writelane_b32 v0, s11, 7
586 ; GCN-NEXT: ;;#ASMSTART
587 ; GCN-NEXT: ; def s[2:3]
588 ; GCN-NEXT: ;;#ASMEND
589 ; GCN-NEXT: v_writelane_b32 v0, s2, 8
590 ; GCN-NEXT: v_writelane_b32 v0, s3, 9
591 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
592 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
593 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
594 ; GCN-NEXT: s_mov_b32 s1, 0
595 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
596 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
597 ; GCN-NEXT: s_cbranch_scc1 .LBB1_2
598 ; GCN-NEXT: ; %bb.1: ; %bb0
599 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
600 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
601 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
602 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
603 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
604 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
605 ; GCN-NEXT: s_waitcnt vmcnt(1)
606 ; GCN-NEXT: v_readlane_b32 s16, v1, 8
607 ; GCN-NEXT: v_readlane_b32 s17, v1, 9
608 ; GCN-NEXT: v_readlane_b32 s20, v1, 0
609 ; GCN-NEXT: v_readlane_b32 s21, v1, 1
610 ; GCN-NEXT: v_readlane_b32 s22, v1, 2
611 ; GCN-NEXT: v_readlane_b32 s23, v1, 3
612 ; GCN-NEXT: v_readlane_b32 s24, v1, 4
613 ; GCN-NEXT: v_readlane_b32 s25, v1, 5
614 ; GCN-NEXT: v_readlane_b32 s26, v1, 6
615 ; GCN-NEXT: v_readlane_b32 s27, v1, 7
616 ; GCN-NEXT: s_waitcnt vmcnt(0)
617 ; GCN-NEXT: v_readlane_b32 s36, v0, 32
618 ; GCN-NEXT: v_readlane_b32 s37, v0, 33
619 ; GCN-NEXT: v_readlane_b32 s38, v0, 34
620 ; GCN-NEXT: v_readlane_b32 s39, v0, 35
621 ; GCN-NEXT: v_readlane_b32 s40, v0, 36
622 ; GCN-NEXT: v_readlane_b32 s41, v0, 37
623 ; GCN-NEXT: v_readlane_b32 s42, v0, 38
624 ; GCN-NEXT: v_readlane_b32 s43, v0, 39
625 ; GCN-NEXT: v_readlane_b32 s44, v0, 40
626 ; GCN-NEXT: v_readlane_b32 s45, v0, 41
627 ; GCN-NEXT: v_readlane_b32 s46, v0, 42
628 ; GCN-NEXT: v_readlane_b32 s47, v0, 43
629 ; GCN-NEXT: v_readlane_b32 s48, v0, 44
630 ; GCN-NEXT: v_readlane_b32 s49, v0, 45
631 ; GCN-NEXT: v_readlane_b32 s50, v0, 46
632 ; GCN-NEXT: v_readlane_b32 s51, v0, 47
633 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
634 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
635 ; GCN-NEXT: v_readlane_b32 s2, v0, 2
636 ; GCN-NEXT: v_readlane_b32 s3, v0, 3
637 ; GCN-NEXT: v_readlane_b32 s4, v0, 4
638 ; GCN-NEXT: v_readlane_b32 s5, v0, 5
639 ; GCN-NEXT: v_readlane_b32 s6, v0, 6
640 ; GCN-NEXT: v_readlane_b32 s7, v0, 7
641 ; GCN-NEXT: v_readlane_b32 s8, v0, 8
642 ; GCN-NEXT: v_readlane_b32 s9, v0, 9
643 ; GCN-NEXT: v_readlane_b32 s10, v0, 10
644 ; GCN-NEXT: v_readlane_b32 s11, v0, 11
645 ; GCN-NEXT: v_readlane_b32 s12, v0, 12
646 ; GCN-NEXT: v_readlane_b32 s13, v0, 13
647 ; GCN-NEXT: v_readlane_b32 s14, v0, 14
648 ; GCN-NEXT: v_readlane_b32 s15, v0, 15
649 ; GCN-NEXT: ;;#ASMSTART
650 ; GCN-NEXT: ; use s[0:15]
651 ; GCN-NEXT: ;;#ASMEND
652 ; GCN-NEXT: v_readlane_b32 s0, v0, 16
653 ; GCN-NEXT: v_readlane_b32 s1, v0, 17
654 ; GCN-NEXT: v_readlane_b32 s2, v0, 18
655 ; GCN-NEXT: v_readlane_b32 s3, v0, 19
656 ; GCN-NEXT: v_readlane_b32 s4, v0, 20
657 ; GCN-NEXT: v_readlane_b32 s5, v0, 21
658 ; GCN-NEXT: v_readlane_b32 s6, v0, 22
659 ; GCN-NEXT: v_readlane_b32 s7, v0, 23
660 ; GCN-NEXT: v_readlane_b32 s8, v0, 24
661 ; GCN-NEXT: v_readlane_b32 s9, v0, 25
662 ; GCN-NEXT: v_readlane_b32 s10, v0, 26
663 ; GCN-NEXT: v_readlane_b32 s11, v0, 27
664 ; GCN-NEXT: v_readlane_b32 s12, v0, 28
665 ; GCN-NEXT: v_readlane_b32 s13, v0, 29
666 ; GCN-NEXT: v_readlane_b32 s14, v0, 30
667 ; GCN-NEXT: v_readlane_b32 s15, v0, 31
668 ; GCN-NEXT: ;;#ASMSTART
669 ; GCN-NEXT: ; use s[0:15]
670 ; GCN-NEXT: ;;#ASMEND
671 ; GCN-NEXT: v_readlane_b32 s0, v0, 48
672 ; GCN-NEXT: v_readlane_b32 s1, v0, 49
673 ; GCN-NEXT: v_readlane_b32 s2, v0, 50
674 ; GCN-NEXT: v_readlane_b32 s3, v0, 51
675 ; GCN-NEXT: v_readlane_b32 s4, v0, 52
676 ; GCN-NEXT: v_readlane_b32 s5, v0, 53
677 ; GCN-NEXT: v_readlane_b32 s6, v0, 54
678 ; GCN-NEXT: v_readlane_b32 s7, v0, 55
679 ; GCN-NEXT: v_readlane_b32 s8, v0, 56
680 ; GCN-NEXT: v_readlane_b32 s9, v0, 57
681 ; GCN-NEXT: v_readlane_b32 s10, v0, 58
682 ; GCN-NEXT: v_readlane_b32 s11, v0, 59
683 ; GCN-NEXT: v_readlane_b32 s12, v0, 60
684 ; GCN-NEXT: v_readlane_b32 s13, v0, 61
685 ; GCN-NEXT: v_readlane_b32 s14, v0, 62
686 ; GCN-NEXT: v_readlane_b32 s15, v0, 63
687 ; GCN-NEXT: ;;#ASMSTART
688 ; GCN-NEXT: ; use s[36:51]
689 ; GCN-NEXT: ;;#ASMEND
690 ; GCN-NEXT: ;;#ASMSTART
691 ; GCN-NEXT: ; use s[20:27]
692 ; GCN-NEXT: ;;#ASMEND
693 ; GCN-NEXT: ;;#ASMSTART
694 ; GCN-NEXT: ; use s[16:17]
695 ; GCN-NEXT: ;;#ASMEND
696 ; GCN-NEXT: ;;#ASMSTART
697 ; GCN-NEXT: ; use s[0:15]
698 ; GCN-NEXT: ;;#ASMEND
699 ; GCN-NEXT: .LBB1_2: ; %ret
700 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
701 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
702 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
703 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
704 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
705 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
706 ; GCN-NEXT: ; kill: killed $vgpr1
707 ; GCN-NEXT: ; kill: killed $vgpr0
709 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
710 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
711 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
712 %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
713 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
714 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
716 %cmp = icmp eq i32 %in, 0
717 br i1 %cmp, label %bb0, label %ret
720 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
721 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
722 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
723 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
724 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
725 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
732 ; The first 64 SGPR spills can go to a VGPR, but there isn't a second
733 ; one, so some spills must go to memory. The last 16-element spill runs out
734 ; of lanes at the 15th element.
735 define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
736 ; GCN-LABEL: no_vgprs_last_sgpr_spill:
738 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
739 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
740 ; GCN-NEXT: s_mov_b32 s54, -1
741 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
742 ; GCN-NEXT: s_add_u32 s52, s52, s11
743 ; GCN-NEXT: s_addc_u32 s53, s53, 0
744 ; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
745 ; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
746 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
747 ; GCN-NEXT: ;;#ASMSTART
748 ; GCN-NEXT: ;;#ASMEND
749 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
750 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
751 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
752 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
753 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
754 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
755 ; GCN-NEXT: ;;#ASMSTART
756 ; GCN-NEXT: ;;#ASMEND
757 ; GCN-NEXT: ;;#ASMSTART
758 ; GCN-NEXT: ;;#ASMEND
759 ; GCN-NEXT: ;;#ASMSTART
760 ; GCN-NEXT: ;;#ASMEND
761 ; GCN-NEXT: ;;#ASMSTART
762 ; GCN-NEXT: ;;#ASMEND
763 ; GCN-NEXT: ;;#ASMSTART
764 ; GCN-NEXT: ;;#ASMEND
765 ; GCN-NEXT: ;;#ASMSTART
766 ; GCN-NEXT: ; def s[4:19]
767 ; GCN-NEXT: ;;#ASMEND
768 ; GCN-NEXT: s_waitcnt vmcnt(1)
769 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
770 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
771 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
772 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
773 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
774 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
775 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
776 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
777 ; GCN-NEXT: v_writelane_b32 v1, s12, 8
778 ; GCN-NEXT: v_writelane_b32 v1, s13, 9
779 ; GCN-NEXT: v_writelane_b32 v1, s14, 10
780 ; GCN-NEXT: v_writelane_b32 v1, s15, 11
781 ; GCN-NEXT: v_writelane_b32 v1, s16, 12
782 ; GCN-NEXT: v_writelane_b32 v1, s17, 13
783 ; GCN-NEXT: v_writelane_b32 v1, s18, 14
784 ; GCN-NEXT: v_writelane_b32 v1, s19, 15
785 ; GCN-NEXT: ;;#ASMSTART
786 ; GCN-NEXT: ; def s[4:19]
787 ; GCN-NEXT: ;;#ASMEND
788 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
789 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
790 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
791 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
792 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
793 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
794 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
795 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
796 ; GCN-NEXT: v_writelane_b32 v1, s12, 24
797 ; GCN-NEXT: v_writelane_b32 v1, s13, 25
798 ; GCN-NEXT: v_writelane_b32 v1, s14, 26
799 ; GCN-NEXT: v_writelane_b32 v1, s15, 27
800 ; GCN-NEXT: v_writelane_b32 v1, s16, 28
801 ; GCN-NEXT: v_writelane_b32 v1, s17, 29
802 ; GCN-NEXT: v_writelane_b32 v1, s18, 30
803 ; GCN-NEXT: v_writelane_b32 v1, s19, 31
804 ; GCN-NEXT: ;;#ASMSTART
805 ; GCN-NEXT: ; def s[4:19]
806 ; GCN-NEXT: ;;#ASMEND
807 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
808 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
809 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
810 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
811 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
812 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
813 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
814 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
815 ; GCN-NEXT: v_writelane_b32 v1, s12, 40
816 ; GCN-NEXT: v_writelane_b32 v1, s13, 41
817 ; GCN-NEXT: v_writelane_b32 v1, s14, 42
818 ; GCN-NEXT: v_writelane_b32 v1, s15, 43
819 ; GCN-NEXT: v_writelane_b32 v1, s16, 44
820 ; GCN-NEXT: v_writelane_b32 v1, s17, 45
821 ; GCN-NEXT: v_writelane_b32 v1, s18, 46
822 ; GCN-NEXT: v_writelane_b32 v1, s19, 47
823 ; GCN-NEXT: ;;#ASMSTART
824 ; GCN-NEXT: ; def s[4:19]
825 ; GCN-NEXT: ;;#ASMEND
826 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
827 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
828 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
829 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
830 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
831 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
832 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
833 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
834 ; GCN-NEXT: v_writelane_b32 v1, s12, 56
835 ; GCN-NEXT: v_writelane_b32 v1, s13, 57
836 ; GCN-NEXT: v_writelane_b32 v1, s14, 58
837 ; GCN-NEXT: v_writelane_b32 v1, s15, 59
838 ; GCN-NEXT: v_writelane_b32 v1, s16, 60
839 ; GCN-NEXT: v_writelane_b32 v1, s17, 61
840 ; GCN-NEXT: v_writelane_b32 v1, s18, 62
841 ; GCN-NEXT: v_writelane_b32 v1, s19, 63
842 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
843 ; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill
844 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
845 ; GCN-NEXT: ;;#ASMSTART
846 ; GCN-NEXT: ; def s[2:3]
847 ; GCN-NEXT: ;;#ASMEND
848 ; GCN-NEXT: s_waitcnt vmcnt(1)
849 ; GCN-NEXT: v_writelane_b32 v0, s2, 0
850 ; GCN-NEXT: v_writelane_b32 v0, s3, 1
851 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
852 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
853 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
854 ; GCN-NEXT: s_mov_b32 s1, 0
855 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
856 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
857 ; GCN-NEXT: s_cbranch_scc1 .LBB2_2
858 ; GCN-NEXT: ; %bb.1: ; %bb0
859 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
860 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
861 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
862 ; GCN-NEXT: s_waitcnt vmcnt(0)
863 ; GCN-NEXT: v_readlane_b32 s36, v1, 32
864 ; GCN-NEXT: v_readlane_b32 s37, v1, 33
865 ; GCN-NEXT: v_readlane_b32 s38, v1, 34
866 ; GCN-NEXT: v_readlane_b32 s39, v1, 35
867 ; GCN-NEXT: v_readlane_b32 s40, v1, 36
868 ; GCN-NEXT: v_readlane_b32 s41, v1, 37
869 ; GCN-NEXT: v_readlane_b32 s42, v1, 38
870 ; GCN-NEXT: v_readlane_b32 s43, v1, 39
871 ; GCN-NEXT: v_readlane_b32 s44, v1, 40
872 ; GCN-NEXT: v_readlane_b32 s45, v1, 41
873 ; GCN-NEXT: v_readlane_b32 s46, v1, 42
874 ; GCN-NEXT: v_readlane_b32 s47, v1, 43
875 ; GCN-NEXT: v_readlane_b32 s48, v1, 44
876 ; GCN-NEXT: v_readlane_b32 s49, v1, 45
877 ; GCN-NEXT: v_readlane_b32 s50, v1, 46
878 ; GCN-NEXT: v_readlane_b32 s51, v1, 47
879 ; GCN-NEXT: v_readlane_b32 s0, v1, 16
880 ; GCN-NEXT: v_readlane_b32 s1, v1, 17
881 ; GCN-NEXT: v_readlane_b32 s2, v1, 18
882 ; GCN-NEXT: v_readlane_b32 s3, v1, 19
883 ; GCN-NEXT: v_readlane_b32 s4, v1, 20
884 ; GCN-NEXT: v_readlane_b32 s5, v1, 21
885 ; GCN-NEXT: v_readlane_b32 s6, v1, 22
886 ; GCN-NEXT: v_readlane_b32 s7, v1, 23
887 ; GCN-NEXT: v_readlane_b32 s8, v1, 24
888 ; GCN-NEXT: v_readlane_b32 s9, v1, 25
889 ; GCN-NEXT: v_readlane_b32 s10, v1, 26
890 ; GCN-NEXT: v_readlane_b32 s11, v1, 27
891 ; GCN-NEXT: v_readlane_b32 s12, v1, 28
892 ; GCN-NEXT: v_readlane_b32 s13, v1, 29
893 ; GCN-NEXT: v_readlane_b32 s14, v1, 30
894 ; GCN-NEXT: v_readlane_b32 s15, v1, 31
895 ; GCN-NEXT: v_readlane_b32 s16, v1, 0
896 ; GCN-NEXT: v_readlane_b32 s17, v1, 1
897 ; GCN-NEXT: v_readlane_b32 s18, v1, 2
898 ; GCN-NEXT: v_readlane_b32 s19, v1, 3
899 ; GCN-NEXT: v_readlane_b32 s20, v1, 4
900 ; GCN-NEXT: v_readlane_b32 s21, v1, 5
901 ; GCN-NEXT: v_readlane_b32 s22, v1, 6
902 ; GCN-NEXT: v_readlane_b32 s23, v1, 7
903 ; GCN-NEXT: v_readlane_b32 s24, v1, 8
904 ; GCN-NEXT: v_readlane_b32 s25, v1, 9
905 ; GCN-NEXT: v_readlane_b32 s26, v1, 10
906 ; GCN-NEXT: v_readlane_b32 s27, v1, 11
907 ; GCN-NEXT: v_readlane_b32 s28, v1, 12
908 ; GCN-NEXT: v_readlane_b32 s29, v1, 13
909 ; GCN-NEXT: v_readlane_b32 s30, v1, 14
910 ; GCN-NEXT: v_readlane_b32 s31, v1, 15
911 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
912 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
913 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
914 ; GCN-NEXT: ;;#ASMSTART
915 ; GCN-NEXT: ; use s[16:31]
916 ; GCN-NEXT: ;;#ASMEND
917 ; GCN-NEXT: ;;#ASMSTART
918 ; GCN-NEXT: ; use s[0:15]
919 ; GCN-NEXT: ;;#ASMEND
920 ; GCN-NEXT: v_readlane_b32 s4, v1, 48
921 ; GCN-NEXT: v_readlane_b32 s5, v1, 49
922 ; GCN-NEXT: v_readlane_b32 s6, v1, 50
923 ; GCN-NEXT: v_readlane_b32 s7, v1, 51
924 ; GCN-NEXT: v_readlane_b32 s8, v1, 52
925 ; GCN-NEXT: v_readlane_b32 s9, v1, 53
926 ; GCN-NEXT: v_readlane_b32 s10, v1, 54
927 ; GCN-NEXT: v_readlane_b32 s11, v1, 55
928 ; GCN-NEXT: v_readlane_b32 s12, v1, 56
929 ; GCN-NEXT: v_readlane_b32 s13, v1, 57
930 ; GCN-NEXT: v_readlane_b32 s14, v1, 58
931 ; GCN-NEXT: v_readlane_b32 s15, v1, 59
932 ; GCN-NEXT: v_readlane_b32 s16, v1, 60
933 ; GCN-NEXT: v_readlane_b32 s17, v1, 61
934 ; GCN-NEXT: v_readlane_b32 s18, v1, 62
935 ; GCN-NEXT: v_readlane_b32 s19, v1, 63
936 ; GCN-NEXT: s_waitcnt vmcnt(0)
937 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
938 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
939 ; GCN-NEXT: ;;#ASMSTART
940 ; GCN-NEXT: ; use s[36:51]
941 ; GCN-NEXT: ;;#ASMEND
942 ; GCN-NEXT: ;;#ASMSTART
943 ; GCN-NEXT: ; use s[4:19]
944 ; GCN-NEXT: ;;#ASMEND
945 ; GCN-NEXT: ;;#ASMSTART
946 ; GCN-NEXT: ; use s[0:1]
947 ; GCN-NEXT: ;;#ASMEND
948 ; GCN-NEXT: .LBB2_2: ; %ret
949 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
950 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
951 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
952 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
953 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
954 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
955 ; GCN-NEXT: ; kill: killed $vgpr1
956 ; GCN-NEXT: ; kill: killed $vgpr0
958 call void asm sideeffect "", "~{v[0:7]}" () #0
959 call void asm sideeffect "", "~{v[8:15]}" () #0
960 call void asm sideeffect "", "~{v[16:23]}" () #0
961 call void asm sideeffect "", "~{v[24:27]}"() #0
962 call void asm sideeffect "", "~{v[28:29]}"() #0
963 call void asm sideeffect "", "~{v30}"() #0
965 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
966 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
967 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
968 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
969 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
970 %cmp = icmp eq i32 %in, 0
971 br i1 %cmp, label %bb0, label %ret
974 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
975 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
976 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
977 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
978 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
985 ; Same as @no_vgprs_last_sgpr_spill, some SGPR spills must go to memory.
986 ; Additionally, v0 is live throughout the function.
;
; In the checked output below, the inline asm VGPR value is assigned v0
; inside %bb0 ("def v0" ... "use v0"), and within that block the two
; lane-holding VGPRs are reloaded into v2 (scratch offset 8) and v1 (scratch
; offset 4) instead of the v1 / v0 used in the entry and %ret blocks.
; NOTE(review): %vgpr0 is defined and used by inline asm after the
; conditional branch, so it is live across the SGPR uses in %bb0 rather than
; from function entry -- confirm that "throughout the function" is the
; intended wording.
987 define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
988 ; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0:
990 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
991 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
992 ; GCN-NEXT: s_mov_b32 s54, -1
993 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
994 ; GCN-NEXT: s_add_u32 s52, s52, s11
995 ; GCN-NEXT: s_addc_u32 s53, s53, 0
996 ; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
997 ; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
998 ; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
999 ; GCN-NEXT: ;;#ASMSTART
1000 ; GCN-NEXT: ;;#ASMEND
1001 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1002 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
1003 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1004 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1005 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1006 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1007 ; GCN-NEXT: ;;#ASMSTART
1008 ; GCN-NEXT: ;;#ASMEND
1009 ; GCN-NEXT: ;;#ASMSTART
1010 ; GCN-NEXT: ;;#ASMEND
1011 ; GCN-NEXT: ;;#ASMSTART
1012 ; GCN-NEXT: ;;#ASMEND
1013 ; GCN-NEXT: ;;#ASMSTART
1014 ; GCN-NEXT: ;;#ASMEND
1015 ; GCN-NEXT: ;;#ASMSTART
1016 ; GCN-NEXT: ;;#ASMEND
1017 ; GCN-NEXT: ;;#ASMSTART
1018 ; GCN-NEXT: ; def s[4:19]
1019 ; GCN-NEXT: ;;#ASMEND
1020 ; GCN-NEXT: s_waitcnt vmcnt(1)
1021 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
1022 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
1023 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
1024 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
1025 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
1026 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
1027 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
1028 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
1029 ; GCN-NEXT: v_writelane_b32 v1, s12, 8
1030 ; GCN-NEXT: v_writelane_b32 v1, s13, 9
1031 ; GCN-NEXT: v_writelane_b32 v1, s14, 10
1032 ; GCN-NEXT: v_writelane_b32 v1, s15, 11
1033 ; GCN-NEXT: v_writelane_b32 v1, s16, 12
1034 ; GCN-NEXT: v_writelane_b32 v1, s17, 13
1035 ; GCN-NEXT: v_writelane_b32 v1, s18, 14
1036 ; GCN-NEXT: v_writelane_b32 v1, s19, 15
1037 ; GCN-NEXT: ;;#ASMSTART
1038 ; GCN-NEXT: ; def s[4:19]
1039 ; GCN-NEXT: ;;#ASMEND
1040 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
1041 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
1042 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
1043 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
1044 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
1045 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
1046 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
1047 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
1048 ; GCN-NEXT: v_writelane_b32 v1, s12, 24
1049 ; GCN-NEXT: v_writelane_b32 v1, s13, 25
1050 ; GCN-NEXT: v_writelane_b32 v1, s14, 26
1051 ; GCN-NEXT: v_writelane_b32 v1, s15, 27
1052 ; GCN-NEXT: v_writelane_b32 v1, s16, 28
1053 ; GCN-NEXT: v_writelane_b32 v1, s17, 29
1054 ; GCN-NEXT: v_writelane_b32 v1, s18, 30
1055 ; GCN-NEXT: v_writelane_b32 v1, s19, 31
1056 ; GCN-NEXT: ;;#ASMSTART
1057 ; GCN-NEXT: ; def s[4:19]
1058 ; GCN-NEXT: ;;#ASMEND
1059 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
1060 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
1061 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
1062 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
1063 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
1064 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
1065 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
1066 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
1067 ; GCN-NEXT: v_writelane_b32 v1, s12, 40
1068 ; GCN-NEXT: v_writelane_b32 v1, s13, 41
1069 ; GCN-NEXT: v_writelane_b32 v1, s14, 42
1070 ; GCN-NEXT: v_writelane_b32 v1, s15, 43
1071 ; GCN-NEXT: v_writelane_b32 v1, s16, 44
1072 ; GCN-NEXT: v_writelane_b32 v1, s17, 45
1073 ; GCN-NEXT: v_writelane_b32 v1, s18, 46
1074 ; GCN-NEXT: v_writelane_b32 v1, s19, 47
1075 ; GCN-NEXT: ;;#ASMSTART
1076 ; GCN-NEXT: ; def s[4:19]
1077 ; GCN-NEXT: ;;#ASMEND
1078 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
1079 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
1080 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
1081 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
1082 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
1083 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
1084 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
1085 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
1086 ; GCN-NEXT: v_writelane_b32 v1, s12, 56
1087 ; GCN-NEXT: v_writelane_b32 v1, s13, 57
1088 ; GCN-NEXT: v_writelane_b32 v1, s14, 58
1089 ; GCN-NEXT: v_writelane_b32 v1, s15, 59
1090 ; GCN-NEXT: v_writelane_b32 v1, s16, 60
1091 ; GCN-NEXT: v_writelane_b32 v1, s17, 61
1092 ; GCN-NEXT: v_writelane_b32 v1, s18, 62
1093 ; GCN-NEXT: v_writelane_b32 v1, s19, 63
1094 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1095 ; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill
1096 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1097 ; GCN-NEXT: ;;#ASMSTART
1098 ; GCN-NEXT: ; def s[2:3]
1099 ; GCN-NEXT: ;;#ASMEND
1100 ; GCN-NEXT: s_waitcnt vmcnt(1)
1101 ; GCN-NEXT: v_writelane_b32 v0, s2, 0
1102 ; GCN-NEXT: v_writelane_b32 v0, s3, 1
1103 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1104 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
1105 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1106 ; GCN-NEXT: s_mov_b32 s1, 0
1107 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1108 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
1109 ; GCN-NEXT: s_cbranch_scc1 .LBB3_2
1110 ; GCN-NEXT: ; %bb.1: ; %bb0
1111 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1112 ; GCN-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
1113 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1114 ; GCN-NEXT: s_waitcnt vmcnt(0)
1115 ; GCN-NEXT: v_readlane_b32 s36, v2, 32
1116 ; GCN-NEXT: v_readlane_b32 s37, v2, 33
1117 ; GCN-NEXT: v_readlane_b32 s38, v2, 34
1118 ; GCN-NEXT: v_readlane_b32 s39, v2, 35
1119 ; GCN-NEXT: v_readlane_b32 s40, v2, 36
1120 ; GCN-NEXT: v_readlane_b32 s41, v2, 37
1121 ; GCN-NEXT: v_readlane_b32 s42, v2, 38
1122 ; GCN-NEXT: v_readlane_b32 s43, v2, 39
1123 ; GCN-NEXT: v_readlane_b32 s44, v2, 40
1124 ; GCN-NEXT: v_readlane_b32 s45, v2, 41
1125 ; GCN-NEXT: v_readlane_b32 s46, v2, 42
1126 ; GCN-NEXT: v_readlane_b32 s47, v2, 43
1127 ; GCN-NEXT: v_readlane_b32 s48, v2, 44
1128 ; GCN-NEXT: v_readlane_b32 s49, v2, 45
1129 ; GCN-NEXT: v_readlane_b32 s50, v2, 46
1130 ; GCN-NEXT: v_readlane_b32 s51, v2, 47
1131 ; GCN-NEXT: v_readlane_b32 s0, v2, 16
1132 ; GCN-NEXT: v_readlane_b32 s1, v2, 17
1133 ; GCN-NEXT: v_readlane_b32 s2, v2, 18
1134 ; GCN-NEXT: v_readlane_b32 s3, v2, 19
1135 ; GCN-NEXT: v_readlane_b32 s4, v2, 20
1136 ; GCN-NEXT: v_readlane_b32 s5, v2, 21
1137 ; GCN-NEXT: v_readlane_b32 s6, v2, 22
1138 ; GCN-NEXT: v_readlane_b32 s7, v2, 23
1139 ; GCN-NEXT: v_readlane_b32 s8, v2, 24
1140 ; GCN-NEXT: v_readlane_b32 s9, v2, 25
1141 ; GCN-NEXT: v_readlane_b32 s10, v2, 26
1142 ; GCN-NEXT: v_readlane_b32 s11, v2, 27
1143 ; GCN-NEXT: v_readlane_b32 s12, v2, 28
1144 ; GCN-NEXT: v_readlane_b32 s13, v2, 29
1145 ; GCN-NEXT: v_readlane_b32 s14, v2, 30
1146 ; GCN-NEXT: v_readlane_b32 s15, v2, 31
1147 ; GCN-NEXT: v_readlane_b32 s16, v2, 0
1148 ; GCN-NEXT: v_readlane_b32 s17, v2, 1
1149 ; GCN-NEXT: v_readlane_b32 s18, v2, 2
1150 ; GCN-NEXT: v_readlane_b32 s19, v2, 3
1151 ; GCN-NEXT: v_readlane_b32 s20, v2, 4
1152 ; GCN-NEXT: v_readlane_b32 s21, v2, 5
1153 ; GCN-NEXT: v_readlane_b32 s22, v2, 6
1154 ; GCN-NEXT: v_readlane_b32 s23, v2, 7
1155 ; GCN-NEXT: v_readlane_b32 s24, v2, 8
1156 ; GCN-NEXT: v_readlane_b32 s25, v2, 9
1157 ; GCN-NEXT: v_readlane_b32 s26, v2, 10
1158 ; GCN-NEXT: v_readlane_b32 s27, v2, 11
1159 ; GCN-NEXT: v_readlane_b32 s28, v2, 12
1160 ; GCN-NEXT: v_readlane_b32 s29, v2, 13
1161 ; GCN-NEXT: v_readlane_b32 s30, v2, 14
1162 ; GCN-NEXT: v_readlane_b32 s31, v2, 15
1163 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1164 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1165 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1166 ; GCN-NEXT: ;;#ASMSTART
1167 ; GCN-NEXT: ; def v0
1168 ; GCN-NEXT: ;;#ASMEND
1169 ; GCN-NEXT: ;;#ASMSTART
1170 ; GCN-NEXT: ; use s[16:31]
1171 ; GCN-NEXT: ;;#ASMEND
1172 ; GCN-NEXT: ;;#ASMSTART
1173 ; GCN-NEXT: ; use s[0:15]
1174 ; GCN-NEXT: ;;#ASMEND
1175 ; GCN-NEXT: v_readlane_b32 s4, v2, 48
1176 ; GCN-NEXT: v_readlane_b32 s5, v2, 49
1177 ; GCN-NEXT: v_readlane_b32 s6, v2, 50
1178 ; GCN-NEXT: v_readlane_b32 s7, v2, 51
1179 ; GCN-NEXT: v_readlane_b32 s8, v2, 52
1180 ; GCN-NEXT: v_readlane_b32 s9, v2, 53
1181 ; GCN-NEXT: v_readlane_b32 s10, v2, 54
1182 ; GCN-NEXT: v_readlane_b32 s11, v2, 55
1183 ; GCN-NEXT: v_readlane_b32 s12, v2, 56
1184 ; GCN-NEXT: v_readlane_b32 s13, v2, 57
1185 ; GCN-NEXT: v_readlane_b32 s14, v2, 58
1186 ; GCN-NEXT: v_readlane_b32 s15, v2, 59
1187 ; GCN-NEXT: v_readlane_b32 s16, v2, 60
1188 ; GCN-NEXT: v_readlane_b32 s17, v2, 61
1189 ; GCN-NEXT: v_readlane_b32 s18, v2, 62
1190 ; GCN-NEXT: v_readlane_b32 s19, v2, 63
1191 ; GCN-NEXT: s_waitcnt vmcnt(0)
1192 ; GCN-NEXT: v_readlane_b32 s0, v1, 0
1193 ; GCN-NEXT: v_readlane_b32 s1, v1, 1
1194 ; GCN-NEXT: ;;#ASMSTART
1195 ; GCN-NEXT: ; use s[36:51]
1196 ; GCN-NEXT: ;;#ASMEND
1197 ; GCN-NEXT: ;;#ASMSTART
1198 ; GCN-NEXT: ; use s[4:19]
1199 ; GCN-NEXT: ;;#ASMEND
1200 ; GCN-NEXT: ;;#ASMSTART
1201 ; GCN-NEXT: ; use s[0:1]
1202 ; GCN-NEXT: ;;#ASMEND
1203 ; GCN-NEXT: ;;#ASMSTART
1204 ; GCN-NEXT: ; use v0
1205 ; GCN-NEXT: ;;#ASMEND
1206 ; GCN-NEXT: .LBB3_2: ; %ret
1207 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1208 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1209 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1210 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1211 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
1212 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1213 ; GCN-NEXT: ; kill: killed $vgpr1
1214 ; GCN-NEXT: ; kill: killed $vgpr0
1215 ; GCN-NEXT: s_endpgm
; Empty inline asm statements whose clobber lists together cover v0 through v30.
; NOTE(review): presumably this leaves too few free VGPRs to hold every SGPR
; spill under the waves-per-eu setting in attributes #1 -- confirm.
1216 call void asm sideeffect "", "~{v[0:7]}" () #0
1217 call void asm sideeffect "", "~{v[8:15]}" () #0
1218 call void asm sideeffect "", "~{v[16:23]}" () #0
1219 call void asm sideeffect "", "~{v[24:27]}"() #0
1220 call void asm sideeffect "", "~{v[28:29]}"() #0
1221 call void asm sideeffect "", "~{v30}"() #0
; Four 16-element values and one 2-element value, each defined in SGPRs
; ("=s" constraint) by inline asm.
1223 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1224 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1225 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1226 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1227 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
; Branch on %in == 0; the SGPR values are only consumed on the %bb0 side, so
; they have to be kept across this branch.
1228 %cmp = icmp eq i32 %in, 0
1229 br i1 %cmp, label %bb0, label %ret
; Define a VGPR value ("=v" constraint) before the SGPR uses and consume it
; after them, so it is live while the spilled SGPRs are read back.
1232 %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0
1233 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
1234 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
1235 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
1236 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
1237 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
1238 call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0
; Attribute group #0: nounwind only. It is attached to the inline asm call
; sites and to @spill_sgprs_to_multiple_vgprs.
1245 attributes #0 = { nounwind }
; Attribute group #1: nounwind plus "amdgpu-waves-per-eu"="8,8". It is
; attached to the two @no_vgprs_last_sgpr_spill* kernels.
; NOTE(review): "8,8" presumably is the min,max waves-per-execution-unit
; request, which in turn bounds the VGPR budget -- confirm against the AMDGPU
; backend documentation.
1246 attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }