1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O0 -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
4 ; FIXME: we should disable the sdwa peephole because dead-code elimination, which
5 ; runs after the peephole, ruins this test (different register numbers).
7 ; Spill all SGPRs so multiple VGPRs are required for spilling all of them.
9 ; Ideally we only need 2 VGPRs for all spilling. The VGPRs are
10 ; allocated per-frame index, so it's possible to end up with more.
11 define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) #0 {
; Review notes, derived from the check lines and IR of this function:
; - The IR defines 17 <8 x i32> values in SGPRs through inline asm (136 SGPR
;   lanes in total). They are consumed only after the conditional branch (the
;   uses are checked under %bb0), so every def is spilled before the branch.
; - Spill lanes are written with v_writelane_b32: v2 receives lanes 0-63
;   (first 8 defs), v1 receives lanes 0-63 (next 8 defs) and v0 receives
;   lanes 0-7 (last def).
; - Each VGPR is then stored to scratch with exec saved and restored around
;   the store by s_or_saveexec_b64 / s_mov_b64: v2 at offset:12, v1 at offset:8,
;   v0 at offset:4.
; - In %bb0 the reloads do not use the same VGPR numbers as the stores:
;   offset:12 is reloaded into v1 and offset:8 into v2, so the v_readlane_b32
;   sequence reads the first 8 defs from v1 and the next 8 defs from v2.
;   In %ret each offset is reloaded into the VGPR that originally stored it.
; - The check lines are autogenerated; regenerate them with
;   utils/update_llc_test_checks.py instead of editing them by hand.
12 ; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
14 ; GCN-NEXT: s_mov_b32 s92, SCRATCH_RSRC_DWORD0
15 ; GCN-NEXT: s_mov_b32 s93, SCRATCH_RSRC_DWORD1
16 ; GCN-NEXT: s_mov_b32 s94, -1
17 ; GCN-NEXT: s_mov_b32 s95, 0xe8f000
18 ; GCN-NEXT: s_add_u32 s92, s92, s11
19 ; GCN-NEXT: s_addc_u32 s93, s93, 0
20 ; GCN-NEXT: ; implicit-def: $vgpr0
21 ; GCN-NEXT: ; implicit-def: $vgpr1
22 ; GCN-NEXT: ; implicit-def: $vgpr2
23 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
24 ; GCN-NEXT: ;;#ASMSTART
25 ; GCN-NEXT: ; def s[4:11]
27 ; GCN-NEXT: v_writelane_b32 v2, s4, 0
28 ; GCN-NEXT: v_writelane_b32 v2, s5, 1
29 ; GCN-NEXT: v_writelane_b32 v2, s6, 2
30 ; GCN-NEXT: v_writelane_b32 v2, s7, 3
31 ; GCN-NEXT: v_writelane_b32 v2, s8, 4
32 ; GCN-NEXT: v_writelane_b32 v2, s9, 5
33 ; GCN-NEXT: v_writelane_b32 v2, s10, 6
34 ; GCN-NEXT: v_writelane_b32 v2, s11, 7
35 ; GCN-NEXT: ;;#ASMSTART
36 ; GCN-NEXT: ; def s[4:11]
38 ; GCN-NEXT: v_writelane_b32 v2, s4, 8
39 ; GCN-NEXT: v_writelane_b32 v2, s5, 9
40 ; GCN-NEXT: v_writelane_b32 v2, s6, 10
41 ; GCN-NEXT: v_writelane_b32 v2, s7, 11
42 ; GCN-NEXT: v_writelane_b32 v2, s8, 12
43 ; GCN-NEXT: v_writelane_b32 v2, s9, 13
44 ; GCN-NEXT: v_writelane_b32 v2, s10, 14
45 ; GCN-NEXT: v_writelane_b32 v2, s11, 15
46 ; GCN-NEXT: ;;#ASMSTART
47 ; GCN-NEXT: ; def s[4:11]
49 ; GCN-NEXT: v_writelane_b32 v2, s4, 16
50 ; GCN-NEXT: v_writelane_b32 v2, s5, 17
51 ; GCN-NEXT: v_writelane_b32 v2, s6, 18
52 ; GCN-NEXT: v_writelane_b32 v2, s7, 19
53 ; GCN-NEXT: v_writelane_b32 v2, s8, 20
54 ; GCN-NEXT: v_writelane_b32 v2, s9, 21
55 ; GCN-NEXT: v_writelane_b32 v2, s10, 22
56 ; GCN-NEXT: v_writelane_b32 v2, s11, 23
57 ; GCN-NEXT: ;;#ASMSTART
58 ; GCN-NEXT: ; def s[4:11]
60 ; GCN-NEXT: v_writelane_b32 v2, s4, 24
61 ; GCN-NEXT: v_writelane_b32 v2, s5, 25
62 ; GCN-NEXT: v_writelane_b32 v2, s6, 26
63 ; GCN-NEXT: v_writelane_b32 v2, s7, 27
64 ; GCN-NEXT: v_writelane_b32 v2, s8, 28
65 ; GCN-NEXT: v_writelane_b32 v2, s9, 29
66 ; GCN-NEXT: v_writelane_b32 v2, s10, 30
67 ; GCN-NEXT: v_writelane_b32 v2, s11, 31
68 ; GCN-NEXT: ;;#ASMSTART
69 ; GCN-NEXT: ; def s[4:11]
71 ; GCN-NEXT: v_writelane_b32 v2, s4, 32
72 ; GCN-NEXT: v_writelane_b32 v2, s5, 33
73 ; GCN-NEXT: v_writelane_b32 v2, s6, 34
74 ; GCN-NEXT: v_writelane_b32 v2, s7, 35
75 ; GCN-NEXT: v_writelane_b32 v2, s8, 36
76 ; GCN-NEXT: v_writelane_b32 v2, s9, 37
77 ; GCN-NEXT: v_writelane_b32 v2, s10, 38
78 ; GCN-NEXT: v_writelane_b32 v2, s11, 39
79 ; GCN-NEXT: ;;#ASMSTART
80 ; GCN-NEXT: ; def s[4:11]
82 ; GCN-NEXT: v_writelane_b32 v2, s4, 40
83 ; GCN-NEXT: v_writelane_b32 v2, s5, 41
84 ; GCN-NEXT: v_writelane_b32 v2, s6, 42
85 ; GCN-NEXT: v_writelane_b32 v2, s7, 43
86 ; GCN-NEXT: v_writelane_b32 v2, s8, 44
87 ; GCN-NEXT: v_writelane_b32 v2, s9, 45
88 ; GCN-NEXT: v_writelane_b32 v2, s10, 46
89 ; GCN-NEXT: v_writelane_b32 v2, s11, 47
90 ; GCN-NEXT: ;;#ASMSTART
91 ; GCN-NEXT: ; def s[4:11]
93 ; GCN-NEXT: v_writelane_b32 v2, s4, 48
94 ; GCN-NEXT: v_writelane_b32 v2, s5, 49
95 ; GCN-NEXT: v_writelane_b32 v2, s6, 50
96 ; GCN-NEXT: v_writelane_b32 v2, s7, 51
97 ; GCN-NEXT: v_writelane_b32 v2, s8, 52
98 ; GCN-NEXT: v_writelane_b32 v2, s9, 53
99 ; GCN-NEXT: v_writelane_b32 v2, s10, 54
100 ; GCN-NEXT: v_writelane_b32 v2, s11, 55
101 ; GCN-NEXT: ;;#ASMSTART
102 ; GCN-NEXT: ; def s[4:11]
103 ; GCN-NEXT: ;;#ASMEND
104 ; GCN-NEXT: v_writelane_b32 v2, s4, 56
105 ; GCN-NEXT: v_writelane_b32 v2, s5, 57
106 ; GCN-NEXT: v_writelane_b32 v2, s6, 58
107 ; GCN-NEXT: v_writelane_b32 v2, s7, 59
108 ; GCN-NEXT: v_writelane_b32 v2, s8, 60
109 ; GCN-NEXT: v_writelane_b32 v2, s9, 61
110 ; GCN-NEXT: v_writelane_b32 v2, s10, 62
111 ; GCN-NEXT: v_writelane_b32 v2, s11, 63
112 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
113 ; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 offset:12 ; 4-byte Folded Spill
114 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
115 ; GCN-NEXT: ;;#ASMSTART
116 ; GCN-NEXT: ; def s[4:11]
117 ; GCN-NEXT: ;;#ASMEND
118 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
119 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
120 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
121 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
122 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
123 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
124 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
125 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
126 ; GCN-NEXT: ;;#ASMSTART
127 ; GCN-NEXT: ; def s[4:11]
128 ; GCN-NEXT: ;;#ASMEND
129 ; GCN-NEXT: v_writelane_b32 v1, s4, 8
130 ; GCN-NEXT: v_writelane_b32 v1, s5, 9
131 ; GCN-NEXT: v_writelane_b32 v1, s6, 10
132 ; GCN-NEXT: v_writelane_b32 v1, s7, 11
133 ; GCN-NEXT: v_writelane_b32 v1, s8, 12
134 ; GCN-NEXT: v_writelane_b32 v1, s9, 13
135 ; GCN-NEXT: v_writelane_b32 v1, s10, 14
136 ; GCN-NEXT: v_writelane_b32 v1, s11, 15
137 ; GCN-NEXT: ;;#ASMSTART
138 ; GCN-NEXT: ; def s[4:11]
139 ; GCN-NEXT: ;;#ASMEND
140 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
141 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
142 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
143 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
144 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
145 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
146 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
147 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
148 ; GCN-NEXT: ;;#ASMSTART
149 ; GCN-NEXT: ; def s[4:11]
150 ; GCN-NEXT: ;;#ASMEND
151 ; GCN-NEXT: v_writelane_b32 v1, s4, 24
152 ; GCN-NEXT: v_writelane_b32 v1, s5, 25
153 ; GCN-NEXT: v_writelane_b32 v1, s6, 26
154 ; GCN-NEXT: v_writelane_b32 v1, s7, 27
155 ; GCN-NEXT: v_writelane_b32 v1, s8, 28
156 ; GCN-NEXT: v_writelane_b32 v1, s9, 29
157 ; GCN-NEXT: v_writelane_b32 v1, s10, 30
158 ; GCN-NEXT: v_writelane_b32 v1, s11, 31
159 ; GCN-NEXT: ;;#ASMSTART
160 ; GCN-NEXT: ; def s[4:11]
161 ; GCN-NEXT: ;;#ASMEND
162 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
163 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
164 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
165 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
166 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
167 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
168 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
169 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
170 ; GCN-NEXT: ;;#ASMSTART
171 ; GCN-NEXT: ; def s[4:11]
172 ; GCN-NEXT: ;;#ASMEND
173 ; GCN-NEXT: v_writelane_b32 v1, s4, 40
174 ; GCN-NEXT: v_writelane_b32 v1, s5, 41
175 ; GCN-NEXT: v_writelane_b32 v1, s6, 42
176 ; GCN-NEXT: v_writelane_b32 v1, s7, 43
177 ; GCN-NEXT: v_writelane_b32 v1, s8, 44
178 ; GCN-NEXT: v_writelane_b32 v1, s9, 45
179 ; GCN-NEXT: v_writelane_b32 v1, s10, 46
180 ; GCN-NEXT: v_writelane_b32 v1, s11, 47
181 ; GCN-NEXT: ;;#ASMSTART
182 ; GCN-NEXT: ; def s[4:11]
183 ; GCN-NEXT: ;;#ASMEND
184 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
185 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
186 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
187 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
188 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
189 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
190 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
191 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
192 ; GCN-NEXT: ;;#ASMSTART
193 ; GCN-NEXT: ; def s[4:11]
194 ; GCN-NEXT: ;;#ASMEND
195 ; GCN-NEXT: v_writelane_b32 v1, s4, 56
196 ; GCN-NEXT: v_writelane_b32 v1, s5, 57
197 ; GCN-NEXT: v_writelane_b32 v1, s6, 58
198 ; GCN-NEXT: v_writelane_b32 v1, s7, 59
199 ; GCN-NEXT: v_writelane_b32 v1, s8, 60
200 ; GCN-NEXT: v_writelane_b32 v1, s9, 61
201 ; GCN-NEXT: v_writelane_b32 v1, s10, 62
202 ; GCN-NEXT: v_writelane_b32 v1, s11, 63
203 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
204 ; GCN-NEXT: buffer_store_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Spill
205 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
206 ; GCN-NEXT: ;;#ASMSTART
207 ; GCN-NEXT: ; def s[4:11]
208 ; GCN-NEXT: ;;#ASMEND
209 ; GCN-NEXT: v_writelane_b32 v0, s4, 0
210 ; GCN-NEXT: v_writelane_b32 v0, s5, 1
211 ; GCN-NEXT: v_writelane_b32 v0, s6, 2
212 ; GCN-NEXT: v_writelane_b32 v0, s7, 3
213 ; GCN-NEXT: v_writelane_b32 v0, s8, 4
214 ; GCN-NEXT: v_writelane_b32 v0, s9, 5
215 ; GCN-NEXT: v_writelane_b32 v0, s10, 6
216 ; GCN-NEXT: v_writelane_b32 v0, s11, 7
217 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
218 ; GCN-NEXT: buffer_store_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill
219 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
220 ; GCN-NEXT: s_mov_b32 s1, 0
221 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
222 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
223 ; GCN-NEXT: s_cbranch_scc1 .LBB0_2
224 ; GCN-NEXT: ; %bb.1: ; %bb0
225 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
226 ; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
227 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
228 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
229 ; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload
230 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
231 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
232 ; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
233 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
234 ; GCN-NEXT: s_waitcnt vmcnt(0)
235 ; GCN-NEXT: v_readlane_b32 s8, v2, 56
236 ; GCN-NEXT: v_readlane_b32 s9, v2, 57
237 ; GCN-NEXT: v_readlane_b32 s10, v2, 58
238 ; GCN-NEXT: v_readlane_b32 s11, v2, 59
239 ; GCN-NEXT: v_readlane_b32 s12, v2, 60
240 ; GCN-NEXT: v_readlane_b32 s13, v2, 61
241 ; GCN-NEXT: v_readlane_b32 s14, v2, 62
242 ; GCN-NEXT: v_readlane_b32 s15, v2, 63
243 ; GCN-NEXT: v_readlane_b32 s16, v2, 48
244 ; GCN-NEXT: v_readlane_b32 s17, v2, 49
245 ; GCN-NEXT: v_readlane_b32 s18, v2, 50
246 ; GCN-NEXT: v_readlane_b32 s19, v2, 51
247 ; GCN-NEXT: v_readlane_b32 s20, v2, 52
248 ; GCN-NEXT: v_readlane_b32 s21, v2, 53
249 ; GCN-NEXT: v_readlane_b32 s22, v2, 54
250 ; GCN-NEXT: v_readlane_b32 s23, v2, 55
251 ; GCN-NEXT: v_readlane_b32 s24, v2, 40
252 ; GCN-NEXT: v_readlane_b32 s25, v2, 41
253 ; GCN-NEXT: v_readlane_b32 s26, v2, 42
254 ; GCN-NEXT: v_readlane_b32 s27, v2, 43
255 ; GCN-NEXT: v_readlane_b32 s28, v2, 44
256 ; GCN-NEXT: v_readlane_b32 s29, v2, 45
257 ; GCN-NEXT: v_readlane_b32 s30, v2, 46
258 ; GCN-NEXT: v_readlane_b32 s31, v2, 47
259 ; GCN-NEXT: v_readlane_b32 s36, v2, 32
260 ; GCN-NEXT: v_readlane_b32 s37, v2, 33
261 ; GCN-NEXT: v_readlane_b32 s38, v2, 34
262 ; GCN-NEXT: v_readlane_b32 s39, v2, 35
263 ; GCN-NEXT: v_readlane_b32 s40, v2, 36
264 ; GCN-NEXT: v_readlane_b32 s41, v2, 37
265 ; GCN-NEXT: v_readlane_b32 s42, v2, 38
266 ; GCN-NEXT: v_readlane_b32 s43, v2, 39
267 ; GCN-NEXT: v_readlane_b32 s44, v2, 24
268 ; GCN-NEXT: v_readlane_b32 s45, v2, 25
269 ; GCN-NEXT: v_readlane_b32 s46, v2, 26
270 ; GCN-NEXT: v_readlane_b32 s47, v2, 27
271 ; GCN-NEXT: v_readlane_b32 s48, v2, 28
272 ; GCN-NEXT: v_readlane_b32 s49, v2, 29
273 ; GCN-NEXT: v_readlane_b32 s50, v2, 30
274 ; GCN-NEXT: v_readlane_b32 s51, v2, 31
275 ; GCN-NEXT: v_readlane_b32 s52, v2, 16
276 ; GCN-NEXT: v_readlane_b32 s53, v2, 17
277 ; GCN-NEXT: v_readlane_b32 s54, v2, 18
278 ; GCN-NEXT: v_readlane_b32 s55, v2, 19
279 ; GCN-NEXT: v_readlane_b32 s56, v2, 20
280 ; GCN-NEXT: v_readlane_b32 s57, v2, 21
281 ; GCN-NEXT: v_readlane_b32 s58, v2, 22
282 ; GCN-NEXT: v_readlane_b32 s59, v2, 23
283 ; GCN-NEXT: v_readlane_b32 s60, v2, 8
284 ; GCN-NEXT: v_readlane_b32 s61, v2, 9
285 ; GCN-NEXT: v_readlane_b32 s62, v2, 10
286 ; GCN-NEXT: v_readlane_b32 s63, v2, 11
287 ; GCN-NEXT: v_readlane_b32 s64, v2, 12
288 ; GCN-NEXT: v_readlane_b32 s65, v2, 13
289 ; GCN-NEXT: v_readlane_b32 s66, v2, 14
290 ; GCN-NEXT: v_readlane_b32 s67, v2, 15
291 ; GCN-NEXT: v_readlane_b32 s68, v2, 0
292 ; GCN-NEXT: v_readlane_b32 s69, v2, 1
293 ; GCN-NEXT: v_readlane_b32 s70, v2, 2
294 ; GCN-NEXT: v_readlane_b32 s71, v2, 3
295 ; GCN-NEXT: v_readlane_b32 s72, v2, 4
296 ; GCN-NEXT: v_readlane_b32 s73, v2, 5
297 ; GCN-NEXT: v_readlane_b32 s74, v2, 6
298 ; GCN-NEXT: v_readlane_b32 s75, v2, 7
299 ; GCN-NEXT: v_readlane_b32 s76, v1, 56
300 ; GCN-NEXT: v_readlane_b32 s77, v1, 57
301 ; GCN-NEXT: v_readlane_b32 s78, v1, 58
302 ; GCN-NEXT: v_readlane_b32 s79, v1, 59
303 ; GCN-NEXT: v_readlane_b32 s80, v1, 60
304 ; GCN-NEXT: v_readlane_b32 s81, v1, 61
305 ; GCN-NEXT: v_readlane_b32 s82, v1, 62
306 ; GCN-NEXT: v_readlane_b32 s83, v1, 63
307 ; GCN-NEXT: v_readlane_b32 s84, v1, 48
308 ; GCN-NEXT: v_readlane_b32 s85, v1, 49
309 ; GCN-NEXT: v_readlane_b32 s86, v1, 50
310 ; GCN-NEXT: v_readlane_b32 s87, v1, 51
311 ; GCN-NEXT: v_readlane_b32 s88, v1, 52
312 ; GCN-NEXT: v_readlane_b32 s89, v1, 53
313 ; GCN-NEXT: v_readlane_b32 s90, v1, 54
314 ; GCN-NEXT: v_readlane_b32 s91, v1, 55
315 ; GCN-NEXT: v_readlane_b32 s0, v1, 0
316 ; GCN-NEXT: v_readlane_b32 s1, v1, 1
317 ; GCN-NEXT: v_readlane_b32 s2, v1, 2
318 ; GCN-NEXT: v_readlane_b32 s3, v1, 3
319 ; GCN-NEXT: v_readlane_b32 s4, v1, 4
320 ; GCN-NEXT: v_readlane_b32 s5, v1, 5
321 ; GCN-NEXT: v_readlane_b32 s6, v1, 6
322 ; GCN-NEXT: v_readlane_b32 s7, v1, 7
323 ; GCN-NEXT: ;;#ASMSTART
324 ; GCN-NEXT: ; use s[0:7]
325 ; GCN-NEXT: ;;#ASMEND
326 ; GCN-NEXT: v_readlane_b32 s0, v1, 8
327 ; GCN-NEXT: v_readlane_b32 s1, v1, 9
328 ; GCN-NEXT: v_readlane_b32 s2, v1, 10
329 ; GCN-NEXT: v_readlane_b32 s3, v1, 11
330 ; GCN-NEXT: v_readlane_b32 s4, v1, 12
331 ; GCN-NEXT: v_readlane_b32 s5, v1, 13
332 ; GCN-NEXT: v_readlane_b32 s6, v1, 14
333 ; GCN-NEXT: v_readlane_b32 s7, v1, 15
334 ; GCN-NEXT: ;;#ASMSTART
335 ; GCN-NEXT: ; use s[0:7]
336 ; GCN-NEXT: ;;#ASMEND
337 ; GCN-NEXT: v_readlane_b32 s0, v1, 16
338 ; GCN-NEXT: v_readlane_b32 s1, v1, 17
339 ; GCN-NEXT: v_readlane_b32 s2, v1, 18
340 ; GCN-NEXT: v_readlane_b32 s3, v1, 19
341 ; GCN-NEXT: v_readlane_b32 s4, v1, 20
342 ; GCN-NEXT: v_readlane_b32 s5, v1, 21
343 ; GCN-NEXT: v_readlane_b32 s6, v1, 22
344 ; GCN-NEXT: v_readlane_b32 s7, v1, 23
345 ; GCN-NEXT: ;;#ASMSTART
346 ; GCN-NEXT: ; use s[0:7]
347 ; GCN-NEXT: ;;#ASMEND
348 ; GCN-NEXT: v_readlane_b32 s0, v1, 24
349 ; GCN-NEXT: v_readlane_b32 s1, v1, 25
350 ; GCN-NEXT: v_readlane_b32 s2, v1, 26
351 ; GCN-NEXT: v_readlane_b32 s3, v1, 27
352 ; GCN-NEXT: v_readlane_b32 s4, v1, 28
353 ; GCN-NEXT: v_readlane_b32 s5, v1, 29
354 ; GCN-NEXT: v_readlane_b32 s6, v1, 30
355 ; GCN-NEXT: v_readlane_b32 s7, v1, 31
356 ; GCN-NEXT: ;;#ASMSTART
357 ; GCN-NEXT: ; use s[0:7]
358 ; GCN-NEXT: ;;#ASMEND
359 ; GCN-NEXT: v_readlane_b32 s0, v1, 32
360 ; GCN-NEXT: v_readlane_b32 s1, v1, 33
361 ; GCN-NEXT: v_readlane_b32 s2, v1, 34
362 ; GCN-NEXT: v_readlane_b32 s3, v1, 35
363 ; GCN-NEXT: v_readlane_b32 s4, v1, 36
364 ; GCN-NEXT: v_readlane_b32 s5, v1, 37
365 ; GCN-NEXT: v_readlane_b32 s6, v1, 38
366 ; GCN-NEXT: v_readlane_b32 s7, v1, 39
367 ; GCN-NEXT: ;;#ASMSTART
368 ; GCN-NEXT: ; use s[0:7]
369 ; GCN-NEXT: ;;#ASMEND
370 ; GCN-NEXT: v_readlane_b32 s0, v1, 40
371 ; GCN-NEXT: v_readlane_b32 s1, v1, 41
372 ; GCN-NEXT: v_readlane_b32 s2, v1, 42
373 ; GCN-NEXT: v_readlane_b32 s3, v1, 43
374 ; GCN-NEXT: v_readlane_b32 s4, v1, 44
375 ; GCN-NEXT: v_readlane_b32 s5, v1, 45
376 ; GCN-NEXT: v_readlane_b32 s6, v1, 46
377 ; GCN-NEXT: v_readlane_b32 s7, v1, 47
378 ; GCN-NEXT: ;;#ASMSTART
379 ; GCN-NEXT: ; use s[0:7]
380 ; GCN-NEXT: ;;#ASMEND
381 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
382 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
383 ; GCN-NEXT: v_readlane_b32 s2, v0, 2
384 ; GCN-NEXT: v_readlane_b32 s3, v0, 3
385 ; GCN-NEXT: v_readlane_b32 s4, v0, 4
386 ; GCN-NEXT: v_readlane_b32 s5, v0, 5
387 ; GCN-NEXT: v_readlane_b32 s6, v0, 6
388 ; GCN-NEXT: v_readlane_b32 s7, v0, 7
389 ; GCN-NEXT: ;;#ASMSTART
390 ; GCN-NEXT: ; use s[84:91]
391 ; GCN-NEXT: ;;#ASMEND
392 ; GCN-NEXT: ;;#ASMSTART
393 ; GCN-NEXT: ; use s[76:83]
394 ; GCN-NEXT: ;;#ASMEND
395 ; GCN-NEXT: ;;#ASMSTART
396 ; GCN-NEXT: ; use s[68:75]
397 ; GCN-NEXT: ;;#ASMEND
398 ; GCN-NEXT: ;;#ASMSTART
399 ; GCN-NEXT: ; use s[60:67]
400 ; GCN-NEXT: ;;#ASMEND
401 ; GCN-NEXT: ;;#ASMSTART
402 ; GCN-NEXT: ; use s[52:59]
403 ; GCN-NEXT: ;;#ASMEND
404 ; GCN-NEXT: ;;#ASMSTART
405 ; GCN-NEXT: ; use s[44:51]
406 ; GCN-NEXT: ;;#ASMEND
407 ; GCN-NEXT: ;;#ASMSTART
408 ; GCN-NEXT: ; use s[36:43]
409 ; GCN-NEXT: ;;#ASMEND
410 ; GCN-NEXT: ;;#ASMSTART
411 ; GCN-NEXT: ; use s[24:31]
412 ; GCN-NEXT: ;;#ASMEND
413 ; GCN-NEXT: ;;#ASMSTART
414 ; GCN-NEXT: ; use s[16:23]
415 ; GCN-NEXT: ;;#ASMEND
416 ; GCN-NEXT: ;;#ASMSTART
417 ; GCN-NEXT: ; use s[8:15]
418 ; GCN-NEXT: ;;#ASMEND
419 ; GCN-NEXT: ;;#ASMSTART
420 ; GCN-NEXT: ; use s[0:7]
421 ; GCN-NEXT: ;;#ASMEND
422 ; GCN-NEXT: .LBB0_2: ; %ret
423 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
424 ; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
425 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
426 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
427 ; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
428 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
429 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
430 ; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload
431 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
432 ; GCN-NEXT: ; kill: killed $vgpr2
433 ; GCN-NEXT: ; kill: killed $vgpr1
434 ; GCN-NEXT: ; kill: killed $vgpr0
; Seventeen 8-wide SGPR definitions; the "=s" constraint requests an SGPR
; tuple for each result.
436 %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
437 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
438 %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
439 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
440 %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
441 %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
442 %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
443 %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
444 %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
445 %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
446 %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
447 %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
448 %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
449 %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
450 %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
451 %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
452 %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
; The uses are reached only when %in == 0, which keeps every definition above
; live across the conditional branch.
453 %cmp = icmp eq i32 %in, 0
454 br i1 %cmp, label %bb0, label %ret
; Uses follow definition order (%wide.sgpr0 first, %wide.sgpr16 last).
457 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
458 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) #0
459 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0
460 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
461 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0
462 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0
463 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0
464 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0
465 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0
466 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0
467 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0
468 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0
469 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0
470 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0
471 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0
472 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0
473 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0
480 ; Some of the lanes of an SGPR spill are in one VGPR and some forced
481 ; into the next available VGPR.
482 define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) #1 {
; Review notes, derived from the check lines and IR of this function:
; - The IR defines four <16 x i32>, one <8 x i32> and one <2 x i32> value in
;   SGPRs through inline asm (74 SGPR lanes in total), in the order
;   %wide.sgpr0, %wide.sgpr1, %wide.sgpr2, %wide.sgpr5, %wide.sgpr3, %wide.sgpr4.
; - v1 receives lanes 0-63 (the four 16-wide defs) and is stored to scratch at
;   offset:8. v0 receives lanes 0-7 (the 8-wide def) and lanes 8-9 (the 2-wide
;   def) and is stored at offset:4.
; - In %bb0 the reloads swap VGPR numbers relative to the stores: offset:8 is
;   reloaded into v0 and offset:4 into v1. In %ret, offset:4 goes back into v0
;   and offset:8 into v1.
; - NOTE(review): in the checks visible here every def's lanes land in a
;   single VGPR, i.e. no individual spill is split across v0 and v1. Confirm
;   this still matches the intent described in the comment above the function.
; - The check lines are autogenerated; regenerate them with
;   utils/update_llc_test_checks.py instead of editing them by hand.
483 ; GCN-LABEL: split_sgpr_spill_2_vgprs:
485 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
486 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
487 ; GCN-NEXT: s_mov_b32 s54, -1
488 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
489 ; GCN-NEXT: s_add_u32 s52, s52, s11
490 ; GCN-NEXT: s_addc_u32 s53, s53, 0
491 ; GCN-NEXT: ; implicit-def: $vgpr0
492 ; GCN-NEXT: ; implicit-def: $vgpr1
493 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
494 ; GCN-NEXT: ;;#ASMSTART
495 ; GCN-NEXT: ; def s[4:19]
496 ; GCN-NEXT: ;;#ASMEND
497 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
498 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
499 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
500 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
501 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
502 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
503 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
504 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
505 ; GCN-NEXT: v_writelane_b32 v1, s12, 8
506 ; GCN-NEXT: v_writelane_b32 v1, s13, 9
507 ; GCN-NEXT: v_writelane_b32 v1, s14, 10
508 ; GCN-NEXT: v_writelane_b32 v1, s15, 11
509 ; GCN-NEXT: v_writelane_b32 v1, s16, 12
510 ; GCN-NEXT: v_writelane_b32 v1, s17, 13
511 ; GCN-NEXT: v_writelane_b32 v1, s18, 14
512 ; GCN-NEXT: v_writelane_b32 v1, s19, 15
513 ; GCN-NEXT: ;;#ASMSTART
514 ; GCN-NEXT: ; def s[4:19]
515 ; GCN-NEXT: ;;#ASMEND
516 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
517 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
518 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
519 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
520 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
521 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
522 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
523 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
524 ; GCN-NEXT: v_writelane_b32 v1, s12, 24
525 ; GCN-NEXT: v_writelane_b32 v1, s13, 25
526 ; GCN-NEXT: v_writelane_b32 v1, s14, 26
527 ; GCN-NEXT: v_writelane_b32 v1, s15, 27
528 ; GCN-NEXT: v_writelane_b32 v1, s16, 28
529 ; GCN-NEXT: v_writelane_b32 v1, s17, 29
530 ; GCN-NEXT: v_writelane_b32 v1, s18, 30
531 ; GCN-NEXT: v_writelane_b32 v1, s19, 31
532 ; GCN-NEXT: ;;#ASMSTART
533 ; GCN-NEXT: ; def s[4:19]
534 ; GCN-NEXT: ;;#ASMEND
535 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
536 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
537 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
538 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
539 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
540 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
541 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
542 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
543 ; GCN-NEXT: v_writelane_b32 v1, s12, 40
544 ; GCN-NEXT: v_writelane_b32 v1, s13, 41
545 ; GCN-NEXT: v_writelane_b32 v1, s14, 42
546 ; GCN-NEXT: v_writelane_b32 v1, s15, 43
547 ; GCN-NEXT: v_writelane_b32 v1, s16, 44
548 ; GCN-NEXT: v_writelane_b32 v1, s17, 45
549 ; GCN-NEXT: v_writelane_b32 v1, s18, 46
550 ; GCN-NEXT: v_writelane_b32 v1, s19, 47
551 ; GCN-NEXT: ;;#ASMSTART
552 ; GCN-NEXT: ; def s[4:19]
553 ; GCN-NEXT: ;;#ASMEND
554 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
555 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
556 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
557 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
558 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
559 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
560 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
561 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
562 ; GCN-NEXT: v_writelane_b32 v1, s12, 56
563 ; GCN-NEXT: v_writelane_b32 v1, s13, 57
564 ; GCN-NEXT: v_writelane_b32 v1, s14, 58
565 ; GCN-NEXT: v_writelane_b32 v1, s15, 59
566 ; GCN-NEXT: v_writelane_b32 v1, s16, 60
567 ; GCN-NEXT: v_writelane_b32 v1, s17, 61
568 ; GCN-NEXT: v_writelane_b32 v1, s18, 62
569 ; GCN-NEXT: v_writelane_b32 v1, s19, 63
570 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
571 ; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill
572 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
573 ; GCN-NEXT: ;;#ASMSTART
574 ; GCN-NEXT: ; def s[4:11]
575 ; GCN-NEXT: ;;#ASMEND
576 ; GCN-NEXT: v_writelane_b32 v0, s4, 0
577 ; GCN-NEXT: v_writelane_b32 v0, s5, 1
578 ; GCN-NEXT: v_writelane_b32 v0, s6, 2
579 ; GCN-NEXT: v_writelane_b32 v0, s7, 3
580 ; GCN-NEXT: v_writelane_b32 v0, s8, 4
581 ; GCN-NEXT: v_writelane_b32 v0, s9, 5
582 ; GCN-NEXT: v_writelane_b32 v0, s10, 6
583 ; GCN-NEXT: v_writelane_b32 v0, s11, 7
584 ; GCN-NEXT: ;;#ASMSTART
585 ; GCN-NEXT: ; def s[2:3]
586 ; GCN-NEXT: ;;#ASMEND
587 ; GCN-NEXT: v_writelane_b32 v0, s2, 8
588 ; GCN-NEXT: v_writelane_b32 v0, s3, 9
589 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
590 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
591 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
592 ; GCN-NEXT: s_mov_b32 s1, 0
593 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
594 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
595 ; GCN-NEXT: s_cbranch_scc1 .LBB1_2
596 ; GCN-NEXT: ; %bb.1: ; %bb0
597 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
598 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
599 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
600 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
601 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
602 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
603 ; GCN-NEXT: s_waitcnt vmcnt(0)
604 ; GCN-NEXT: v_readlane_b32 s16, v1, 8
605 ; GCN-NEXT: v_readlane_b32 s17, v1, 9
606 ; GCN-NEXT: v_readlane_b32 s20, v1, 0
607 ; GCN-NEXT: v_readlane_b32 s21, v1, 1
608 ; GCN-NEXT: v_readlane_b32 s22, v1, 2
609 ; GCN-NEXT: v_readlane_b32 s23, v1, 3
610 ; GCN-NEXT: v_readlane_b32 s24, v1, 4
611 ; GCN-NEXT: v_readlane_b32 s25, v1, 5
612 ; GCN-NEXT: v_readlane_b32 s26, v1, 6
613 ; GCN-NEXT: v_readlane_b32 s27, v1, 7
614 ; GCN-NEXT: v_readlane_b32 s36, v0, 32
615 ; GCN-NEXT: v_readlane_b32 s37, v0, 33
616 ; GCN-NEXT: v_readlane_b32 s38, v0, 34
617 ; GCN-NEXT: v_readlane_b32 s39, v0, 35
618 ; GCN-NEXT: v_readlane_b32 s40, v0, 36
619 ; GCN-NEXT: v_readlane_b32 s41, v0, 37
620 ; GCN-NEXT: v_readlane_b32 s42, v0, 38
621 ; GCN-NEXT: v_readlane_b32 s43, v0, 39
622 ; GCN-NEXT: v_readlane_b32 s44, v0, 40
623 ; GCN-NEXT: v_readlane_b32 s45, v0, 41
624 ; GCN-NEXT: v_readlane_b32 s46, v0, 42
625 ; GCN-NEXT: v_readlane_b32 s47, v0, 43
626 ; GCN-NEXT: v_readlane_b32 s48, v0, 44
627 ; GCN-NEXT: v_readlane_b32 s49, v0, 45
628 ; GCN-NEXT: v_readlane_b32 s50, v0, 46
629 ; GCN-NEXT: v_readlane_b32 s51, v0, 47
630 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
631 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
632 ; GCN-NEXT: v_readlane_b32 s2, v0, 2
633 ; GCN-NEXT: v_readlane_b32 s3, v0, 3
634 ; GCN-NEXT: v_readlane_b32 s4, v0, 4
635 ; GCN-NEXT: v_readlane_b32 s5, v0, 5
636 ; GCN-NEXT: v_readlane_b32 s6, v0, 6
637 ; GCN-NEXT: v_readlane_b32 s7, v0, 7
638 ; GCN-NEXT: v_readlane_b32 s8, v0, 8
639 ; GCN-NEXT: v_readlane_b32 s9, v0, 9
640 ; GCN-NEXT: v_readlane_b32 s10, v0, 10
641 ; GCN-NEXT: v_readlane_b32 s11, v0, 11
642 ; GCN-NEXT: v_readlane_b32 s12, v0, 12
643 ; GCN-NEXT: v_readlane_b32 s13, v0, 13
644 ; GCN-NEXT: v_readlane_b32 s14, v0, 14
645 ; GCN-NEXT: v_readlane_b32 s15, v0, 15
646 ; GCN-NEXT: ;;#ASMSTART
647 ; GCN-NEXT: ; use s[0:15]
648 ; GCN-NEXT: ;;#ASMEND
649 ; GCN-NEXT: v_readlane_b32 s0, v0, 16
650 ; GCN-NEXT: v_readlane_b32 s1, v0, 17
651 ; GCN-NEXT: v_readlane_b32 s2, v0, 18
652 ; GCN-NEXT: v_readlane_b32 s3, v0, 19
653 ; GCN-NEXT: v_readlane_b32 s4, v0, 20
654 ; GCN-NEXT: v_readlane_b32 s5, v0, 21
655 ; GCN-NEXT: v_readlane_b32 s6, v0, 22
656 ; GCN-NEXT: v_readlane_b32 s7, v0, 23
657 ; GCN-NEXT: v_readlane_b32 s8, v0, 24
658 ; GCN-NEXT: v_readlane_b32 s9, v0, 25
659 ; GCN-NEXT: v_readlane_b32 s10, v0, 26
660 ; GCN-NEXT: v_readlane_b32 s11, v0, 27
661 ; GCN-NEXT: v_readlane_b32 s12, v0, 28
662 ; GCN-NEXT: v_readlane_b32 s13, v0, 29
663 ; GCN-NEXT: v_readlane_b32 s14, v0, 30
664 ; GCN-NEXT: v_readlane_b32 s15, v0, 31
665 ; GCN-NEXT: ;;#ASMSTART
666 ; GCN-NEXT: ; use s[0:15]
667 ; GCN-NEXT: ;;#ASMEND
668 ; GCN-NEXT: v_readlane_b32 s0, v0, 48
669 ; GCN-NEXT: v_readlane_b32 s1, v0, 49
670 ; GCN-NEXT: v_readlane_b32 s2, v0, 50
671 ; GCN-NEXT: v_readlane_b32 s3, v0, 51
672 ; GCN-NEXT: v_readlane_b32 s4, v0, 52
673 ; GCN-NEXT: v_readlane_b32 s5, v0, 53
674 ; GCN-NEXT: v_readlane_b32 s6, v0, 54
675 ; GCN-NEXT: v_readlane_b32 s7, v0, 55
676 ; GCN-NEXT: v_readlane_b32 s8, v0, 56
677 ; GCN-NEXT: v_readlane_b32 s9, v0, 57
678 ; GCN-NEXT: v_readlane_b32 s10, v0, 58
679 ; GCN-NEXT: v_readlane_b32 s11, v0, 59
680 ; GCN-NEXT: v_readlane_b32 s12, v0, 60
681 ; GCN-NEXT: v_readlane_b32 s13, v0, 61
682 ; GCN-NEXT: v_readlane_b32 s14, v0, 62
683 ; GCN-NEXT: v_readlane_b32 s15, v0, 63
684 ; GCN-NEXT: ;;#ASMSTART
685 ; GCN-NEXT: ; use s[36:51]
686 ; GCN-NEXT: ;;#ASMEND
687 ; GCN-NEXT: ;;#ASMSTART
688 ; GCN-NEXT: ; use s[20:27]
689 ; GCN-NEXT: ;;#ASMEND
690 ; GCN-NEXT: ;;#ASMSTART
691 ; GCN-NEXT: ; use s[16:17]
692 ; GCN-NEXT: ;;#ASMEND
693 ; GCN-NEXT: ;;#ASMSTART
694 ; GCN-NEXT: ; use s[0:15]
695 ; GCN-NEXT: ;;#ASMEND
696 ; GCN-NEXT: .LBB1_2: ; %ret
697 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
698 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
699 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
700 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
701 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
702 ; GCN-NEXT: s_mov_b64 exec, s[28:29]
703 ; GCN-NEXT: ; kill: killed $vgpr1
704 ; GCN-NEXT: ; kill: killed $vgpr0
; Definition order differs from the value numbering: %wide.sgpr5 (16-wide) is
; defined before %wide.sgpr3 (8-wide) and %wide.sgpr4 (2-wide).
706 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
707 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
708 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
709 %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
710 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
711 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
; The uses are reached only when %in == 0, which keeps every definition above
; live across the conditional branch.
713 %cmp = icmp eq i32 %in, 0
714 br i1 %cmp, label %bb0, label %ret
; Uses are in value-number order, so %wide.sgpr5 is consumed last.
717 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
718 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
719 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
720 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
721 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
722 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
729 ; The first 64 SGPR spills can go to a VGPR, but there isn't a second
730 ; so some spills must be to memory. The last 16 element spill runs out
731 ; of lanes at the 15th element.
732 define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
733 ; GCN-LABEL: no_vgprs_last_sgpr_spill:
735 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
736 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
737 ; GCN-NEXT: s_mov_b32 s54, -1
738 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
739 ; GCN-NEXT: s_add_u32 s52, s52, s11
740 ; GCN-NEXT: s_addc_u32 s53, s53, 0
741 ; GCN-NEXT: ; implicit-def: $vgpr0
742 ; GCN-NEXT: ; implicit-def: $vgpr0
743 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
744 ; GCN-NEXT: ;;#ASMSTART
745 ; GCN-NEXT: ;;#ASMEND
746 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
747 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
748 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
749 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
750 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
751 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
752 ; GCN-NEXT: ;;#ASMSTART
753 ; GCN-NEXT: ;;#ASMEND
754 ; GCN-NEXT: ;;#ASMSTART
755 ; GCN-NEXT: ;;#ASMEND
756 ; GCN-NEXT: ;;#ASMSTART
757 ; GCN-NEXT: ;;#ASMEND
758 ; GCN-NEXT: ;;#ASMSTART
759 ; GCN-NEXT: ;;#ASMEND
760 ; GCN-NEXT: ;;#ASMSTART
761 ; GCN-NEXT: ;;#ASMEND
762 ; GCN-NEXT: ;;#ASMSTART
763 ; GCN-NEXT: ; def s[4:19]
764 ; GCN-NEXT: ;;#ASMEND
765 ; GCN-NEXT: s_waitcnt vmcnt(1)
766 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
767 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
768 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
769 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
770 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
771 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
772 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
773 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
774 ; GCN-NEXT: v_writelane_b32 v1, s12, 8
775 ; GCN-NEXT: v_writelane_b32 v1, s13, 9
776 ; GCN-NEXT: v_writelane_b32 v1, s14, 10
777 ; GCN-NEXT: v_writelane_b32 v1, s15, 11
778 ; GCN-NEXT: v_writelane_b32 v1, s16, 12
779 ; GCN-NEXT: v_writelane_b32 v1, s17, 13
780 ; GCN-NEXT: v_writelane_b32 v1, s18, 14
781 ; GCN-NEXT: v_writelane_b32 v1, s19, 15
782 ; GCN-NEXT: ;;#ASMSTART
783 ; GCN-NEXT: ; def s[4:19]
784 ; GCN-NEXT: ;;#ASMEND
785 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
786 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
787 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
788 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
789 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
790 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
791 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
792 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
793 ; GCN-NEXT: v_writelane_b32 v1, s12, 24
794 ; GCN-NEXT: v_writelane_b32 v1, s13, 25
795 ; GCN-NEXT: v_writelane_b32 v1, s14, 26
796 ; GCN-NEXT: v_writelane_b32 v1, s15, 27
797 ; GCN-NEXT: v_writelane_b32 v1, s16, 28
798 ; GCN-NEXT: v_writelane_b32 v1, s17, 29
799 ; GCN-NEXT: v_writelane_b32 v1, s18, 30
800 ; GCN-NEXT: v_writelane_b32 v1, s19, 31
801 ; GCN-NEXT: ;;#ASMSTART
802 ; GCN-NEXT: ; def s[4:19]
803 ; GCN-NEXT: ;;#ASMEND
804 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
805 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
806 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
807 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
808 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
809 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
810 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
811 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
812 ; GCN-NEXT: v_writelane_b32 v1, s12, 40
813 ; GCN-NEXT: v_writelane_b32 v1, s13, 41
814 ; GCN-NEXT: v_writelane_b32 v1, s14, 42
815 ; GCN-NEXT: v_writelane_b32 v1, s15, 43
816 ; GCN-NEXT: v_writelane_b32 v1, s16, 44
817 ; GCN-NEXT: v_writelane_b32 v1, s17, 45
818 ; GCN-NEXT: v_writelane_b32 v1, s18, 46
819 ; GCN-NEXT: v_writelane_b32 v1, s19, 47
820 ; GCN-NEXT: ;;#ASMSTART
821 ; GCN-NEXT: ; def s[4:19]
822 ; GCN-NEXT: ;;#ASMEND
823 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
824 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
825 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
826 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
827 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
828 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
829 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
830 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
831 ; GCN-NEXT: v_writelane_b32 v1, s12, 56
832 ; GCN-NEXT: v_writelane_b32 v1, s13, 57
833 ; GCN-NEXT: v_writelane_b32 v1, s14, 58
834 ; GCN-NEXT: v_writelane_b32 v1, s15, 59
835 ; GCN-NEXT: v_writelane_b32 v1, s16, 60
836 ; GCN-NEXT: v_writelane_b32 v1, s17, 61
837 ; GCN-NEXT: v_writelane_b32 v1, s18, 62
838 ; GCN-NEXT: v_writelane_b32 v1, s19, 63
839 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
840 ; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill
841 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
842 ; GCN-NEXT: ;;#ASMSTART
843 ; GCN-NEXT: ; def s[2:3]
844 ; GCN-NEXT: ;;#ASMEND
845 ; GCN-NEXT: s_waitcnt vmcnt(1)
846 ; GCN-NEXT: v_writelane_b32 v0, s2, 0
847 ; GCN-NEXT: v_writelane_b32 v0, s3, 1
848 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
849 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
850 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
851 ; GCN-NEXT: s_mov_b32 s1, 0
852 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
853 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
854 ; GCN-NEXT: s_cbranch_scc1 .LBB2_2
855 ; GCN-NEXT: ; %bb.1: ; %bb0
856 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
857 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
858 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
859 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
860 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
861 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
862 ; GCN-NEXT: s_waitcnt vmcnt(0)
863 ; GCN-NEXT: v_readlane_b32 s36, v1, 32
864 ; GCN-NEXT: v_readlane_b32 s37, v1, 33
865 ; GCN-NEXT: v_readlane_b32 s38, v1, 34
866 ; GCN-NEXT: v_readlane_b32 s39, v1, 35
867 ; GCN-NEXT: v_readlane_b32 s40, v1, 36
868 ; GCN-NEXT: v_readlane_b32 s41, v1, 37
869 ; GCN-NEXT: v_readlane_b32 s42, v1, 38
870 ; GCN-NEXT: v_readlane_b32 s43, v1, 39
871 ; GCN-NEXT: v_readlane_b32 s44, v1, 40
872 ; GCN-NEXT: v_readlane_b32 s45, v1, 41
873 ; GCN-NEXT: v_readlane_b32 s46, v1, 42
874 ; GCN-NEXT: v_readlane_b32 s47, v1, 43
875 ; GCN-NEXT: v_readlane_b32 s48, v1, 44
876 ; GCN-NEXT: v_readlane_b32 s49, v1, 45
877 ; GCN-NEXT: v_readlane_b32 s50, v1, 46
878 ; GCN-NEXT: v_readlane_b32 s51, v1, 47
879 ; GCN-NEXT: v_readlane_b32 s0, v1, 16
880 ; GCN-NEXT: v_readlane_b32 s1, v1, 17
881 ; GCN-NEXT: v_readlane_b32 s2, v1, 18
882 ; GCN-NEXT: v_readlane_b32 s3, v1, 19
883 ; GCN-NEXT: v_readlane_b32 s4, v1, 20
884 ; GCN-NEXT: v_readlane_b32 s5, v1, 21
885 ; GCN-NEXT: v_readlane_b32 s6, v1, 22
886 ; GCN-NEXT: v_readlane_b32 s7, v1, 23
887 ; GCN-NEXT: v_readlane_b32 s8, v1, 24
888 ; GCN-NEXT: v_readlane_b32 s9, v1, 25
889 ; GCN-NEXT: v_readlane_b32 s10, v1, 26
890 ; GCN-NEXT: v_readlane_b32 s11, v1, 27
891 ; GCN-NEXT: v_readlane_b32 s12, v1, 28
892 ; GCN-NEXT: v_readlane_b32 s13, v1, 29
893 ; GCN-NEXT: v_readlane_b32 s14, v1, 30
894 ; GCN-NEXT: v_readlane_b32 s15, v1, 31
895 ; GCN-NEXT: v_readlane_b32 s16, v1, 0
896 ; GCN-NEXT: v_readlane_b32 s17, v1, 1
897 ; GCN-NEXT: v_readlane_b32 s18, v1, 2
898 ; GCN-NEXT: v_readlane_b32 s19, v1, 3
899 ; GCN-NEXT: v_readlane_b32 s20, v1, 4
900 ; GCN-NEXT: v_readlane_b32 s21, v1, 5
901 ; GCN-NEXT: v_readlane_b32 s22, v1, 6
902 ; GCN-NEXT: v_readlane_b32 s23, v1, 7
903 ; GCN-NEXT: v_readlane_b32 s24, v1, 8
904 ; GCN-NEXT: v_readlane_b32 s25, v1, 9
905 ; GCN-NEXT: v_readlane_b32 s26, v1, 10
906 ; GCN-NEXT: v_readlane_b32 s27, v1, 11
907 ; GCN-NEXT: v_readlane_b32 s28, v1, 12
908 ; GCN-NEXT: v_readlane_b32 s29, v1, 13
909 ; GCN-NEXT: v_readlane_b32 s30, v1, 14
910 ; GCN-NEXT: v_readlane_b32 s31, v1, 15
911 ; GCN-NEXT: ;;#ASMSTART
912 ; GCN-NEXT: ; use s[16:31]
913 ; GCN-NEXT: ;;#ASMEND
914 ; GCN-NEXT: ;;#ASMSTART
915 ; GCN-NEXT: ; use s[0:15]
916 ; GCN-NEXT: ;;#ASMEND
917 ; GCN-NEXT: v_readlane_b32 s4, v1, 48
918 ; GCN-NEXT: v_readlane_b32 s5, v1, 49
919 ; GCN-NEXT: v_readlane_b32 s6, v1, 50
920 ; GCN-NEXT: v_readlane_b32 s7, v1, 51
921 ; GCN-NEXT: v_readlane_b32 s8, v1, 52
922 ; GCN-NEXT: v_readlane_b32 s9, v1, 53
923 ; GCN-NEXT: v_readlane_b32 s10, v1, 54
924 ; GCN-NEXT: v_readlane_b32 s11, v1, 55
925 ; GCN-NEXT: v_readlane_b32 s12, v1, 56
926 ; GCN-NEXT: v_readlane_b32 s13, v1, 57
927 ; GCN-NEXT: v_readlane_b32 s14, v1, 58
928 ; GCN-NEXT: v_readlane_b32 s15, v1, 59
929 ; GCN-NEXT: v_readlane_b32 s16, v1, 60
930 ; GCN-NEXT: v_readlane_b32 s17, v1, 61
931 ; GCN-NEXT: v_readlane_b32 s18, v1, 62
932 ; GCN-NEXT: v_readlane_b32 s19, v1, 63
933 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
934 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
935 ; GCN-NEXT: ;;#ASMSTART
936 ; GCN-NEXT: ; use s[36:51]
937 ; GCN-NEXT: ;;#ASMEND
938 ; GCN-NEXT: ;;#ASMSTART
939 ; GCN-NEXT: ; use s[4:19]
940 ; GCN-NEXT: ;;#ASMEND
941 ; GCN-NEXT: ;;#ASMSTART
942 ; GCN-NEXT: ; use s[0:1]
943 ; GCN-NEXT: ;;#ASMEND
944 ; GCN-NEXT: .LBB2_2: ; %ret
945 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
946 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
947 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
948 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
949 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
950 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
951 ; GCN-NEXT: ; kill: killed $vgpr1
952 ; GCN-NEXT: ; kill: killed $vgpr0
954 call void asm sideeffect "", "~{v[0:7]}" () #0
955 call void asm sideeffect "", "~{v[8:15]}" () #0
956 call void asm sideeffect "", "~{v[16:23]}" () #0
957 call void asm sideeffect "", "~{v[24:27]}"() #0
958 call void asm sideeffect "", "~{v[28:29]}"() #0
959 call void asm sideeffect "", "~{v30}"() #0
961 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
962 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
963 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
964 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
965 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
966 %cmp = icmp eq i32 %in, 0
967 br i1 %cmp, label %bb0, label %ret
970 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
971 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
972 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
973 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
974 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
981 ; Same as @no_vgprs_last_sgpr_spill: some SGPR spills must go to memory.
982 ; Additionally, v0 is defined and used in %bb0, so it is live across SGPR reloads.
983 define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
984 ; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0:
986 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
987 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
988 ; GCN-NEXT: s_mov_b32 s54, -1
989 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
990 ; GCN-NEXT: s_add_u32 s52, s52, s11
991 ; GCN-NEXT: s_addc_u32 s53, s53, 0
992 ; GCN-NEXT: ; implicit-def: $vgpr0
993 ; GCN-NEXT: ; implicit-def: $vgpr0
994 ; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
995 ; GCN-NEXT: ;;#ASMSTART
996 ; GCN-NEXT: ;;#ASMEND
997 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
998 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
999 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1000 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1001 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1002 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1003 ; GCN-NEXT: ;;#ASMSTART
1004 ; GCN-NEXT: ;;#ASMEND
1005 ; GCN-NEXT: ;;#ASMSTART
1006 ; GCN-NEXT: ;;#ASMEND
1007 ; GCN-NEXT: ;;#ASMSTART
1008 ; GCN-NEXT: ;;#ASMEND
1009 ; GCN-NEXT: ;;#ASMSTART
1010 ; GCN-NEXT: ;;#ASMEND
1011 ; GCN-NEXT: ;;#ASMSTART
1012 ; GCN-NEXT: ;;#ASMEND
1013 ; GCN-NEXT: ;;#ASMSTART
1014 ; GCN-NEXT: ; def s[4:19]
1015 ; GCN-NEXT: ;;#ASMEND
1016 ; GCN-NEXT: s_waitcnt vmcnt(1)
1017 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
1018 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
1019 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
1020 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
1021 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
1022 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
1023 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
1024 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
1025 ; GCN-NEXT: v_writelane_b32 v1, s12, 8
1026 ; GCN-NEXT: v_writelane_b32 v1, s13, 9
1027 ; GCN-NEXT: v_writelane_b32 v1, s14, 10
1028 ; GCN-NEXT: v_writelane_b32 v1, s15, 11
1029 ; GCN-NEXT: v_writelane_b32 v1, s16, 12
1030 ; GCN-NEXT: v_writelane_b32 v1, s17, 13
1031 ; GCN-NEXT: v_writelane_b32 v1, s18, 14
1032 ; GCN-NEXT: v_writelane_b32 v1, s19, 15
1033 ; GCN-NEXT: ;;#ASMSTART
1034 ; GCN-NEXT: ; def s[4:19]
1035 ; GCN-NEXT: ;;#ASMEND
1036 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
1037 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
1038 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
1039 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
1040 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
1041 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
1042 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
1043 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
1044 ; GCN-NEXT: v_writelane_b32 v1, s12, 24
1045 ; GCN-NEXT: v_writelane_b32 v1, s13, 25
1046 ; GCN-NEXT: v_writelane_b32 v1, s14, 26
1047 ; GCN-NEXT: v_writelane_b32 v1, s15, 27
1048 ; GCN-NEXT: v_writelane_b32 v1, s16, 28
1049 ; GCN-NEXT: v_writelane_b32 v1, s17, 29
1050 ; GCN-NEXT: v_writelane_b32 v1, s18, 30
1051 ; GCN-NEXT: v_writelane_b32 v1, s19, 31
1052 ; GCN-NEXT: ;;#ASMSTART
1053 ; GCN-NEXT: ; def s[4:19]
1054 ; GCN-NEXT: ;;#ASMEND
1055 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
1056 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
1057 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
1058 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
1059 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
1060 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
1061 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
1062 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
1063 ; GCN-NEXT: v_writelane_b32 v1, s12, 40
1064 ; GCN-NEXT: v_writelane_b32 v1, s13, 41
1065 ; GCN-NEXT: v_writelane_b32 v1, s14, 42
1066 ; GCN-NEXT: v_writelane_b32 v1, s15, 43
1067 ; GCN-NEXT: v_writelane_b32 v1, s16, 44
1068 ; GCN-NEXT: v_writelane_b32 v1, s17, 45
1069 ; GCN-NEXT: v_writelane_b32 v1, s18, 46
1070 ; GCN-NEXT: v_writelane_b32 v1, s19, 47
1071 ; GCN-NEXT: ;;#ASMSTART
1072 ; GCN-NEXT: ; def s[4:19]
1073 ; GCN-NEXT: ;;#ASMEND
1074 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
1075 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
1076 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
1077 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
1078 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
1079 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
1080 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
1081 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
1082 ; GCN-NEXT: v_writelane_b32 v1, s12, 56
1083 ; GCN-NEXT: v_writelane_b32 v1, s13, 57
1084 ; GCN-NEXT: v_writelane_b32 v1, s14, 58
1085 ; GCN-NEXT: v_writelane_b32 v1, s15, 59
1086 ; GCN-NEXT: v_writelane_b32 v1, s16, 60
1087 ; GCN-NEXT: v_writelane_b32 v1, s17, 61
1088 ; GCN-NEXT: v_writelane_b32 v1, s18, 62
1089 ; GCN-NEXT: v_writelane_b32 v1, s19, 63
1090 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1091 ; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill
1092 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1093 ; GCN-NEXT: ;;#ASMSTART
1094 ; GCN-NEXT: ; def s[2:3]
1095 ; GCN-NEXT: ;;#ASMEND
1096 ; GCN-NEXT: s_waitcnt vmcnt(1)
1097 ; GCN-NEXT: v_writelane_b32 v0, s2, 0
1098 ; GCN-NEXT: v_writelane_b32 v0, s3, 1
1099 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1100 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
1101 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1102 ; GCN-NEXT: s_mov_b32 s1, 0
1103 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1104 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
1105 ; GCN-NEXT: s_cbranch_scc1 .LBB3_2
1106 ; GCN-NEXT: ; %bb.1: ; %bb0
1107 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1108 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1109 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1110 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1111 ; GCN-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
1112 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1113 ; GCN-NEXT: s_waitcnt vmcnt(0)
1114 ; GCN-NEXT: v_readlane_b32 s36, v2, 32
1115 ; GCN-NEXT: v_readlane_b32 s37, v2, 33
1116 ; GCN-NEXT: v_readlane_b32 s38, v2, 34
1117 ; GCN-NEXT: v_readlane_b32 s39, v2, 35
1118 ; GCN-NEXT: v_readlane_b32 s40, v2, 36
1119 ; GCN-NEXT: v_readlane_b32 s41, v2, 37
1120 ; GCN-NEXT: v_readlane_b32 s42, v2, 38
1121 ; GCN-NEXT: v_readlane_b32 s43, v2, 39
1122 ; GCN-NEXT: v_readlane_b32 s44, v2, 40
1123 ; GCN-NEXT: v_readlane_b32 s45, v2, 41
1124 ; GCN-NEXT: v_readlane_b32 s46, v2, 42
1125 ; GCN-NEXT: v_readlane_b32 s47, v2, 43
1126 ; GCN-NEXT: v_readlane_b32 s48, v2, 44
1127 ; GCN-NEXT: v_readlane_b32 s49, v2, 45
1128 ; GCN-NEXT: v_readlane_b32 s50, v2, 46
1129 ; GCN-NEXT: v_readlane_b32 s51, v2, 47
1130 ; GCN-NEXT: v_readlane_b32 s0, v2, 16
1131 ; GCN-NEXT: v_readlane_b32 s1, v2, 17
1132 ; GCN-NEXT: v_readlane_b32 s2, v2, 18
1133 ; GCN-NEXT: v_readlane_b32 s3, v2, 19
1134 ; GCN-NEXT: v_readlane_b32 s4, v2, 20
1135 ; GCN-NEXT: v_readlane_b32 s5, v2, 21
1136 ; GCN-NEXT: v_readlane_b32 s6, v2, 22
1137 ; GCN-NEXT: v_readlane_b32 s7, v2, 23
1138 ; GCN-NEXT: v_readlane_b32 s8, v2, 24
1139 ; GCN-NEXT: v_readlane_b32 s9, v2, 25
1140 ; GCN-NEXT: v_readlane_b32 s10, v2, 26
1141 ; GCN-NEXT: v_readlane_b32 s11, v2, 27
1142 ; GCN-NEXT: v_readlane_b32 s12, v2, 28
1143 ; GCN-NEXT: v_readlane_b32 s13, v2, 29
1144 ; GCN-NEXT: v_readlane_b32 s14, v2, 30
1145 ; GCN-NEXT: v_readlane_b32 s15, v2, 31
1146 ; GCN-NEXT: v_readlane_b32 s16, v2, 0
1147 ; GCN-NEXT: v_readlane_b32 s17, v2, 1
1148 ; GCN-NEXT: v_readlane_b32 s18, v2, 2
1149 ; GCN-NEXT: v_readlane_b32 s19, v2, 3
1150 ; GCN-NEXT: v_readlane_b32 s20, v2, 4
1151 ; GCN-NEXT: v_readlane_b32 s21, v2, 5
1152 ; GCN-NEXT: v_readlane_b32 s22, v2, 6
1153 ; GCN-NEXT: v_readlane_b32 s23, v2, 7
1154 ; GCN-NEXT: v_readlane_b32 s24, v2, 8
1155 ; GCN-NEXT: v_readlane_b32 s25, v2, 9
1156 ; GCN-NEXT: v_readlane_b32 s26, v2, 10
1157 ; GCN-NEXT: v_readlane_b32 s27, v2, 11
1158 ; GCN-NEXT: v_readlane_b32 s28, v2, 12
1159 ; GCN-NEXT: v_readlane_b32 s29, v2, 13
1160 ; GCN-NEXT: v_readlane_b32 s30, v2, 14
1161 ; GCN-NEXT: v_readlane_b32 s31, v2, 15
1162 ; GCN-NEXT: ;;#ASMSTART
1163 ; GCN-NEXT: ; def v0
1164 ; GCN-NEXT: ;;#ASMEND
1165 ; GCN-NEXT: ;;#ASMSTART
1166 ; GCN-NEXT: ; use s[16:31]
1167 ; GCN-NEXT: ;;#ASMEND
1168 ; GCN-NEXT: ;;#ASMSTART
1169 ; GCN-NEXT: ; use s[0:15]
1170 ; GCN-NEXT: ;;#ASMEND
1171 ; GCN-NEXT: v_readlane_b32 s4, v2, 48
1172 ; GCN-NEXT: v_readlane_b32 s5, v2, 49
1173 ; GCN-NEXT: v_readlane_b32 s6, v2, 50
1174 ; GCN-NEXT: v_readlane_b32 s7, v2, 51
1175 ; GCN-NEXT: v_readlane_b32 s8, v2, 52
1176 ; GCN-NEXT: v_readlane_b32 s9, v2, 53
1177 ; GCN-NEXT: v_readlane_b32 s10, v2, 54
1178 ; GCN-NEXT: v_readlane_b32 s11, v2, 55
1179 ; GCN-NEXT: v_readlane_b32 s12, v2, 56
1180 ; GCN-NEXT: v_readlane_b32 s13, v2, 57
1181 ; GCN-NEXT: v_readlane_b32 s14, v2, 58
1182 ; GCN-NEXT: v_readlane_b32 s15, v2, 59
1183 ; GCN-NEXT: v_readlane_b32 s16, v2, 60
1184 ; GCN-NEXT: v_readlane_b32 s17, v2, 61
1185 ; GCN-NEXT: v_readlane_b32 s18, v2, 62
1186 ; GCN-NEXT: v_readlane_b32 s19, v2, 63
1187 ; GCN-NEXT: v_readlane_b32 s0, v1, 0
1188 ; GCN-NEXT: v_readlane_b32 s1, v1, 1
1189 ; GCN-NEXT: ;;#ASMSTART
1190 ; GCN-NEXT: ; use s[36:51]
1191 ; GCN-NEXT: ;;#ASMEND
1192 ; GCN-NEXT: ;;#ASMSTART
1193 ; GCN-NEXT: ; use s[4:19]
1194 ; GCN-NEXT: ;;#ASMEND
1195 ; GCN-NEXT: ;;#ASMSTART
1196 ; GCN-NEXT: ; use s[0:1]
1197 ; GCN-NEXT: ;;#ASMEND
1198 ; GCN-NEXT: ;;#ASMSTART
1199 ; GCN-NEXT: ; use v0
1200 ; GCN-NEXT: ;;#ASMEND
1201 ; GCN-NEXT: .LBB3_2: ; %ret
1202 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1203 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1204 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1205 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
1206 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
1207 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
1208 ; GCN-NEXT: ; kill: killed $vgpr1
1209 ; GCN-NEXT: ; kill: killed $vgpr0
1210 ; GCN-NEXT: s_endpgm
1211 call void asm sideeffect "", "~{v[0:7]}" () #0
1212 call void asm sideeffect "", "~{v[8:15]}" () #0
1213 call void asm sideeffect "", "~{v[16:23]}" () #0
1214 call void asm sideeffect "", "~{v[24:27]}"() #0
1215 call void asm sideeffect "", "~{v[28:29]}"() #0
1216 call void asm sideeffect "", "~{v30}"() #0
1218 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1219 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1220 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1221 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1222 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
1223 %cmp = icmp eq i32 %in, 0
1224 br i1 %cmp, label %bb0, label %ret
1227 %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0
1228 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
1229 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
1230 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
1231 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
1232 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
1233 call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0
1240 attributes #0 = { nounwind }
1241 attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }