1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O0 -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
4 ; FIXME: We should disable the SDWA peephole here because dead-code elimination,
5 ; which runs after the peephole, breaks this test (it yields different register numbers).
7 ; Spill all SGPRs so that multiple VGPRs are required to hold all of them.
9 ; Ideally we only need 2 VGPRs for all spilling. The VGPRs are
10 ; allocated per frame index, so it's possible to end up with more.
11 define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 {
; The assertion lines that follow are autogenerated (see the note at the top of
; the file) and describe the expected llc -O0 output for this kernel. Regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
12 ; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
; s0 receives a dword loaded from s[0:1] at offset 0xb; it is compared against
; zero further down (presumably this is the %in argument -- TODO confirm the
; kernarg layout).
14 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
; The eight SGPRs of every "; def s[4:11]" are copied into consecutive VGPR
; lanes with v_writelane_b32. The first eight defs occupy lanes 0-63 of v0.
15 ; GCN-NEXT: ;;#ASMSTART
16 ; GCN-NEXT: ; def s[4:11]
18 ; GCN-NEXT: v_writelane_b32 v0, s4, 0
19 ; GCN-NEXT: v_writelane_b32 v0, s5, 1
20 ; GCN-NEXT: v_writelane_b32 v0, s6, 2
21 ; GCN-NEXT: v_writelane_b32 v0, s7, 3
22 ; GCN-NEXT: v_writelane_b32 v0, s8, 4
23 ; GCN-NEXT: v_writelane_b32 v0, s9, 5
24 ; GCN-NEXT: v_writelane_b32 v0, s10, 6
25 ; GCN-NEXT: v_writelane_b32 v0, s11, 7
26 ; GCN-NEXT: ;;#ASMSTART
27 ; GCN-NEXT: ; def s[4:11]
29 ; GCN-NEXT: v_writelane_b32 v0, s4, 8
30 ; GCN-NEXT: v_writelane_b32 v0, s5, 9
31 ; GCN-NEXT: v_writelane_b32 v0, s6, 10
32 ; GCN-NEXT: v_writelane_b32 v0, s7, 11
33 ; GCN-NEXT: v_writelane_b32 v0, s8, 12
34 ; GCN-NEXT: v_writelane_b32 v0, s9, 13
35 ; GCN-NEXT: v_writelane_b32 v0, s10, 14
36 ; GCN-NEXT: v_writelane_b32 v0, s11, 15
37 ; GCN-NEXT: ;;#ASMSTART
38 ; GCN-NEXT: ; def s[4:11]
40 ; GCN-NEXT: v_writelane_b32 v0, s4, 16
41 ; GCN-NEXT: v_writelane_b32 v0, s5, 17
42 ; GCN-NEXT: v_writelane_b32 v0, s6, 18
43 ; GCN-NEXT: v_writelane_b32 v0, s7, 19
44 ; GCN-NEXT: v_writelane_b32 v0, s8, 20
45 ; GCN-NEXT: v_writelane_b32 v0, s9, 21
46 ; GCN-NEXT: v_writelane_b32 v0, s10, 22
47 ; GCN-NEXT: v_writelane_b32 v0, s11, 23
48 ; GCN-NEXT: ;;#ASMSTART
49 ; GCN-NEXT: ; def s[4:11]
51 ; GCN-NEXT: v_writelane_b32 v0, s4, 24
52 ; GCN-NEXT: v_writelane_b32 v0, s5, 25
53 ; GCN-NEXT: v_writelane_b32 v0, s6, 26
54 ; GCN-NEXT: v_writelane_b32 v0, s7, 27
55 ; GCN-NEXT: v_writelane_b32 v0, s8, 28
56 ; GCN-NEXT: v_writelane_b32 v0, s9, 29
57 ; GCN-NEXT: v_writelane_b32 v0, s10, 30
58 ; GCN-NEXT: v_writelane_b32 v0, s11, 31
59 ; GCN-NEXT: ;;#ASMSTART
60 ; GCN-NEXT: ; def s[4:11]
62 ; GCN-NEXT: v_writelane_b32 v0, s4, 32
63 ; GCN-NEXT: v_writelane_b32 v0, s5, 33
64 ; GCN-NEXT: v_writelane_b32 v0, s6, 34
65 ; GCN-NEXT: v_writelane_b32 v0, s7, 35
66 ; GCN-NEXT: v_writelane_b32 v0, s8, 36
67 ; GCN-NEXT: v_writelane_b32 v0, s9, 37
68 ; GCN-NEXT: v_writelane_b32 v0, s10, 38
69 ; GCN-NEXT: v_writelane_b32 v0, s11, 39
70 ; GCN-NEXT: ;;#ASMSTART
71 ; GCN-NEXT: ; def s[4:11]
73 ; GCN-NEXT: v_writelane_b32 v0, s4, 40
74 ; GCN-NEXT: v_writelane_b32 v0, s5, 41
75 ; GCN-NEXT: v_writelane_b32 v0, s6, 42
76 ; GCN-NEXT: v_writelane_b32 v0, s7, 43
77 ; GCN-NEXT: v_writelane_b32 v0, s8, 44
78 ; GCN-NEXT: v_writelane_b32 v0, s9, 45
79 ; GCN-NEXT: v_writelane_b32 v0, s10, 46
80 ; GCN-NEXT: v_writelane_b32 v0, s11, 47
81 ; GCN-NEXT: ;;#ASMSTART
82 ; GCN-NEXT: ; def s[4:11]
84 ; GCN-NEXT: v_writelane_b32 v0, s4, 48
85 ; GCN-NEXT: v_writelane_b32 v0, s5, 49
86 ; GCN-NEXT: v_writelane_b32 v0, s6, 50
87 ; GCN-NEXT: v_writelane_b32 v0, s7, 51
88 ; GCN-NEXT: v_writelane_b32 v0, s8, 52
89 ; GCN-NEXT: v_writelane_b32 v0, s9, 53
90 ; GCN-NEXT: v_writelane_b32 v0, s10, 54
91 ; GCN-NEXT: v_writelane_b32 v0, s11, 55
92 ; GCN-NEXT: ;;#ASMSTART
93 ; GCN-NEXT: ; def s[4:11]
95 ; GCN-NEXT: v_writelane_b32 v0, s4, 56
96 ; GCN-NEXT: v_writelane_b32 v0, s5, 57
97 ; GCN-NEXT: v_writelane_b32 v0, s6, 58
98 ; GCN-NEXT: v_writelane_b32 v0, s7, 59
99 ; GCN-NEXT: v_writelane_b32 v0, s8, 60
100 ; GCN-NEXT: v_writelane_b32 v0, s9, 61
101 ; GCN-NEXT: v_writelane_b32 v0, s10, 62
102 ; GCN-NEXT: v_writelane_b32 v0, s11, 63
; v0 is full at this point; defs nine through sixteen occupy lanes 0-63 of v1.
103 ; GCN-NEXT: ;;#ASMSTART
104 ; GCN-NEXT: ; def s[4:11]
105 ; GCN-NEXT: ;;#ASMEND
106 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
107 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
108 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
109 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
110 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
111 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
112 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
113 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
114 ; GCN-NEXT: ;;#ASMSTART
115 ; GCN-NEXT: ; def s[4:11]
116 ; GCN-NEXT: ;;#ASMEND
117 ; GCN-NEXT: v_writelane_b32 v1, s4, 8
118 ; GCN-NEXT: v_writelane_b32 v1, s5, 9
119 ; GCN-NEXT: v_writelane_b32 v1, s6, 10
120 ; GCN-NEXT: v_writelane_b32 v1, s7, 11
121 ; GCN-NEXT: v_writelane_b32 v1, s8, 12
122 ; GCN-NEXT: v_writelane_b32 v1, s9, 13
123 ; GCN-NEXT: v_writelane_b32 v1, s10, 14
124 ; GCN-NEXT: v_writelane_b32 v1, s11, 15
125 ; GCN-NEXT: ;;#ASMSTART
126 ; GCN-NEXT: ; def s[4:11]
127 ; GCN-NEXT: ;;#ASMEND
128 ; GCN-NEXT: v_writelane_b32 v1, s4, 16
129 ; GCN-NEXT: v_writelane_b32 v1, s5, 17
130 ; GCN-NEXT: v_writelane_b32 v1, s6, 18
131 ; GCN-NEXT: v_writelane_b32 v1, s7, 19
132 ; GCN-NEXT: v_writelane_b32 v1, s8, 20
133 ; GCN-NEXT: v_writelane_b32 v1, s9, 21
134 ; GCN-NEXT: v_writelane_b32 v1, s10, 22
135 ; GCN-NEXT: v_writelane_b32 v1, s11, 23
136 ; GCN-NEXT: ;;#ASMSTART
137 ; GCN-NEXT: ; def s[4:11]
138 ; GCN-NEXT: ;;#ASMEND
139 ; GCN-NEXT: v_writelane_b32 v1, s4, 24
140 ; GCN-NEXT: v_writelane_b32 v1, s5, 25
141 ; GCN-NEXT: v_writelane_b32 v1, s6, 26
142 ; GCN-NEXT: v_writelane_b32 v1, s7, 27
143 ; GCN-NEXT: v_writelane_b32 v1, s8, 28
144 ; GCN-NEXT: v_writelane_b32 v1, s9, 29
145 ; GCN-NEXT: v_writelane_b32 v1, s10, 30
146 ; GCN-NEXT: v_writelane_b32 v1, s11, 31
147 ; GCN-NEXT: ;;#ASMSTART
148 ; GCN-NEXT: ; def s[4:11]
149 ; GCN-NEXT: ;;#ASMEND
150 ; GCN-NEXT: v_writelane_b32 v1, s4, 32
151 ; GCN-NEXT: v_writelane_b32 v1, s5, 33
152 ; GCN-NEXT: v_writelane_b32 v1, s6, 34
153 ; GCN-NEXT: v_writelane_b32 v1, s7, 35
154 ; GCN-NEXT: v_writelane_b32 v1, s8, 36
155 ; GCN-NEXT: v_writelane_b32 v1, s9, 37
156 ; GCN-NEXT: v_writelane_b32 v1, s10, 38
157 ; GCN-NEXT: v_writelane_b32 v1, s11, 39
158 ; GCN-NEXT: ;;#ASMSTART
159 ; GCN-NEXT: ; def s[4:11]
160 ; GCN-NEXT: ;;#ASMEND
161 ; GCN-NEXT: v_writelane_b32 v1, s4, 40
162 ; GCN-NEXT: v_writelane_b32 v1, s5, 41
163 ; GCN-NEXT: v_writelane_b32 v1, s6, 42
164 ; GCN-NEXT: v_writelane_b32 v1, s7, 43
165 ; GCN-NEXT: v_writelane_b32 v1, s8, 44
166 ; GCN-NEXT: v_writelane_b32 v1, s9, 45
167 ; GCN-NEXT: v_writelane_b32 v1, s10, 46
168 ; GCN-NEXT: v_writelane_b32 v1, s11, 47
169 ; GCN-NEXT: ;;#ASMSTART
170 ; GCN-NEXT: ; def s[4:11]
171 ; GCN-NEXT: ;;#ASMEND
172 ; GCN-NEXT: v_writelane_b32 v1, s4, 48
173 ; GCN-NEXT: v_writelane_b32 v1, s5, 49
174 ; GCN-NEXT: v_writelane_b32 v1, s6, 50
175 ; GCN-NEXT: v_writelane_b32 v1, s7, 51
176 ; GCN-NEXT: v_writelane_b32 v1, s8, 52
177 ; GCN-NEXT: v_writelane_b32 v1, s9, 53
178 ; GCN-NEXT: v_writelane_b32 v1, s10, 54
179 ; GCN-NEXT: v_writelane_b32 v1, s11, 55
180 ; GCN-NEXT: ;;#ASMSTART
181 ; GCN-NEXT: ; def s[4:11]
182 ; GCN-NEXT: ;;#ASMEND
183 ; GCN-NEXT: v_writelane_b32 v1, s4, 56
184 ; GCN-NEXT: v_writelane_b32 v1, s5, 57
185 ; GCN-NEXT: v_writelane_b32 v1, s6, 58
186 ; GCN-NEXT: v_writelane_b32 v1, s7, 59
187 ; GCN-NEXT: v_writelane_b32 v1, s8, 60
188 ; GCN-NEXT: v_writelane_b32 v1, s9, 61
189 ; GCN-NEXT: v_writelane_b32 v1, s10, 62
190 ; GCN-NEXT: v_writelane_b32 v1, s11, 63
; v1 is full as well; the seventeenth def goes to lanes 0-7 of a third VGPR, v2.
191 ; GCN-NEXT: ;;#ASMSTART
192 ; GCN-NEXT: ; def s[4:11]
193 ; GCN-NEXT: ;;#ASMEND
194 ; GCN-NEXT: v_writelane_b32 v2, s4, 0
195 ; GCN-NEXT: v_writelane_b32 v2, s5, 1
196 ; GCN-NEXT: v_writelane_b32 v2, s6, 2
197 ; GCN-NEXT: v_writelane_b32 v2, s7, 3
198 ; GCN-NEXT: v_writelane_b32 v2, s8, 4
199 ; GCN-NEXT: v_writelane_b32 v2, s9, 5
200 ; GCN-NEXT: v_writelane_b32 v2, s10, 6
201 ; GCN-NEXT: v_writelane_b32 v2, s11, 7
; Branch to BB0_2 (%ret) when s0 != 0; this mirrors the "icmp eq ... 0" / "br"
; pair in the IR at the end of the function.
202 ; GCN-NEXT: s_mov_b32 s1, 0
203 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
204 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
205 ; GCN-NEXT: s_cbranch_scc1 BB0_2
206 ; GCN-NEXT: ; %bb.1: ; %bb0
; %bb0: the spilled values are read back with v_readlane_b32 ahead of the
; "; use" asm blocks that consume them.
207 ; GCN-NEXT: v_readlane_b32 s8, v1, 56
208 ; GCN-NEXT: v_readlane_b32 s9, v1, 57
209 ; GCN-NEXT: v_readlane_b32 s10, v1, 58
210 ; GCN-NEXT: v_readlane_b32 s11, v1, 59
211 ; GCN-NEXT: v_readlane_b32 s12, v1, 60
212 ; GCN-NEXT: v_readlane_b32 s13, v1, 61
213 ; GCN-NEXT: v_readlane_b32 s14, v1, 62
214 ; GCN-NEXT: v_readlane_b32 s15, v1, 63
215 ; GCN-NEXT: v_readlane_b32 s16, v1, 48
216 ; GCN-NEXT: v_readlane_b32 s17, v1, 49
217 ; GCN-NEXT: v_readlane_b32 s18, v1, 50
218 ; GCN-NEXT: v_readlane_b32 s19, v1, 51
219 ; GCN-NEXT: v_readlane_b32 s20, v1, 52
220 ; GCN-NEXT: v_readlane_b32 s21, v1, 53
221 ; GCN-NEXT: v_readlane_b32 s22, v1, 54
222 ; GCN-NEXT: v_readlane_b32 s23, v1, 55
223 ; GCN-NEXT: v_readlane_b32 s24, v1, 40
224 ; GCN-NEXT: v_readlane_b32 s25, v1, 41
225 ; GCN-NEXT: v_readlane_b32 s26, v1, 42
226 ; GCN-NEXT: v_readlane_b32 s27, v1, 43
227 ; GCN-NEXT: v_readlane_b32 s28, v1, 44
228 ; GCN-NEXT: v_readlane_b32 s29, v1, 45
229 ; GCN-NEXT: v_readlane_b32 s30, v1, 46
230 ; GCN-NEXT: v_readlane_b32 s31, v1, 47
231 ; GCN-NEXT: v_readlane_b32 s36, v1, 32
232 ; GCN-NEXT: v_readlane_b32 s37, v1, 33
233 ; GCN-NEXT: v_readlane_b32 s38, v1, 34
234 ; GCN-NEXT: v_readlane_b32 s39, v1, 35
235 ; GCN-NEXT: v_readlane_b32 s40, v1, 36
236 ; GCN-NEXT: v_readlane_b32 s41, v1, 37
237 ; GCN-NEXT: v_readlane_b32 s42, v1, 38
238 ; GCN-NEXT: v_readlane_b32 s43, v1, 39
239 ; GCN-NEXT: v_readlane_b32 s44, v1, 24
240 ; GCN-NEXT: v_readlane_b32 s45, v1, 25
241 ; GCN-NEXT: v_readlane_b32 s46, v1, 26
242 ; GCN-NEXT: v_readlane_b32 s47, v1, 27
243 ; GCN-NEXT: v_readlane_b32 s48, v1, 28
244 ; GCN-NEXT: v_readlane_b32 s49, v1, 29
245 ; GCN-NEXT: v_readlane_b32 s50, v1, 30
246 ; GCN-NEXT: v_readlane_b32 s51, v1, 31
247 ; GCN-NEXT: v_readlane_b32 s52, v1, 16
248 ; GCN-NEXT: v_readlane_b32 s53, v1, 17
249 ; GCN-NEXT: v_readlane_b32 s54, v1, 18
250 ; GCN-NEXT: v_readlane_b32 s55, v1, 19
251 ; GCN-NEXT: v_readlane_b32 s56, v1, 20
252 ; GCN-NEXT: v_readlane_b32 s57, v1, 21
253 ; GCN-NEXT: v_readlane_b32 s58, v1, 22
254 ; GCN-NEXT: v_readlane_b32 s59, v1, 23
255 ; GCN-NEXT: v_readlane_b32 s60, v1, 8
256 ; GCN-NEXT: v_readlane_b32 s61, v1, 9
257 ; GCN-NEXT: v_readlane_b32 s62, v1, 10
258 ; GCN-NEXT: v_readlane_b32 s63, v1, 11
259 ; GCN-NEXT: v_readlane_b32 s64, v1, 12
260 ; GCN-NEXT: v_readlane_b32 s65, v1, 13
261 ; GCN-NEXT: v_readlane_b32 s66, v1, 14
262 ; GCN-NEXT: v_readlane_b32 s67, v1, 15
263 ; GCN-NEXT: v_readlane_b32 s68, v1, 0
264 ; GCN-NEXT: v_readlane_b32 s69, v1, 1
265 ; GCN-NEXT: v_readlane_b32 s70, v1, 2
266 ; GCN-NEXT: v_readlane_b32 s71, v1, 3
267 ; GCN-NEXT: v_readlane_b32 s72, v1, 4
268 ; GCN-NEXT: v_readlane_b32 s73, v1, 5
269 ; GCN-NEXT: v_readlane_b32 s74, v1, 6
270 ; GCN-NEXT: v_readlane_b32 s75, v1, 7
271 ; GCN-NEXT: v_readlane_b32 s76, v0, 56
272 ; GCN-NEXT: v_readlane_b32 s77, v0, 57
273 ; GCN-NEXT: v_readlane_b32 s78, v0, 58
274 ; GCN-NEXT: v_readlane_b32 s79, v0, 59
275 ; GCN-NEXT: v_readlane_b32 s80, v0, 60
276 ; GCN-NEXT: v_readlane_b32 s81, v0, 61
277 ; GCN-NEXT: v_readlane_b32 s82, v0, 62
278 ; GCN-NEXT: v_readlane_b32 s83, v0, 63
279 ; GCN-NEXT: v_readlane_b32 s84, v0, 48
280 ; GCN-NEXT: v_readlane_b32 s85, v0, 49
281 ; GCN-NEXT: v_readlane_b32 s86, v0, 50
282 ; GCN-NEXT: v_readlane_b32 s87, v0, 51
283 ; GCN-NEXT: v_readlane_b32 s88, v0, 52
284 ; GCN-NEXT: v_readlane_b32 s89, v0, 53
285 ; GCN-NEXT: v_readlane_b32 s90, v0, 54
286 ; GCN-NEXT: v_readlane_b32 s91, v0, 55
287 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
288 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
289 ; GCN-NEXT: v_readlane_b32 s2, v0, 2
290 ; GCN-NEXT: v_readlane_b32 s3, v0, 3
291 ; GCN-NEXT: v_readlane_b32 s4, v0, 4
292 ; GCN-NEXT: v_readlane_b32 s5, v0, 5
293 ; GCN-NEXT: v_readlane_b32 s6, v0, 6
294 ; GCN-NEXT: v_readlane_b32 s7, v0, 7
295 ; GCN-NEXT: ;;#ASMSTART
296 ; GCN-NEXT: ; use s[0:7]
297 ; GCN-NEXT: ;;#ASMEND
298 ; GCN-NEXT: v_readlane_b32 s0, v0, 8
299 ; GCN-NEXT: v_readlane_b32 s1, v0, 9
300 ; GCN-NEXT: v_readlane_b32 s2, v0, 10
301 ; GCN-NEXT: v_readlane_b32 s3, v0, 11
302 ; GCN-NEXT: v_readlane_b32 s4, v0, 12
303 ; GCN-NEXT: v_readlane_b32 s5, v0, 13
304 ; GCN-NEXT: v_readlane_b32 s6, v0, 14
305 ; GCN-NEXT: v_readlane_b32 s7, v0, 15
306 ; GCN-NEXT: ;;#ASMSTART
307 ; GCN-NEXT: ; use s[0:7]
308 ; GCN-NEXT: ;;#ASMEND
309 ; GCN-NEXT: v_readlane_b32 s0, v0, 16
310 ; GCN-NEXT: v_readlane_b32 s1, v0, 17
311 ; GCN-NEXT: v_readlane_b32 s2, v0, 18
312 ; GCN-NEXT: v_readlane_b32 s3, v0, 19
313 ; GCN-NEXT: v_readlane_b32 s4, v0, 20
314 ; GCN-NEXT: v_readlane_b32 s5, v0, 21
315 ; GCN-NEXT: v_readlane_b32 s6, v0, 22
316 ; GCN-NEXT: v_readlane_b32 s7, v0, 23
317 ; GCN-NEXT: ;;#ASMSTART
318 ; GCN-NEXT: ; use s[0:7]
319 ; GCN-NEXT: ;;#ASMEND
320 ; GCN-NEXT: v_readlane_b32 s0, v0, 24
321 ; GCN-NEXT: v_readlane_b32 s1, v0, 25
322 ; GCN-NEXT: v_readlane_b32 s2, v0, 26
323 ; GCN-NEXT: v_readlane_b32 s3, v0, 27
324 ; GCN-NEXT: v_readlane_b32 s4, v0, 28
325 ; GCN-NEXT: v_readlane_b32 s5, v0, 29
326 ; GCN-NEXT: v_readlane_b32 s6, v0, 30
327 ; GCN-NEXT: v_readlane_b32 s7, v0, 31
328 ; GCN-NEXT: ;;#ASMSTART
329 ; GCN-NEXT: ; use s[0:7]
330 ; GCN-NEXT: ;;#ASMEND
331 ; GCN-NEXT: v_readlane_b32 s0, v0, 32
332 ; GCN-NEXT: v_readlane_b32 s1, v0, 33
333 ; GCN-NEXT: v_readlane_b32 s2, v0, 34
334 ; GCN-NEXT: v_readlane_b32 s3, v0, 35
335 ; GCN-NEXT: v_readlane_b32 s4, v0, 36
336 ; GCN-NEXT: v_readlane_b32 s5, v0, 37
337 ; GCN-NEXT: v_readlane_b32 s6, v0, 38
338 ; GCN-NEXT: v_readlane_b32 s7, v0, 39
339 ; GCN-NEXT: ;;#ASMSTART
340 ; GCN-NEXT: ; use s[0:7]
341 ; GCN-NEXT: ;;#ASMEND
342 ; GCN-NEXT: v_readlane_b32 s0, v0, 40
343 ; GCN-NEXT: v_readlane_b32 s1, v0, 41
344 ; GCN-NEXT: v_readlane_b32 s2, v0, 42
345 ; GCN-NEXT: v_readlane_b32 s3, v0, 43
346 ; GCN-NEXT: v_readlane_b32 s4, v0, 44
347 ; GCN-NEXT: v_readlane_b32 s5, v0, 45
348 ; GCN-NEXT: v_readlane_b32 s6, v0, 46
349 ; GCN-NEXT: v_readlane_b32 s7, v0, 47
350 ; GCN-NEXT: ;;#ASMSTART
351 ; GCN-NEXT: ; use s[0:7]
352 ; GCN-NEXT: ;;#ASMEND
353 ; GCN-NEXT: v_readlane_b32 s0, v2, 0
354 ; GCN-NEXT: v_readlane_b32 s1, v2, 1
355 ; GCN-NEXT: v_readlane_b32 s2, v2, 2
356 ; GCN-NEXT: v_readlane_b32 s3, v2, 3
357 ; GCN-NEXT: v_readlane_b32 s4, v2, 4
358 ; GCN-NEXT: v_readlane_b32 s5, v2, 5
359 ; GCN-NEXT: v_readlane_b32 s6, v2, 6
360 ; GCN-NEXT: v_readlane_b32 s7, v2, 7
361 ; GCN-NEXT: ;;#ASMSTART
362 ; GCN-NEXT: ; use s[84:91]
363 ; GCN-NEXT: ;;#ASMEND
364 ; GCN-NEXT: ;;#ASMSTART
365 ; GCN-NEXT: ; use s[76:83]
366 ; GCN-NEXT: ;;#ASMEND
367 ; GCN-NEXT: ;;#ASMSTART
368 ; GCN-NEXT: ; use s[68:75]
369 ; GCN-NEXT: ;;#ASMEND
370 ; GCN-NEXT: ;;#ASMSTART
371 ; GCN-NEXT: ; use s[60:67]
372 ; GCN-NEXT: ;;#ASMEND
373 ; GCN-NEXT: ;;#ASMSTART
374 ; GCN-NEXT: ; use s[52:59]
375 ; GCN-NEXT: ;;#ASMEND
376 ; GCN-NEXT: ;;#ASMSTART
377 ; GCN-NEXT: ; use s[44:51]
378 ; GCN-NEXT: ;;#ASMEND
379 ; GCN-NEXT: ;;#ASMSTART
380 ; GCN-NEXT: ; use s[36:43]
381 ; GCN-NEXT: ;;#ASMEND
382 ; GCN-NEXT: ;;#ASMSTART
383 ; GCN-NEXT: ; use s[24:31]
384 ; GCN-NEXT: ;;#ASMEND
385 ; GCN-NEXT: ;;#ASMSTART
386 ; GCN-NEXT: ; use s[16:23]
387 ; GCN-NEXT: ;;#ASMEND
388 ; GCN-NEXT: ;;#ASMSTART
389 ; GCN-NEXT: ; use s[8:15]
390 ; GCN-NEXT: ;;#ASMEND
391 ; GCN-NEXT: ;;#ASMSTART
392 ; GCN-NEXT: ; use s[0:7]
393 ; GCN-NEXT: ;;#ASMEND
394 ; GCN-NEXT: BB0_2: ; %ret
; IR under test: seventeen side-effecting inline-asm defs, each producing an
; <8 x i32> through an "=s" output constraint. That is 136 dwords in total,
; matching the 64 + 64 + 8 lanes written in the expected output above.
396 %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
397 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
398 %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
399 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
400 %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
401 %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
402 %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
403 %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
404 %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
405 %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
406 %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
407 %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
408 %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
409 %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
410 %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
411 %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
412 %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
; %in decides whether the uses below execute (%bb0) or are skipped (%ret).
413 %cmp = icmp eq i32 %in, 0
414 br i1 %cmp, label %bb0, label %ret
; Every def is consumed only here, after the conditional branch.
417 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
418 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) #0
419 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0
420 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
421 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0
422 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0
423 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0
424 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0
425 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0
426 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0
427 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0
428 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0
429 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0
430 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0
431 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0
432 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0
433 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0
440 ; Some of the lanes of an SGPR spill are in one VGPR and some are forced
441 ; into the next available VGPR.
442 define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 {
; The assertion lines that follow are autogenerated (see the note at the top of
; the file) and describe the expected llc -O0 output for this kernel. Regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
443 ; GCN-LABEL: split_sgpr_spill_2_vgprs:
; s0 receives a dword loaded from s[0:1] at offset 0xb; it is compared against
; zero further down (presumably this is the %in argument -- TODO confirm the
; kernarg layout).
445 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
; Four 16-dword defs of s[4:19] are written lane by lane into v0, filling
; lanes 0-63.
446 ; GCN-NEXT: ;;#ASMSTART
447 ; GCN-NEXT: ; def s[4:19]
448 ; GCN-NEXT: ;;#ASMEND
449 ; GCN-NEXT: v_writelane_b32 v0, s4, 0
450 ; GCN-NEXT: v_writelane_b32 v0, s5, 1
451 ; GCN-NEXT: v_writelane_b32 v0, s6, 2
452 ; GCN-NEXT: v_writelane_b32 v0, s7, 3
453 ; GCN-NEXT: v_writelane_b32 v0, s8, 4
454 ; GCN-NEXT: v_writelane_b32 v0, s9, 5
455 ; GCN-NEXT: v_writelane_b32 v0, s10, 6
456 ; GCN-NEXT: v_writelane_b32 v0, s11, 7
457 ; GCN-NEXT: v_writelane_b32 v0, s12, 8
458 ; GCN-NEXT: v_writelane_b32 v0, s13, 9
459 ; GCN-NEXT: v_writelane_b32 v0, s14, 10
460 ; GCN-NEXT: v_writelane_b32 v0, s15, 11
461 ; GCN-NEXT: v_writelane_b32 v0, s16, 12
462 ; GCN-NEXT: v_writelane_b32 v0, s17, 13
463 ; GCN-NEXT: v_writelane_b32 v0, s18, 14
464 ; GCN-NEXT: v_writelane_b32 v0, s19, 15
465 ; GCN-NEXT: ;;#ASMSTART
466 ; GCN-NEXT: ; def s[4:19]
467 ; GCN-NEXT: ;;#ASMEND
468 ; GCN-NEXT: v_writelane_b32 v0, s4, 16
469 ; GCN-NEXT: v_writelane_b32 v0, s5, 17
470 ; GCN-NEXT: v_writelane_b32 v0, s6, 18
471 ; GCN-NEXT: v_writelane_b32 v0, s7, 19
472 ; GCN-NEXT: v_writelane_b32 v0, s8, 20
473 ; GCN-NEXT: v_writelane_b32 v0, s9, 21
474 ; GCN-NEXT: v_writelane_b32 v0, s10, 22
475 ; GCN-NEXT: v_writelane_b32 v0, s11, 23
476 ; GCN-NEXT: v_writelane_b32 v0, s12, 24
477 ; GCN-NEXT: v_writelane_b32 v0, s13, 25
478 ; GCN-NEXT: v_writelane_b32 v0, s14, 26
479 ; GCN-NEXT: v_writelane_b32 v0, s15, 27
480 ; GCN-NEXT: v_writelane_b32 v0, s16, 28
481 ; GCN-NEXT: v_writelane_b32 v0, s17, 29
482 ; GCN-NEXT: v_writelane_b32 v0, s18, 30
483 ; GCN-NEXT: v_writelane_b32 v0, s19, 31
484 ; GCN-NEXT: ;;#ASMSTART
485 ; GCN-NEXT: ; def s[4:19]
486 ; GCN-NEXT: ;;#ASMEND
487 ; GCN-NEXT: v_writelane_b32 v0, s4, 32
488 ; GCN-NEXT: v_writelane_b32 v0, s5, 33
489 ; GCN-NEXT: v_writelane_b32 v0, s6, 34
490 ; GCN-NEXT: v_writelane_b32 v0, s7, 35
491 ; GCN-NEXT: v_writelane_b32 v0, s8, 36
492 ; GCN-NEXT: v_writelane_b32 v0, s9, 37
493 ; GCN-NEXT: v_writelane_b32 v0, s10, 38
494 ; GCN-NEXT: v_writelane_b32 v0, s11, 39
495 ; GCN-NEXT: v_writelane_b32 v0, s12, 40
496 ; GCN-NEXT: v_writelane_b32 v0, s13, 41
497 ; GCN-NEXT: v_writelane_b32 v0, s14, 42
498 ; GCN-NEXT: v_writelane_b32 v0, s15, 43
499 ; GCN-NEXT: v_writelane_b32 v0, s16, 44
500 ; GCN-NEXT: v_writelane_b32 v0, s17, 45
501 ; GCN-NEXT: v_writelane_b32 v0, s18, 46
502 ; GCN-NEXT: v_writelane_b32 v0, s19, 47
503 ; GCN-NEXT: ;;#ASMSTART
504 ; GCN-NEXT: ; def s[4:19]
505 ; GCN-NEXT: ;;#ASMEND
506 ; GCN-NEXT: v_writelane_b32 v0, s4, 48
507 ; GCN-NEXT: v_writelane_b32 v0, s5, 49
508 ; GCN-NEXT: v_writelane_b32 v0, s6, 50
509 ; GCN-NEXT: v_writelane_b32 v0, s7, 51
510 ; GCN-NEXT: v_writelane_b32 v0, s8, 52
511 ; GCN-NEXT: v_writelane_b32 v0, s9, 53
512 ; GCN-NEXT: v_writelane_b32 v0, s10, 54
513 ; GCN-NEXT: v_writelane_b32 v0, s11, 55
514 ; GCN-NEXT: v_writelane_b32 v0, s12, 56
515 ; GCN-NEXT: v_writelane_b32 v0, s13, 57
516 ; GCN-NEXT: v_writelane_b32 v0, s14, 58
517 ; GCN-NEXT: v_writelane_b32 v0, s15, 59
518 ; GCN-NEXT: v_writelane_b32 v0, s16, 60
519 ; GCN-NEXT: v_writelane_b32 v0, s17, 61
520 ; GCN-NEXT: v_writelane_b32 v0, s18, 62
521 ; GCN-NEXT: v_writelane_b32 v0, s19, 63
; v0 is full at this point: the 8-dword def takes lanes 0-7 of v1 and the
; 2-dword def takes lanes 8-9 of v1.
522 ; GCN-NEXT: ;;#ASMSTART
523 ; GCN-NEXT: ; def s[4:11]
524 ; GCN-NEXT: ;;#ASMEND
525 ; GCN-NEXT: v_writelane_b32 v1, s4, 0
526 ; GCN-NEXT: v_writelane_b32 v1, s5, 1
527 ; GCN-NEXT: v_writelane_b32 v1, s6, 2
528 ; GCN-NEXT: v_writelane_b32 v1, s7, 3
529 ; GCN-NEXT: v_writelane_b32 v1, s8, 4
530 ; GCN-NEXT: v_writelane_b32 v1, s9, 5
531 ; GCN-NEXT: v_writelane_b32 v1, s10, 6
532 ; GCN-NEXT: v_writelane_b32 v1, s11, 7
533 ; GCN-NEXT: ;;#ASMSTART
534 ; GCN-NEXT: ; def s[2:3]
535 ; GCN-NEXT: ;;#ASMEND
536 ; GCN-NEXT: v_writelane_b32 v1, s2, 8
537 ; GCN-NEXT: v_writelane_b32 v1, s3, 9
; Branch to BB1_2 (%ret) when s0 != 0; this mirrors the "icmp eq ... 0" / "br"
; pair in the IR at the end of the function.
538 ; GCN-NEXT: s_mov_b32 s1, 0
539 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
540 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
541 ; GCN-NEXT: s_cbranch_scc1 BB1_2
542 ; GCN-NEXT: ; %bb.1: ; %bb0
; %bb0: the spilled values are read back with v_readlane_b32 ahead of the
; "; use" asm blocks that consume them.
543 ; GCN-NEXT: v_readlane_b32 s16, v1, 8
544 ; GCN-NEXT: v_readlane_b32 s17, v1, 9
545 ; GCN-NEXT: v_readlane_b32 s20, v1, 0
546 ; GCN-NEXT: v_readlane_b32 s21, v1, 1
547 ; GCN-NEXT: v_readlane_b32 s22, v1, 2
548 ; GCN-NEXT: v_readlane_b32 s23, v1, 3
549 ; GCN-NEXT: v_readlane_b32 s24, v1, 4
550 ; GCN-NEXT: v_readlane_b32 s25, v1, 5
551 ; GCN-NEXT: v_readlane_b32 s26, v1, 6
552 ; GCN-NEXT: v_readlane_b32 s27, v1, 7
553 ; GCN-NEXT: v_readlane_b32 s36, v0, 32
554 ; GCN-NEXT: v_readlane_b32 s37, v0, 33
555 ; GCN-NEXT: v_readlane_b32 s38, v0, 34
556 ; GCN-NEXT: v_readlane_b32 s39, v0, 35
557 ; GCN-NEXT: v_readlane_b32 s40, v0, 36
558 ; GCN-NEXT: v_readlane_b32 s41, v0, 37
559 ; GCN-NEXT: v_readlane_b32 s42, v0, 38
560 ; GCN-NEXT: v_readlane_b32 s43, v0, 39
561 ; GCN-NEXT: v_readlane_b32 s44, v0, 40
562 ; GCN-NEXT: v_readlane_b32 s45, v0, 41
563 ; GCN-NEXT: v_readlane_b32 s46, v0, 42
564 ; GCN-NEXT: v_readlane_b32 s47, v0, 43
565 ; GCN-NEXT: v_readlane_b32 s48, v0, 44
566 ; GCN-NEXT: v_readlane_b32 s49, v0, 45
567 ; GCN-NEXT: v_readlane_b32 s50, v0, 46
568 ; GCN-NEXT: v_readlane_b32 s51, v0, 47
569 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
570 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
571 ; GCN-NEXT: v_readlane_b32 s2, v0, 2
572 ; GCN-NEXT: v_readlane_b32 s3, v0, 3
573 ; GCN-NEXT: v_readlane_b32 s4, v0, 4
574 ; GCN-NEXT: v_readlane_b32 s5, v0, 5
575 ; GCN-NEXT: v_readlane_b32 s6, v0, 6
576 ; GCN-NEXT: v_readlane_b32 s7, v0, 7
577 ; GCN-NEXT: v_readlane_b32 s8, v0, 8
578 ; GCN-NEXT: v_readlane_b32 s9, v0, 9
579 ; GCN-NEXT: v_readlane_b32 s10, v0, 10
580 ; GCN-NEXT: v_readlane_b32 s11, v0, 11
581 ; GCN-NEXT: v_readlane_b32 s12, v0, 12
582 ; GCN-NEXT: v_readlane_b32 s13, v0, 13
583 ; GCN-NEXT: v_readlane_b32 s14, v0, 14
584 ; GCN-NEXT: v_readlane_b32 s15, v0, 15
585 ; GCN-NEXT: ;;#ASMSTART
586 ; GCN-NEXT: ; use s[0:15]
587 ; GCN-NEXT: ;;#ASMEND
588 ; GCN-NEXT: v_readlane_b32 s0, v0, 16
589 ; GCN-NEXT: v_readlane_b32 s1, v0, 17
590 ; GCN-NEXT: v_readlane_b32 s2, v0, 18
591 ; GCN-NEXT: v_readlane_b32 s3, v0, 19
592 ; GCN-NEXT: v_readlane_b32 s4, v0, 20
593 ; GCN-NEXT: v_readlane_b32 s5, v0, 21
594 ; GCN-NEXT: v_readlane_b32 s6, v0, 22
595 ; GCN-NEXT: v_readlane_b32 s7, v0, 23
596 ; GCN-NEXT: v_readlane_b32 s8, v0, 24
597 ; GCN-NEXT: v_readlane_b32 s9, v0, 25
598 ; GCN-NEXT: v_readlane_b32 s10, v0, 26
599 ; GCN-NEXT: v_readlane_b32 s11, v0, 27
600 ; GCN-NEXT: v_readlane_b32 s12, v0, 28
601 ; GCN-NEXT: v_readlane_b32 s13, v0, 29
602 ; GCN-NEXT: v_readlane_b32 s14, v0, 30
603 ; GCN-NEXT: v_readlane_b32 s15, v0, 31
604 ; GCN-NEXT: ;;#ASMSTART
605 ; GCN-NEXT: ; use s[0:15]
606 ; GCN-NEXT: ;;#ASMEND
607 ; GCN-NEXT: v_readlane_b32 s0, v0, 48
608 ; GCN-NEXT: v_readlane_b32 s1, v0, 49
609 ; GCN-NEXT: v_readlane_b32 s2, v0, 50
610 ; GCN-NEXT: v_readlane_b32 s3, v0, 51
611 ; GCN-NEXT: v_readlane_b32 s4, v0, 52
612 ; GCN-NEXT: v_readlane_b32 s5, v0, 53
613 ; GCN-NEXT: v_readlane_b32 s6, v0, 54
614 ; GCN-NEXT: v_readlane_b32 s7, v0, 55
615 ; GCN-NEXT: v_readlane_b32 s8, v0, 56
616 ; GCN-NEXT: v_readlane_b32 s9, v0, 57
617 ; GCN-NEXT: v_readlane_b32 s10, v0, 58
618 ; GCN-NEXT: v_readlane_b32 s11, v0, 59
619 ; GCN-NEXT: v_readlane_b32 s12, v0, 60
620 ; GCN-NEXT: v_readlane_b32 s13, v0, 61
621 ; GCN-NEXT: v_readlane_b32 s14, v0, 62
622 ; GCN-NEXT: v_readlane_b32 s15, v0, 63
623 ; GCN-NEXT: ;;#ASMSTART
624 ; GCN-NEXT: ; use s[36:51]
625 ; GCN-NEXT: ;;#ASMEND
626 ; GCN-NEXT: ;;#ASMSTART
627 ; GCN-NEXT: ; use s[20:27]
628 ; GCN-NEXT: ;;#ASMEND
629 ; GCN-NEXT: ;;#ASMSTART
630 ; GCN-NEXT: ; use s[16:17]
631 ; GCN-NEXT: ;;#ASMEND
632 ; GCN-NEXT: ;;#ASMSTART
633 ; GCN-NEXT: ; use s[0:15]
634 ; GCN-NEXT: ;;#ASMEND
635 ; GCN-NEXT: BB1_2: ; %ret
; IR under test: four <16 x i32>, one <8 x i32> and one <2 x i32> inline-asm
; defs (64 + 8 + 2 = 74 dwords), matching the 64 lanes of v0 plus lanes 0-9 of
; v1 written above. Note that %wide.sgpr5 is defined before %wide.sgpr3 and
; %wide.sgpr4 but is used last.
637 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
638 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
639 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
640 %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
641 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
642 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
; %in decides whether the uses below execute (%bb0) or are skipped (%ret).
644 %cmp = icmp eq i32 %in, 0
645 br i1 %cmp, label %bb0, label %ret
; Every def is consumed only here, after the conditional branch.
648 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
649 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
650 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
651 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
652 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
653 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
660 ; The first 64 SGPR spills can go to a VGPR, but there isn't a second
661 ; one, so some spills must go to memory. The last 16-element spill runs out
662 ; of lanes at the 15th element.
663 define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
664 ; GCN-LABEL: no_vgprs_last_sgpr_spill:
666 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
667 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
668 ; GCN-NEXT: s_mov_b32 s54, -1
669 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
670 ; GCN-NEXT: s_add_u32 s52, s52, s3
671 ; GCN-NEXT: s_addc_u32 s53, s53, 0
672 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
673 ; GCN-NEXT: ;;#ASMSTART
674 ; GCN-NEXT: ;;#ASMEND
675 ; GCN-NEXT: ;;#ASMSTART
676 ; GCN-NEXT: ;;#ASMEND
677 ; GCN-NEXT: ;;#ASMSTART
678 ; GCN-NEXT: ;;#ASMEND
679 ; GCN-NEXT: ;;#ASMSTART
680 ; GCN-NEXT: ;;#ASMEND
681 ; GCN-NEXT: ;;#ASMSTART
682 ; GCN-NEXT: ;;#ASMEND
683 ; GCN-NEXT: ;;#ASMSTART
684 ; GCN-NEXT: ;;#ASMEND
685 ; GCN-NEXT: ;;#ASMSTART
686 ; GCN-NEXT: ; def s[4:19]
687 ; GCN-NEXT: ;;#ASMEND
688 ; GCN-NEXT: v_writelane_b32 v31, s4, 0
689 ; GCN-NEXT: v_writelane_b32 v31, s5, 1
690 ; GCN-NEXT: v_writelane_b32 v31, s6, 2
691 ; GCN-NEXT: v_writelane_b32 v31, s7, 3
692 ; GCN-NEXT: v_writelane_b32 v31, s8, 4
693 ; GCN-NEXT: v_writelane_b32 v31, s9, 5
694 ; GCN-NEXT: v_writelane_b32 v31, s10, 6
695 ; GCN-NEXT: v_writelane_b32 v31, s11, 7
696 ; GCN-NEXT: v_writelane_b32 v31, s12, 8
697 ; GCN-NEXT: v_writelane_b32 v31, s13, 9
698 ; GCN-NEXT: v_writelane_b32 v31, s14, 10
699 ; GCN-NEXT: v_writelane_b32 v31, s15, 11
700 ; GCN-NEXT: v_writelane_b32 v31, s16, 12
701 ; GCN-NEXT: v_writelane_b32 v31, s17, 13
702 ; GCN-NEXT: v_writelane_b32 v31, s18, 14
703 ; GCN-NEXT: v_writelane_b32 v31, s19, 15
704 ; GCN-NEXT: ;;#ASMSTART
705 ; GCN-NEXT: ; def s[4:19]
706 ; GCN-NEXT: ;;#ASMEND
707 ; GCN-NEXT: v_writelane_b32 v31, s4, 16
708 ; GCN-NEXT: v_writelane_b32 v31, s5, 17
709 ; GCN-NEXT: v_writelane_b32 v31, s6, 18
710 ; GCN-NEXT: v_writelane_b32 v31, s7, 19
711 ; GCN-NEXT: v_writelane_b32 v31, s8, 20
712 ; GCN-NEXT: v_writelane_b32 v31, s9, 21
713 ; GCN-NEXT: v_writelane_b32 v31, s10, 22
714 ; GCN-NEXT: v_writelane_b32 v31, s11, 23
715 ; GCN-NEXT: v_writelane_b32 v31, s12, 24
716 ; GCN-NEXT: v_writelane_b32 v31, s13, 25
717 ; GCN-NEXT: v_writelane_b32 v31, s14, 26
718 ; GCN-NEXT: v_writelane_b32 v31, s15, 27
719 ; GCN-NEXT: v_writelane_b32 v31, s16, 28
720 ; GCN-NEXT: v_writelane_b32 v31, s17, 29
721 ; GCN-NEXT: v_writelane_b32 v31, s18, 30
722 ; GCN-NEXT: v_writelane_b32 v31, s19, 31
723 ; GCN-NEXT: ;;#ASMSTART
724 ; GCN-NEXT: ; def s[4:19]
725 ; GCN-NEXT: ;;#ASMEND
726 ; GCN-NEXT: v_writelane_b32 v31, s4, 32
727 ; GCN-NEXT: v_writelane_b32 v31, s5, 33
728 ; GCN-NEXT: v_writelane_b32 v31, s6, 34
729 ; GCN-NEXT: v_writelane_b32 v31, s7, 35
730 ; GCN-NEXT: v_writelane_b32 v31, s8, 36
731 ; GCN-NEXT: v_writelane_b32 v31, s9, 37
732 ; GCN-NEXT: v_writelane_b32 v31, s10, 38
733 ; GCN-NEXT: v_writelane_b32 v31, s11, 39
734 ; GCN-NEXT: v_writelane_b32 v31, s12, 40
735 ; GCN-NEXT: v_writelane_b32 v31, s13, 41
736 ; GCN-NEXT: v_writelane_b32 v31, s14, 42
737 ; GCN-NEXT: v_writelane_b32 v31, s15, 43
738 ; GCN-NEXT: v_writelane_b32 v31, s16, 44
739 ; GCN-NEXT: v_writelane_b32 v31, s17, 45
740 ; GCN-NEXT: v_writelane_b32 v31, s18, 46
741 ; GCN-NEXT: v_writelane_b32 v31, s19, 47
742 ; GCN-NEXT: ;;#ASMSTART
743 ; GCN-NEXT: ; def s[4:19]
744 ; GCN-NEXT: ;;#ASMEND
745 ; GCN-NEXT: v_writelane_b32 v31, s4, 48
746 ; GCN-NEXT: v_writelane_b32 v31, s5, 49
747 ; GCN-NEXT: v_writelane_b32 v31, s6, 50
748 ; GCN-NEXT: v_writelane_b32 v31, s7, 51
749 ; GCN-NEXT: v_writelane_b32 v31, s8, 52
750 ; GCN-NEXT: v_writelane_b32 v31, s9, 53
751 ; GCN-NEXT: v_writelane_b32 v31, s10, 54
752 ; GCN-NEXT: v_writelane_b32 v31, s11, 55
753 ; GCN-NEXT: v_writelane_b32 v31, s12, 56
754 ; GCN-NEXT: v_writelane_b32 v31, s13, 57
755 ; GCN-NEXT: v_writelane_b32 v31, s14, 58
756 ; GCN-NEXT: v_writelane_b32 v31, s15, 59
757 ; GCN-NEXT: v_writelane_b32 v31, s16, 60
758 ; GCN-NEXT: v_writelane_b32 v31, s17, 61
759 ; GCN-NEXT: v_writelane_b32 v31, s18, 62
760 ; GCN-NEXT: v_writelane_b32 v31, s19, 63
761 ; GCN-NEXT: ;;#ASMSTART
762 ; GCN-NEXT: ; def s[2:3]
763 ; GCN-NEXT: ;;#ASMEND
764 ; GCN-NEXT: s_mov_b64 s[4:5], exec
765 ; GCN-NEXT: s_mov_b64 exec, 3
766 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0
767 ; GCN-NEXT: v_writelane_b32 v0, s2, 0
768 ; GCN-NEXT: v_writelane_b32 v0, s3, 1
769 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
770 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0
771 ; GCN-NEXT: s_waitcnt vmcnt(0)
772 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
773 ; GCN-NEXT: s_mov_b32 s1, 0
774 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
775 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
776 ; GCN-NEXT: s_cbranch_scc1 BB2_2
777 ; GCN-NEXT: ; %bb.1: ; %bb0
778 ; GCN-NEXT: v_readlane_b32 s36, v31, 32
779 ; GCN-NEXT: v_readlane_b32 s37, v31, 33
780 ; GCN-NEXT: v_readlane_b32 s38, v31, 34
781 ; GCN-NEXT: v_readlane_b32 s39, v31, 35
782 ; GCN-NEXT: v_readlane_b32 s40, v31, 36
783 ; GCN-NEXT: v_readlane_b32 s41, v31, 37
784 ; GCN-NEXT: v_readlane_b32 s42, v31, 38
785 ; GCN-NEXT: v_readlane_b32 s43, v31, 39
786 ; GCN-NEXT: v_readlane_b32 s44, v31, 40
787 ; GCN-NEXT: v_readlane_b32 s45, v31, 41
788 ; GCN-NEXT: v_readlane_b32 s46, v31, 42
789 ; GCN-NEXT: v_readlane_b32 s47, v31, 43
790 ; GCN-NEXT: v_readlane_b32 s48, v31, 44
791 ; GCN-NEXT: v_readlane_b32 s49, v31, 45
792 ; GCN-NEXT: v_readlane_b32 s50, v31, 46
793 ; GCN-NEXT: v_readlane_b32 s51, v31, 47
794 ; GCN-NEXT: v_readlane_b32 s0, v31, 16
795 ; GCN-NEXT: v_readlane_b32 s1, v31, 17
796 ; GCN-NEXT: v_readlane_b32 s2, v31, 18
797 ; GCN-NEXT: v_readlane_b32 s3, v31, 19
798 ; GCN-NEXT: v_readlane_b32 s4, v31, 20
799 ; GCN-NEXT: v_readlane_b32 s5, v31, 21
800 ; GCN-NEXT: v_readlane_b32 s6, v31, 22
801 ; GCN-NEXT: v_readlane_b32 s7, v31, 23
802 ; GCN-NEXT: v_readlane_b32 s8, v31, 24
803 ; GCN-NEXT: v_readlane_b32 s9, v31, 25
804 ; GCN-NEXT: v_readlane_b32 s10, v31, 26
805 ; GCN-NEXT: v_readlane_b32 s11, v31, 27
806 ; GCN-NEXT: v_readlane_b32 s12, v31, 28
807 ; GCN-NEXT: v_readlane_b32 s13, v31, 29
808 ; GCN-NEXT: v_readlane_b32 s14, v31, 30
809 ; GCN-NEXT: v_readlane_b32 s15, v31, 31
810 ; GCN-NEXT: v_readlane_b32 s16, v31, 0
811 ; GCN-NEXT: v_readlane_b32 s17, v31, 1
812 ; GCN-NEXT: v_readlane_b32 s18, v31, 2
813 ; GCN-NEXT: v_readlane_b32 s19, v31, 3
814 ; GCN-NEXT: v_readlane_b32 s20, v31, 4
815 ; GCN-NEXT: v_readlane_b32 s21, v31, 5
816 ; GCN-NEXT: v_readlane_b32 s22, v31, 6
817 ; GCN-NEXT: v_readlane_b32 s23, v31, 7
818 ; GCN-NEXT: v_readlane_b32 s24, v31, 8
819 ; GCN-NEXT: v_readlane_b32 s25, v31, 9
820 ; GCN-NEXT: v_readlane_b32 s26, v31, 10
821 ; GCN-NEXT: v_readlane_b32 s27, v31, 11
822 ; GCN-NEXT: v_readlane_b32 s28, v31, 12
823 ; GCN-NEXT: v_readlane_b32 s29, v31, 13
824 ; GCN-NEXT: v_readlane_b32 s30, v31, 14
825 ; GCN-NEXT: v_readlane_b32 s31, v31, 15
826 ; GCN-NEXT: ;;#ASMSTART
827 ; GCN-NEXT: ; use s[16:31]
828 ; GCN-NEXT: ;;#ASMEND
829 ; GCN-NEXT: ;;#ASMSTART
830 ; GCN-NEXT: ; use s[0:15]
831 ; GCN-NEXT: ;;#ASMEND
832 ; GCN-NEXT: v_readlane_b32 s4, v31, 48
833 ; GCN-NEXT: v_readlane_b32 s5, v31, 49
834 ; GCN-NEXT: v_readlane_b32 s6, v31, 50
835 ; GCN-NEXT: v_readlane_b32 s7, v31, 51
836 ; GCN-NEXT: v_readlane_b32 s8, v31, 52
837 ; GCN-NEXT: v_readlane_b32 s9, v31, 53
838 ; GCN-NEXT: v_readlane_b32 s10, v31, 54
839 ; GCN-NEXT: v_readlane_b32 s11, v31, 55
840 ; GCN-NEXT: v_readlane_b32 s12, v31, 56
841 ; GCN-NEXT: v_readlane_b32 s13, v31, 57
842 ; GCN-NEXT: v_readlane_b32 s14, v31, 58
843 ; GCN-NEXT: v_readlane_b32 s15, v31, 59
844 ; GCN-NEXT: v_readlane_b32 s16, v31, 60
845 ; GCN-NEXT: v_readlane_b32 s17, v31, 61
846 ; GCN-NEXT: v_readlane_b32 s18, v31, 62
847 ; GCN-NEXT: v_readlane_b32 s19, v31, 63
848 ; GCN-NEXT: s_mov_b64 s[2:3], exec
849 ; GCN-NEXT: s_mov_b64 exec, 3
850 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0
851 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
852 ; GCN-NEXT: s_waitcnt vmcnt(0)
853 ; GCN-NEXT: v_readlane_b32 s0, v0, 0
854 ; GCN-NEXT: v_readlane_b32 s1, v0, 1
855 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0
856 ; GCN-NEXT: s_waitcnt vmcnt(0)
857 ; GCN-NEXT: s_mov_b64 exec, s[2:3]
858 ; GCN-NEXT: ;;#ASMSTART
859 ; GCN-NEXT: ; use s[36:51]
860 ; GCN-NEXT: ;;#ASMEND
861 ; GCN-NEXT: ;;#ASMSTART
862 ; GCN-NEXT: ; use s[4:19]
863 ; GCN-NEXT: ;;#ASMEND
864 ; GCN-NEXT: ;;#ASMSTART
865 ; GCN-NEXT: ; use s[0:1]
866 ; GCN-NEXT: ;;#ASMEND
867 ; GCN-NEXT: BB2_2: ; %ret
869 call void asm sideeffect "", "~{v[0:7]}" () #0
870 call void asm sideeffect "", "~{v[8:15]}" () #0
871 call void asm sideeffect "", "~{v[16:23]}" () #0
872 call void asm sideeffect "", "~{v[24:27]}"() #0
873 call void asm sideeffect "", "~{v[28:29]}"() #0
874 call void asm sideeffect "", "~{v30}"() #0
876 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
877 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
878 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
879 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
880 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
881 %cmp = icmp eq i32 %in, 0
882 br i1 %cmp, label %bb0, label %ret
885 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
886 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
887 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
888 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
889 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
896 ; Same as @no_vgprs_last_sgpr_spill: some SGPR spills must go to memory.
897 ; Additionally, v0 is live across the SGPR reloads in %bb0, so the reload from memory must use a different temporary VGPR (v1).
898 define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
899 ; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0:
901 ; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
902 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
903 ; GCN-NEXT: s_mov_b32 s54, -1
904 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
905 ; GCN-NEXT: s_add_u32 s52, s52, s3
906 ; GCN-NEXT: s_addc_u32 s53, s53, 0
907 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
908 ; GCN-NEXT: ;;#ASMSTART
909 ; GCN-NEXT: ;;#ASMEND
910 ; GCN-NEXT: ;;#ASMSTART
911 ; GCN-NEXT: ;;#ASMEND
912 ; GCN-NEXT: ;;#ASMSTART
913 ; GCN-NEXT: ;;#ASMEND
914 ; GCN-NEXT: ;;#ASMSTART
915 ; GCN-NEXT: ;;#ASMEND
916 ; GCN-NEXT: ;;#ASMSTART
917 ; GCN-NEXT: ;;#ASMEND
918 ; GCN-NEXT: ;;#ASMSTART
919 ; GCN-NEXT: ;;#ASMEND
920 ; GCN-NEXT: ;;#ASMSTART
921 ; GCN-NEXT: ; def s[4:19]
922 ; GCN-NEXT: ;;#ASMEND
923 ; GCN-NEXT: v_writelane_b32 v31, s4, 0
924 ; GCN-NEXT: v_writelane_b32 v31, s5, 1
925 ; GCN-NEXT: v_writelane_b32 v31, s6, 2
926 ; GCN-NEXT: v_writelane_b32 v31, s7, 3
927 ; GCN-NEXT: v_writelane_b32 v31, s8, 4
928 ; GCN-NEXT: v_writelane_b32 v31, s9, 5
929 ; GCN-NEXT: v_writelane_b32 v31, s10, 6
930 ; GCN-NEXT: v_writelane_b32 v31, s11, 7
931 ; GCN-NEXT: v_writelane_b32 v31, s12, 8
932 ; GCN-NEXT: v_writelane_b32 v31, s13, 9
933 ; GCN-NEXT: v_writelane_b32 v31, s14, 10
934 ; GCN-NEXT: v_writelane_b32 v31, s15, 11
935 ; GCN-NEXT: v_writelane_b32 v31, s16, 12
936 ; GCN-NEXT: v_writelane_b32 v31, s17, 13
937 ; GCN-NEXT: v_writelane_b32 v31, s18, 14
938 ; GCN-NEXT: v_writelane_b32 v31, s19, 15
939 ; GCN-NEXT: ;;#ASMSTART
940 ; GCN-NEXT: ; def s[4:19]
941 ; GCN-NEXT: ;;#ASMEND
942 ; GCN-NEXT: v_writelane_b32 v31, s4, 16
943 ; GCN-NEXT: v_writelane_b32 v31, s5, 17
944 ; GCN-NEXT: v_writelane_b32 v31, s6, 18
945 ; GCN-NEXT: v_writelane_b32 v31, s7, 19
946 ; GCN-NEXT: v_writelane_b32 v31, s8, 20
947 ; GCN-NEXT: v_writelane_b32 v31, s9, 21
948 ; GCN-NEXT: v_writelane_b32 v31, s10, 22
949 ; GCN-NEXT: v_writelane_b32 v31, s11, 23
950 ; GCN-NEXT: v_writelane_b32 v31, s12, 24
951 ; GCN-NEXT: v_writelane_b32 v31, s13, 25
952 ; GCN-NEXT: v_writelane_b32 v31, s14, 26
953 ; GCN-NEXT: v_writelane_b32 v31, s15, 27
954 ; GCN-NEXT: v_writelane_b32 v31, s16, 28
955 ; GCN-NEXT: v_writelane_b32 v31, s17, 29
956 ; GCN-NEXT: v_writelane_b32 v31, s18, 30
957 ; GCN-NEXT: v_writelane_b32 v31, s19, 31
958 ; GCN-NEXT: ;;#ASMSTART
959 ; GCN-NEXT: ; def s[4:19]
960 ; GCN-NEXT: ;;#ASMEND
961 ; GCN-NEXT: v_writelane_b32 v31, s4, 32
962 ; GCN-NEXT: v_writelane_b32 v31, s5, 33
963 ; GCN-NEXT: v_writelane_b32 v31, s6, 34
964 ; GCN-NEXT: v_writelane_b32 v31, s7, 35
965 ; GCN-NEXT: v_writelane_b32 v31, s8, 36
966 ; GCN-NEXT: v_writelane_b32 v31, s9, 37
967 ; GCN-NEXT: v_writelane_b32 v31, s10, 38
968 ; GCN-NEXT: v_writelane_b32 v31, s11, 39
969 ; GCN-NEXT: v_writelane_b32 v31, s12, 40
970 ; GCN-NEXT: v_writelane_b32 v31, s13, 41
971 ; GCN-NEXT: v_writelane_b32 v31, s14, 42
972 ; GCN-NEXT: v_writelane_b32 v31, s15, 43
973 ; GCN-NEXT: v_writelane_b32 v31, s16, 44
974 ; GCN-NEXT: v_writelane_b32 v31, s17, 45
975 ; GCN-NEXT: v_writelane_b32 v31, s18, 46
976 ; GCN-NEXT: v_writelane_b32 v31, s19, 47
977 ; GCN-NEXT: ;;#ASMSTART
978 ; GCN-NEXT: ; def s[4:19]
979 ; GCN-NEXT: ;;#ASMEND
980 ; GCN-NEXT: v_writelane_b32 v31, s4, 48
981 ; GCN-NEXT: v_writelane_b32 v31, s5, 49
982 ; GCN-NEXT: v_writelane_b32 v31, s6, 50
983 ; GCN-NEXT: v_writelane_b32 v31, s7, 51
984 ; GCN-NEXT: v_writelane_b32 v31, s8, 52
985 ; GCN-NEXT: v_writelane_b32 v31, s9, 53
986 ; GCN-NEXT: v_writelane_b32 v31, s10, 54
987 ; GCN-NEXT: v_writelane_b32 v31, s11, 55
988 ; GCN-NEXT: v_writelane_b32 v31, s12, 56
989 ; GCN-NEXT: v_writelane_b32 v31, s13, 57
990 ; GCN-NEXT: v_writelane_b32 v31, s14, 58
991 ; GCN-NEXT: v_writelane_b32 v31, s15, 59
992 ; GCN-NEXT: v_writelane_b32 v31, s16, 60
993 ; GCN-NEXT: v_writelane_b32 v31, s17, 61
994 ; GCN-NEXT: v_writelane_b32 v31, s18, 62
995 ; GCN-NEXT: v_writelane_b32 v31, s19, 63
996 ; GCN-NEXT: ;;#ASMSTART
997 ; GCN-NEXT: ; def s[2:3]
998 ; GCN-NEXT: ;;#ASMEND
999 ; GCN-NEXT: s_mov_b64 s[4:5], exec
1000 ; GCN-NEXT: s_mov_b64 exec, 3
1001 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0
1002 ; GCN-NEXT: v_writelane_b32 v0, s2, 0
1003 ; GCN-NEXT: v_writelane_b32 v0, s3, 1
1004 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
1005 ; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0
1006 ; GCN-NEXT: s_waitcnt vmcnt(0)
1007 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
1008 ; GCN-NEXT: s_mov_b32 s1, 0
1009 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1010 ; GCN-NEXT: s_cmp_lg_u32 s0, s1
1011 ; GCN-NEXT: s_cbranch_scc1 BB3_2
1012 ; GCN-NEXT: ; %bb.1: ; %bb0
1013 ; GCN-NEXT: v_readlane_b32 s36, v31, 32
1014 ; GCN-NEXT: v_readlane_b32 s37, v31, 33
1015 ; GCN-NEXT: v_readlane_b32 s38, v31, 34
1016 ; GCN-NEXT: v_readlane_b32 s39, v31, 35
1017 ; GCN-NEXT: v_readlane_b32 s40, v31, 36
1018 ; GCN-NEXT: v_readlane_b32 s41, v31, 37
1019 ; GCN-NEXT: v_readlane_b32 s42, v31, 38
1020 ; GCN-NEXT: v_readlane_b32 s43, v31, 39
1021 ; GCN-NEXT: v_readlane_b32 s44, v31, 40
1022 ; GCN-NEXT: v_readlane_b32 s45, v31, 41
1023 ; GCN-NEXT: v_readlane_b32 s46, v31, 42
1024 ; GCN-NEXT: v_readlane_b32 s47, v31, 43
1025 ; GCN-NEXT: v_readlane_b32 s48, v31, 44
1026 ; GCN-NEXT: v_readlane_b32 s49, v31, 45
1027 ; GCN-NEXT: v_readlane_b32 s50, v31, 46
1028 ; GCN-NEXT: v_readlane_b32 s51, v31, 47
1029 ; GCN-NEXT: v_readlane_b32 s0, v31, 16
1030 ; GCN-NEXT: v_readlane_b32 s1, v31, 17
1031 ; GCN-NEXT: v_readlane_b32 s2, v31, 18
1032 ; GCN-NEXT: v_readlane_b32 s3, v31, 19
1033 ; GCN-NEXT: v_readlane_b32 s4, v31, 20
1034 ; GCN-NEXT: v_readlane_b32 s5, v31, 21
1035 ; GCN-NEXT: v_readlane_b32 s6, v31, 22
1036 ; GCN-NEXT: v_readlane_b32 s7, v31, 23
1037 ; GCN-NEXT: v_readlane_b32 s8, v31, 24
1038 ; GCN-NEXT: v_readlane_b32 s9, v31, 25
1039 ; GCN-NEXT: v_readlane_b32 s10, v31, 26
1040 ; GCN-NEXT: v_readlane_b32 s11, v31, 27
1041 ; GCN-NEXT: v_readlane_b32 s12, v31, 28
1042 ; GCN-NEXT: v_readlane_b32 s13, v31, 29
1043 ; GCN-NEXT: v_readlane_b32 s14, v31, 30
1044 ; GCN-NEXT: v_readlane_b32 s15, v31, 31
1045 ; GCN-NEXT: v_readlane_b32 s16, v31, 0
1046 ; GCN-NEXT: v_readlane_b32 s17, v31, 1
1047 ; GCN-NEXT: v_readlane_b32 s18, v31, 2
1048 ; GCN-NEXT: v_readlane_b32 s19, v31, 3
1049 ; GCN-NEXT: v_readlane_b32 s20, v31, 4
1050 ; GCN-NEXT: v_readlane_b32 s21, v31, 5
1051 ; GCN-NEXT: v_readlane_b32 s22, v31, 6
1052 ; GCN-NEXT: v_readlane_b32 s23, v31, 7
1053 ; GCN-NEXT: v_readlane_b32 s24, v31, 8
1054 ; GCN-NEXT: v_readlane_b32 s25, v31, 9
1055 ; GCN-NEXT: v_readlane_b32 s26, v31, 10
1056 ; GCN-NEXT: v_readlane_b32 s27, v31, 11
1057 ; GCN-NEXT: v_readlane_b32 s28, v31, 12
1058 ; GCN-NEXT: v_readlane_b32 s29, v31, 13
1059 ; GCN-NEXT: v_readlane_b32 s30, v31, 14
1060 ; GCN-NEXT: v_readlane_b32 s31, v31, 15
1061 ; GCN-NEXT: ;;#ASMSTART
1062 ; GCN-NEXT: ; def v0
1063 ; GCN-NEXT: ;;#ASMEND
1064 ; GCN-NEXT: ;;#ASMSTART
1065 ; GCN-NEXT: ; use s[16:31]
1066 ; GCN-NEXT: ;;#ASMEND
1067 ; GCN-NEXT: ;;#ASMSTART
1068 ; GCN-NEXT: ; use s[0:15]
1069 ; GCN-NEXT: ;;#ASMEND
1070 ; GCN-NEXT: v_readlane_b32 s4, v31, 48
1071 ; GCN-NEXT: v_readlane_b32 s5, v31, 49
1072 ; GCN-NEXT: v_readlane_b32 s6, v31, 50
1073 ; GCN-NEXT: v_readlane_b32 s7, v31, 51
1074 ; GCN-NEXT: v_readlane_b32 s8, v31, 52
1075 ; GCN-NEXT: v_readlane_b32 s9, v31, 53
1076 ; GCN-NEXT: v_readlane_b32 s10, v31, 54
1077 ; GCN-NEXT: v_readlane_b32 s11, v31, 55
1078 ; GCN-NEXT: v_readlane_b32 s12, v31, 56
1079 ; GCN-NEXT: v_readlane_b32 s13, v31, 57
1080 ; GCN-NEXT: v_readlane_b32 s14, v31, 58
1081 ; GCN-NEXT: v_readlane_b32 s15, v31, 59
1082 ; GCN-NEXT: v_readlane_b32 s16, v31, 60
1083 ; GCN-NEXT: v_readlane_b32 s17, v31, 61
1084 ; GCN-NEXT: v_readlane_b32 s18, v31, 62
1085 ; GCN-NEXT: v_readlane_b32 s19, v31, 63
1086 ; GCN-NEXT: s_mov_b64 s[2:3], exec
1087 ; GCN-NEXT: s_mov_b64 exec, 3
1088 ; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0
1089 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
1090 ; GCN-NEXT: s_waitcnt vmcnt(0)
1091 ; GCN-NEXT: v_readlane_b32 s0, v1, 0
1092 ; GCN-NEXT: v_readlane_b32 s1, v1, 1
1093 ; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0
1094 ; GCN-NEXT: s_waitcnt vmcnt(0)
1095 ; GCN-NEXT: s_mov_b64 exec, s[2:3]
1096 ; GCN-NEXT: ;;#ASMSTART
1097 ; GCN-NEXT: ; use s[36:51]
1098 ; GCN-NEXT: ;;#ASMEND
1099 ; GCN-NEXT: ;;#ASMSTART
1100 ; GCN-NEXT: ; use s[4:19]
1101 ; GCN-NEXT: ;;#ASMEND
1102 ; GCN-NEXT: ;;#ASMSTART
1103 ; GCN-NEXT: ; use s[0:1]
1104 ; GCN-NEXT: ;;#ASMEND
1105 ; GCN-NEXT: ;;#ASMSTART
1106 ; GCN-NEXT: ; use v0
1107 ; GCN-NEXT: ;;#ASMEND
1108 ; GCN-NEXT: BB3_2: ; %ret
1109 ; GCN-NEXT: s_endpgm
1110 call void asm sideeffect "", "~{v[0:7]}" () #0
1111 call void asm sideeffect "", "~{v[8:15]}" () #0
1112 call void asm sideeffect "", "~{v[16:23]}" () #0
1113 call void asm sideeffect "", "~{v[24:27]}"() #0
1114 call void asm sideeffect "", "~{v[28:29]}"() #0
1115 call void asm sideeffect "", "~{v30}"() #0
1117 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1118 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1119 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1120 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
1121 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
1122 %cmp = icmp eq i32 %in, 0
1123 br i1 %cmp, label %bb0, label %ret
1126 %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0
1127 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
1128 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
1129 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
1130 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
1131 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
1132 call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0
; Attribute group #0 is attached to the inline-asm call sites in this file;
; it only marks them nounwind.
1139 attributes #0 = { nounwind }
; Attribute group #1 is used by @no_vgprs_last_sgpr_spill_live_v0. In addition
; to nounwind it pins the kernel to exactly 8 waves per execution unit.
; NOTE(review): this presumably limits the VGPR budget so that, once the asm
; clobbers take v0-v30, only v31 is left for SGPR spilling and the remaining
; SGPRs must go to memory -- confirm against the AMDGPU usage documentation.
1140 attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }