1 ; RUN: llc -O0 -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR -check-prefix=GCN %s
3 ; FIXME: We should disable the SDWA peephole, because the dead-code elimination
4 ; that runs after it ruins this test (different register numbers).
6 ; Spill all SGPRs so multiple VGPRs are required for spilling all of them.
8 ; Ideally we only need 2 VGPRs for all spilling. The VGPRs are
9 ; allocated per-frame index, so it's possible to end up with more.
11 ; GCN-LABEL: {{^}}spill_sgprs_to_multiple_vgprs:
25 ; GCN: v_writelane_b32 v0, s4, 0
26 ; GCN-NEXT: v_writelane_b32 v0, s5, 1
27 ; GCN-NEXT: v_writelane_b32 v0, s6, 2
28 ; GCN-NEXT: v_writelane_b32 v0, s7, 3
29 ; GCN-NEXT: v_writelane_b32 v0, s8, 4
30 ; GCN-NEXT: v_writelane_b32 v0, s9, 5
31 ; GCN-NEXT: v_writelane_b32 v0, s10, 6
32 ; GCN-NEXT: v_writelane_b32 v0, s11, 7
34 ; GCN: def s{{\[}}[[TMP_LO:[0-9]+]]:[[TMP_HI:[0-9]+]]{{\]}}
35 ; GCN: v_writelane_b32 v0, s[[TMP_LO]], 8
36 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 9
37 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 10
38 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 11
39 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 12
40 ; GCN-NEXT: v_writelane_b32 v0, s9, 13
41 ; GCN-NEXT: v_writelane_b32 v0, s10, 14
42 ; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 15
44 ; GCN: def s{{\[}}[[TMP_LO]]:[[TMP_HI]]{{\]}}
45 ; GCN: v_writelane_b32 v0, s[[TMP_LO]], 16
46 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 17
47 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 18
48 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 19
49 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 20
50 ; GCN-NEXT: v_writelane_b32 v0, s9, 21
51 ; GCN-NEXT: v_writelane_b32 v0, s10, 22
52 ; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 23
54 ; GCN: def s{{\[}}[[TMP_LO]]:[[TMP_HI]]{{\]}}
55 ; GCN: v_writelane_b32 v0, s[[TMP_LO]], 24
56 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 25
57 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 26
58 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 27
59 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 28
60 ; GCN-NEXT: v_writelane_b32 v0, s9, 29
61 ; GCN-NEXT: v_writelane_b32 v0, s10, 30
62 ; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 31
64 ; GCN: def s{{\[}}[[TMP_LO]]:[[TMP_HI]]{{\]}}
65 ; GCN: v_writelane_b32 v0, s[[TMP_LO]], 32
66 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 33
67 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 34
68 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 35
69 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 36
70 ; GCN-NEXT: v_writelane_b32 v0, s9, 37
71 ; GCN-NEXT: v_writelane_b32 v0, s10, 38
72 ; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 39
74 ; GCN: def s{{\[}}[[TMP_LO]]:[[TMP_HI]]{{\]}}
75 ; GCN: v_writelane_b32 v0, s[[TMP_LO]], 40
76 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 41
77 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 42
78 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 43
79 ; GCN-NEXT: v_writelane_b32 v0, s{{[0-9]+}}, 44
80 ; GCN-NEXT: v_writelane_b32 v0, s9, 45
81 ; GCN-NEXT: v_writelane_b32 v0, s10, 46
82 ; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 47
84 ; GCN: def s{{\[}}[[TMP_LO]]:[[TMP_HI]]{{\]}}
85 ; GCN: v_writelane_b32 v0, s12, 48
86 ; GCN-NEXT: v_writelane_b32 v0, s13, 49
87 ; GCN-NEXT: v_writelane_b32 v0, s14, 50
88 ; GCN-NEXT: v_writelane_b32 v0, s15, 51
89 ; GCN-NEXT: v_writelane_b32 v0, s16, 52
90 ; GCN-NEXT: v_writelane_b32 v0, s17, 53
91 ; GCN-NEXT: v_writelane_b32 v0, s18, 54
92 ; GCN-NEXT: v_writelane_b32 v0, s19, 55
94 ; GCN-NEXT: v_writelane_b32 v0, s20, 56
95 ; GCN-NEXT: v_writelane_b32 v0, s21, 57
96 ; GCN-NEXT: v_writelane_b32 v0, s22, 58
97 ; GCN-NEXT: v_writelane_b32 v0, s23, 59
98 ; GCN-NEXT: v_writelane_b32 v0, s24, 60
99 ; GCN-NEXT: v_writelane_b32 v0, s25, 61
100 ; GCN-NEXT: v_writelane_b32 v0, s26, 62
101 ; GCN-NEXT: v_writelane_b32 v0, s27, 63
102 ; GCN-NEXT: v_writelane_b32 v1, s28, 0
103 ; GCN-NEXT: v_writelane_b32 v1, s29, 1
104 ; GCN-NEXT: v_writelane_b32 v1, s30, 2
105 ; GCN-NEXT: v_writelane_b32 v1, s31, 3
106 ; GCN-NEXT: v_writelane_b32 v1, s32, 4
107 ; GCN-NEXT: v_writelane_b32 v1, s33, 5
108 ; GCN-NEXT: v_writelane_b32 v1, s34, 6
109 ; GCN-NEXT: v_writelane_b32 v1, s35, 7
110 ; GCN-NEXT: v_writelane_b32 v1, s36, 8
111 ; GCN-NEXT: v_writelane_b32 v1, s37, 9
112 ; GCN-NEXT: v_writelane_b32 v1, s38, 10
113 ; GCN-NEXT: v_writelane_b32 v1, s39, 11
114 ; GCN-NEXT: v_writelane_b32 v1, s40, 12
115 ; GCN-NEXT: v_writelane_b32 v1, s41, 13
116 ; GCN-NEXT: v_writelane_b32 v1, s42, 14
117 ; GCN-NEXT: v_writelane_b32 v1, s43, 15
118 ; GCN-NEXT: v_writelane_b32 v1, s44, 16
119 ; GCN-NEXT: v_writelane_b32 v1, s45, 17
120 ; GCN-NEXT: v_writelane_b32 v1, s46, 18
121 ; GCN-NEXT: v_writelane_b32 v1, s47, 19
122 ; GCN-NEXT: v_writelane_b32 v1, s48, 20
123 ; GCN-NEXT: v_writelane_b32 v1, s49, 21
124 ; GCN-NEXT: v_writelane_b32 v1, s50, 22
125 ; GCN-NEXT: v_writelane_b32 v1, s51, 23
126 ; GCN-NEXT: v_writelane_b32 v1, s52, 24
127 ; GCN-NEXT: v_writelane_b32 v1, s53, 25
128 ; GCN-NEXT: v_writelane_b32 v1, s54, 26
129 ; GCN-NEXT: v_writelane_b32 v1, s55, 27
130 ; GCN-NEXT: v_writelane_b32 v1, s56, 28
131 ; GCN-NEXT: v_writelane_b32 v1, s57, 29
132 ; GCN-NEXT: v_writelane_b32 v1, s58, 30
133 ; GCN-NEXT: v_writelane_b32 v1, s59, 31
134 ; GCN-NEXT: v_writelane_b32 v1, s60, 32
135 ; GCN-NEXT: v_writelane_b32 v1, s61, 33
136 ; GCN-NEXT: v_writelane_b32 v1, s62, 34
137 ; GCN-NEXT: v_writelane_b32 v1, s63, 35
138 ; GCN-NEXT: v_writelane_b32 v1, s64, 36
139 ; GCN-NEXT: v_writelane_b32 v1, s65, 37
140 ; GCN-NEXT: v_writelane_b32 v1, s66, 38
141 ; GCN-NEXT: v_writelane_b32 v1, s67, 39
142 ; GCN-NEXT: v_writelane_b32 v1, s68, 40
143 ; GCN-NEXT: v_writelane_b32 v1, s69, 41
144 ; GCN-NEXT: v_writelane_b32 v1, s70, 42
145 ; GCN-NEXT: v_writelane_b32 v1, s71, 43
146 ; GCN-NEXT: v_writelane_b32 v1, s72, 44
147 ; GCN-NEXT: v_writelane_b32 v1, s73, 45
148 ; GCN-NEXT: v_writelane_b32 v1, s74, 46
149 ; GCN-NEXT: v_writelane_b32 v1, s75, 47
150 ; GCN-NEXT: v_writelane_b32 v1, s76, 48
151 ; GCN-NEXT: v_writelane_b32 v1, s77, 49
152 ; GCN-NEXT: v_writelane_b32 v1, s78, 50
153 ; GCN-NEXT: v_writelane_b32 v1, s79, 51
154 ; GCN-NEXT: v_writelane_b32 v1, s80, 52
155 ; GCN-NEXT: v_writelane_b32 v1, s81, 53
156 ; GCN-NEXT: v_writelane_b32 v1, s82, 54
157 ; GCN-NEXT: v_writelane_b32 v1, s83, 55
158 ; GCN-NEXT: v_writelane_b32 v1, s84, 56
159 ; GCN-NEXT: v_writelane_b32 v1, s85, 57
160 ; GCN-NEXT: v_writelane_b32 v1, s86, 58
161 ; GCN-NEXT: v_writelane_b32 v1, s87, 59
162 ; GCN-NEXT: v_writelane_b32 v1, s88, 60
163 ; GCN-NEXT: v_writelane_b32 v1, s89, 61
164 ; GCN-NEXT: v_writelane_b32 v1, s90, 62
165 ; GCN-NEXT: v_writelane_b32 v1, s91, 63
166 ; GCN-NEXT: v_writelane_b32 v2, s4, 0
167 ; GCN-NEXT: v_writelane_b32 v2, s5, 1
168 ; GCN-NEXT: v_writelane_b32 v2, s6, 2
169 ; GCN-NEXT: v_writelane_b32 v2, s7, 3
170 ; GCN-NEXT: v_writelane_b32 v2, s8, 4
171 ; GCN-NEXT: v_writelane_b32 v2, s9, 5
172 ; GCN-NEXT: v_writelane_b32 v2, s10, 6
173 ; GCN-NEXT: v_writelane_b32 v2, s11, 7
174 ; GCN: s_cbranch_scc1
177 ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 0
178 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 1
179 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 2
180 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 3
181 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 4
182 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 5
183 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 6
184 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 7
185 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
187 ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 48
188 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 49
189 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 50
190 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 51
191 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 52
192 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 53
193 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 54
194 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 55
195 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
197 ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 56
198 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 57
199 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 58
200 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 59
201 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 60
202 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 61
203 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 62
204 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 63
205 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
207 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 0
208 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 1
209 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 2
210 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 3
211 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 4
212 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 5
213 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 6
214 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 7
215 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
217 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 8
218 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 9
219 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 10
220 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 11
221 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 12
222 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 13
223 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 14
224 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 15
225 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
227 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 16
228 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 17
229 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 18
230 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 19
231 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 20
232 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 21
233 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 22
234 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 23
235 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
237 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 24
238 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 25
239 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 26
240 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 27
241 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 28
242 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 29
243 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 30
244 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 31
245 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
247 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 32
248 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 33
249 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 34
250 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 35
251 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 36
252 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 37
253 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 38
254 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 39
255 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
257 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 40
258 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 41
259 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 42
260 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 43
261 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 44
262 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 45
263 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 46
264 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 47
265 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
267 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 48
268 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 49
269 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 50
270 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 51
271 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 52
272 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 53
273 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 54
274 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 55
275 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
277 ; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 56
278 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 57
279 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 58
280 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 59
281 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 60
282 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 61
283 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 62
284 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 63
285 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
287 ; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 8
288 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 9
289 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 10
290 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 11
291 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 12
292 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 13
293 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 14
294 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 15
295 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
297 ; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 16
298 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 17
299 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 18
300 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 19
301 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 20
302 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 21
303 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 22
304 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 23
305 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
307 ; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 24
308 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 25
309 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 26
310 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 27
311 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 28
312 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 29
313 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 30
314 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 31
315 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
317 ; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 32
318 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 33
319 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 34
320 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 35
321 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 36
322 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 37
323 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 38
324 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 39
325 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
327 ; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 40
328 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 41
329 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 42
330 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 43
331 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 44
332 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 45
333 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 46
334 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 47
335 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
337 ; GCN: v_readlane_b32 s{{[0-9]+}}, v2, 0
338 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 1
339 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 2
340 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 3
341 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 4
342 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 5
343 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 6
344 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 7
345 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
; Kernel exercised by the spill_sgprs_to_multiple_vgprs checks above.
; It defines seventeen <8 x i32> values through inline asm with an "=s"
; output constraint, branches on %in == 0, and then passes every value to a
; "; use" inline asm with an "s" input constraint.
; NOTE(review): the %bb0 / %ret labels and the function terminator are not
; visible in this excerpt; the uses presumably live in %bb0 - confirm.
346 define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 {
; Seventeen 8-wide defs (%wide.sgpr0 .. %wide.sgpr16); each one is used again
; after the conditional branch below.
347 %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
348 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
349 %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
350 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
351 %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
352 %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
353 %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
354 %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
355 %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
356 %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
357 %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
358 %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
359 %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
360 %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
361 %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
362 %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
363 %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
; The conditional branch separates the defs from the uses.
364 %cmp = icmp eq i32 %in, 0
365 br i1 %cmp, label %bb0, label %ret
; Uses, listed in the same order as the defs.
368 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
369 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) #0
370 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0
371 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
372 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0
373 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0
374 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0
375 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0
376 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0
377 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0
378 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0
379 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0
380 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0
381 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0
382 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0
383 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0
384 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0
391 ; Some of the lanes of an SGPR spill are in one VGPR and some forced
392 ; into the next available VGPR.
394 ; GCN-LABEL: {{^}}split_sgpr_spill_2_vgprs:
398 ; GCN: v_writelane_b32 v0, s4, 48
399 ; GCN-NEXT: v_writelane_b32 v0, s5, 49
400 ; GCN-NEXT: v_writelane_b32 v0, s6, 50
401 ; GCN-NEXT: v_writelane_b32 v0, s7, 51
402 ; GCN-NEXT: v_writelane_b32 v0, s8, 52
403 ; GCN-NEXT: v_writelane_b32 v0, s9, 53
404 ; GCN-NEXT: v_writelane_b32 v0, s10, 54
405 ; GCN-NEXT: v_writelane_b32 v0, s11, 55
406 ; GCN-NEXT: v_writelane_b32 v0, s12, 56
407 ; GCN-NEXT: v_writelane_b32 v0, s13, 57
408 ; GCN-NEXT: v_writelane_b32 v0, s14, 58
409 ; GCN-NEXT: v_writelane_b32 v0, s15, 59
410 ; GCN-NEXT: v_writelane_b32 v0, s16, 60
411 ; GCN-NEXT: v_writelane_b32 v0, s17, 61
412 ; GCN-NEXT: v_writelane_b32 v0, s18, 62
413 ; GCN-NEXT: v_writelane_b32 v0, s19, 63
415 ; GCN: v_readlane_b32 s4, v0, 48
416 ; GCN-NEXT: v_readlane_b32 s5, v0, 49
417 ; GCN-NEXT: v_readlane_b32 s6, v0, 50
418 ; GCN-NEXT: v_readlane_b32 s7, v0, 51
419 ; GCN-NEXT: v_readlane_b32 s8, v0, 52
420 ; GCN-NEXT: v_readlane_b32 s9, v0, 53
421 ; GCN-NEXT: v_readlane_b32 s10, v0, 54
422 ; GCN-NEXT: v_readlane_b32 s11, v0, 55
423 ; GCN-NEXT: v_readlane_b32 s12, v0, 56
424 ; GCN-NEXT: v_readlane_b32 s13, v0, 57
425 ; GCN-NEXT: v_readlane_b32 s14, v0, 58
426 ; GCN-NEXT: v_readlane_b32 s15, v0, 59
427 ; GCN-NEXT: v_readlane_b32 s16, v0, 60
428 ; GCN-NEXT: v_readlane_b32 s17, v0, 61
429 ; GCN-NEXT: v_readlane_b32 s18, v0, 62
430 ; GCN-NEXT: v_readlane_b32 s19, v0, 63
; Kernel exercised by the split_sgpr_spill_2_vgprs checks above.
; It defines four <16 x i32>, one <8 x i32> and one <2 x i32> value through
; inline asm with an "=s" output constraint, branches on %in == 0, and then
; passes each value to a "; use" inline asm.
; The function carries attribute group #1 (see the end of the file).
; NOTE(review): the %bb0 / %ret labels and the function terminator are not
; visible in this excerpt; the uses presumably live in %bb0 - confirm.
431 define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 {
; Note the def order: %wide.sgpr5 is defined before %wide.sgpr3 and
; %wide.sgpr4, although it is used last.
432 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
433 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
434 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
435 %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
436 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
437 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
; The conditional branch separates the defs from the uses.
439 %cmp = icmp eq i32 %in, 0
440 br i1 %cmp, label %bb0, label %ret
; Uses, in numeric order (0 through 5), which differs from the def order.
443 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
444 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
445 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
446 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
447 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
448 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
455 ; The first 64 SGPR spills can go to a VGPR, but there isn't a second one,
456 ; so some spills must go to memory. The last 16-element spill runs out of lanes at the 15th element.
458 ; GCN-LABEL: {{^}}no_vgprs_last_sgpr_spill:
460 ; GCN: v_writelane_b32 v23, s{{[0-9]+}}, 0
461 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 1
462 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 2
463 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 3
464 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 4
465 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 5
466 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 6
467 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 7
468 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 8
469 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 9
470 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 10
471 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 11
472 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 12
473 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 13
474 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 14
475 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 15
477 ; GCN: v_writelane_b32 v23, s{{[0-9]+}}, 16
478 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 17
479 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 18
480 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 19
481 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 20
482 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 21
483 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 22
484 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 23
485 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 24
486 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 25
487 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 26
488 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 27
489 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 28
490 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 29
491 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 30
492 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 31
495 ; GCN: v_writelane_b32 v23, s20, 32
496 ; GCN-NEXT: v_writelane_b32 v23, s21, 33
498 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 34
499 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 35
500 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 36
501 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 37
502 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 38
503 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 39
504 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 40
505 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 41
506 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 42
507 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 43
508 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 44
509 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 45
510 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 46
511 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 47
512 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 48
513 ; GCN-NEXT: v_writelane_b32 v23, s{{[0-9]+}}, 49
515 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
516 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
517 ; GCN: s_cbranch_scc1
520 ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 0
521 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 1
522 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 2
523 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 3
524 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 4
525 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 5
526 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 6
527 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 7
528 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 8
529 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 9
530 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 10
531 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 11
532 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 12
533 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 13
534 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 14
535 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 15
536 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
539 ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 32
540 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 33
541 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 34
542 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 35
543 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 36
544 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 37
545 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 38
546 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 39
547 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 40
548 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 41
549 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 42
550 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 43
551 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 44
552 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 45
553 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 46
554 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 47
555 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
557 ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 16
558 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 17
559 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 18
560 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 19
561 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 20
562 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 21
563 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 22
564 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 23
565 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 24
566 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 25
567 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 26
568 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 27
569 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 28
570 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 29
571 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 30
572 ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 31
573 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}}
575 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
576 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
578 ; GCN: v_readfirstlane_b32 s1, v0
; Kernel exercised by the no_vgprs_last_sgpr_spill checks above.
; It first marks v0 through v22 as clobbered via empty inline asm, then
; defines four <16 x i32> values and one <2 x i32> value with an "=s" output
; constraint, branches on %in == 0, and finally passes each value to a
; "; use" inline asm. The checks above expect the lane spills in v23.
; The function carries attribute group #1 (see the end of the file).
; NOTE(review): presumably the waves-per-eu setting in #1 is what leaves v23
; as the only free vector register - confirm against the target's limits.
; NOTE(review): the %bb0 / %ret labels and the function terminator are not
; visible in this excerpt; the uses presumably live in %bb0 - confirm.
581 define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
; Clobber lists that together cover v0 .. v22.
582 call void asm sideeffect "", "~{v[0:7]}" () #0
583 call void asm sideeffect "", "~{v[8:15]}" () #0
584 call void asm sideeffect "", "~{v[16:19]}"() #0
585 call void asm sideeffect "", "~{v[20:21]}"() #0
586 call void asm sideeffect "", "~{v22}"() #0
; 4 * 16 + 2 = 66 32-bit elements are defined here, more than the 64 lanes
; that the checks above write into a single vector register.
588 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
589 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
590 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
591 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
592 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
; The conditional branch separates the defs from the uses.
593 %cmp = icmp eq i32 %in, 0
594 br i1 %cmp, label %bb0, label %ret
; Uses, listed in the same order as the defs.
597 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
598 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
599 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
600 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
601 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
; Attribute groups referenced in this file.
; #0 is used by the first kernel and by every inline-asm call; it only sets nounwind.
608 attributes #0 = { nounwind }
; #1 is used by the second and third kernels; it adds "amdgpu-waves-per-eu"="10,10".
609 attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" }