; Test spilling using MVC.  The tests here assume z10 register pressure,
; without the high words being available.
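;
; MVC is a memory-to-memory move, so when a spilled value is only loaded
; before a call and stored back after it, the spill and the reload can each
; be folded into a single MVC between the stack slot and the value's home
; location.  A rough sketch of the expected pattern (the offsets and
; registers are illustrative, not fixed by the tests):
;
;   mvc 160(4,%r15), 8(%r2)    spill: copy the 4 bytes at 8(%r2) into the slot
;   brasl %r14, foo@PLT        the call clobbers the GPRs
;   mvc 8(4,%r2), 160(%r15)    restore: copy the 4 bytes back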

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -verify-machineinstrs | FileCheck %s

declare void @foo()

@g0 = dso_local global i32 0
@g1 = dso_local global i32 1
@g2 = dso_local global i32 2
@g3 = dso_local global i32 3
@g4 = dso_local global i32 4
@g5 = dso_local global i32 5
@g6 = dso_local global i32 6
@g7 = dso_local global i32 7
@g8 = dso_local global i32 8
@g9 = dso_local global i32 9

@h0 = dso_local global i64 0
@h1 = dso_local global i64 1
@h2 = dso_local global i64 2
@h3 = dso_local global i64 3
@h4 = dso_local global i64 4
@h5 = dso_local global i64 5
@h6 = dso_local global i64 6
@h7 = dso_local global i64 7
@h8 = dso_local global i64 8
@h9 = dso_local global i64 9

; This function shouldn't spill anything.
define dso_local void @f1(i32 *%ptr0) {
; CHECK-LABEL: f1:
; CHECK: aghi %r15, -160
; CHECK: brasl %r14, foo@PLT
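; (The 160-byte frame is just the ABI-mandated register save area, so the
; prologue allocates no spill slots at all.)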
  %ptr1 = getelementptr i32, i32 *%ptr0, i32 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i32 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i32 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i32 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i32 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i32 12

  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6

  call void @foo()

  store i32 %val0, i32 *%ptr0
  store i32 %val1, i32 *%ptr1
  store i32 %val2, i32 *%ptr2
  store i32 %val3, i32 *%ptr3
  store i32 %val4, i32 *%ptr4
  store i32 %val5, i32 *%ptr5
  store i32 %val6, i32 *%ptr6
  ret void
}

; Test a case where at least one i32 load and at least one i32 store
; need spills.
define dso_local void @f2(i32 *%ptr0) {
; CHECK-LABEL: f2:
; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
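; ([[OFFSET1]] accepts 160 or 164, since the exact placement of the first
; 4-byte spill slot just above the 160-byte ABI area is not pinned down.)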
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16

  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6
  %val7 = load i32, i32 *%ptr7
  %val8 = load i32, i32 *%ptr8

  call void @foo()

  store i32 %val0, i32 *%ptr0
  store i32 %val1, i32 *%ptr1
  store i32 %val2, i32 *%ptr2
  store i32 %val3, i32 *%ptr3
  store i32 %val4, i32 *%ptr4
  store i32 %val5, i32 *%ptr5
  store i32 %val6, i32 *%ptr6
  store i32 %val7, i32 *%ptr7
  store i32 %val8, i32 *%ptr8
  ret void
}

; Test a case where at least one i64 load and at least one i64 store
; need spills.
define dso_local void @f3(i64 *%ptr0) {
; CHECK-LABEL: f3:
; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
  %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
  %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
  %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
  %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
  %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
  %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
  %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
  %ptr8 = getelementptr i64, i64 *%ptr0, i64 16

  %val0 = load i64, i64 *%ptr0
  %val1 = load i64, i64 *%ptr1
  %val2 = load i64, i64 *%ptr2
  %val3 = load i64, i64 *%ptr3
  %val4 = load i64, i64 *%ptr4
  %val5 = load i64, i64 *%ptr5
  %val6 = load i64, i64 *%ptr6
  %val7 = load i64, i64 *%ptr7
  %val8 = load i64, i64 *%ptr8

  call void @foo()

  store i64 %val0, i64 *%ptr0
  store i64 %val1, i64 *%ptr1
  store i64 %val2, i64 *%ptr2
  store i64 %val3, i64 *%ptr3
  store i64 %val4, i64 *%ptr4
  store i64 %val5, i64 *%ptr5
  store i64 %val6, i64 *%ptr6
  store i64 %val7, i64 *%ptr7
  store i64 %val8, i64 *%ptr8
  ret void
}

; Test a case where at least one f32 load and at least one f32 store
; need spills.  The 8 call-saved FPRs could be used for 8 of the %vals
; (and are at the time of writing), but it would really be better to use
; MVC for all 10.
define dso_local void @f4(float *%ptr0) {
; CHECK-LABEL: f4:
; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
  %ptr1 = getelementptr float, float *%ptr0, i64 2
  %ptr2 = getelementptr float, float *%ptr0, i64 4
  %ptr3 = getelementptr float, float *%ptr0, i64 6
  %ptr4 = getelementptr float, float *%ptr0, i64 8
  %ptr5 = getelementptr float, float *%ptr0, i64 10
  %ptr6 = getelementptr float, float *%ptr0, i64 12
  %ptr7 = getelementptr float, float *%ptr0, i64 14
  %ptr8 = getelementptr float, float *%ptr0, i64 16
  %ptr9 = getelementptr float, float *%ptr0, i64 18

  %val0 = load float, float *%ptr0
  %val1 = load float, float *%ptr1
  %val2 = load float, float *%ptr2
  %val3 = load float, float *%ptr3
  %val4 = load float, float *%ptr4
  %val5 = load float, float *%ptr5
  %val6 = load float, float *%ptr6
  %val7 = load float, float *%ptr7
  %val8 = load float, float *%ptr8
  %val9 = load float, float *%ptr9

  call void @foo()

  store float %val0, float *%ptr0
  store float %val1, float *%ptr1
  store float %val2, float *%ptr2
  store float %val3, float *%ptr3
  store float %val4, float *%ptr4
  store float %val5, float *%ptr5
  store float %val6, float *%ptr6
  store float %val7, float *%ptr7
  store float %val8, float *%ptr8
  store float %val9, float *%ptr9
  ret void
}

; Similarly for f64.
define dso_local void @f5(double *%ptr0) {
; CHECK-LABEL: f5:
; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
  %ptr1 = getelementptr double, double *%ptr0, i64 2
  %ptr2 = getelementptr double, double *%ptr0, i64 4
  %ptr3 = getelementptr double, double *%ptr0, i64 6
  %ptr4 = getelementptr double, double *%ptr0, i64 8
  %ptr5 = getelementptr double, double *%ptr0, i64 10
  %ptr6 = getelementptr double, double *%ptr0, i64 12
  %ptr7 = getelementptr double, double *%ptr0, i64 14
  %ptr8 = getelementptr double, double *%ptr0, i64 16
  %ptr9 = getelementptr double, double *%ptr0, i64 18

  %val0 = load double, double *%ptr0
  %val1 = load double, double *%ptr1
  %val2 = load double, double *%ptr2
  %val3 = load double, double *%ptr3
  %val4 = load double, double *%ptr4
  %val5 = load double, double *%ptr5
  %val6 = load double, double *%ptr6
  %val7 = load double, double *%ptr7
  %val8 = load double, double *%ptr8
  %val9 = load double, double *%ptr9

  call void @foo()

  store double %val0, double *%ptr0
  store double %val1, double *%ptr1
  store double %val2, double *%ptr2
  store double %val3, double *%ptr3
  store double %val4, double *%ptr4
  store double %val5, double *%ptr5
  store double %val6, double *%ptr6
  store double %val7, double *%ptr7
  store double %val8, double *%ptr8
  store double %val9, double *%ptr9
  ret void
}

; Repeat f2 with atomic accesses.  We shouldn't use MVC here.
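; Even an unordered atomic load or store must be a single indivisible access,
; which a byte-wise MVC copy does not guarantee, so the values have to be
; spilled and reloaded through registers instead.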
define dso_local void @f6(i32 *%ptr0) {
; CHECK-LABEL: f6:
; CHECK-NOT: mvc
; CHECK: br %r14
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16

  %val0 = load atomic i32, i32 *%ptr0 unordered, align 4
  %val1 = load atomic i32, i32 *%ptr1 unordered, align 4
  %val2 = load atomic i32, i32 *%ptr2 unordered, align 4
  %val3 = load atomic i32, i32 *%ptr3 unordered, align 4
  %val4 = load atomic i32, i32 *%ptr4 unordered, align 4
  %val5 = load atomic i32, i32 *%ptr5 unordered, align 4
  %val6 = load atomic i32, i32 *%ptr6 unordered, align 4
  %val7 = load atomic i32, i32 *%ptr7 unordered, align 4
  %val8 = load atomic i32, i32 *%ptr8 unordered, align 4

  call void @foo()

  store atomic i32 %val0, i32 *%ptr0 unordered, align 4
  store atomic i32 %val1, i32 *%ptr1 unordered, align 4
  store atomic i32 %val2, i32 *%ptr2 unordered, align 4
  store atomic i32 %val3, i32 *%ptr3 unordered, align 4
  store atomic i32 %val4, i32 *%ptr4 unordered, align 4
  store atomic i32 %val5, i32 *%ptr5 unordered, align 4
  store atomic i32 %val6, i32 *%ptr6 unordered, align 4
  store atomic i32 %val7, i32 *%ptr7 unordered, align 4
  store atomic i32 %val8, i32 *%ptr8 unordered, align 4
  ret void
}

; ...likewise volatile accesses.
define dso_local void @f7(i32 *%ptr0) {
; CHECK-LABEL: f7:
; CHECK-NOT: mvc
; CHECK: br %r14
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16

  %val0 = load volatile i32, i32 *%ptr0
  %val1 = load volatile i32, i32 *%ptr1
  %val2 = load volatile i32, i32 *%ptr2
  %val3 = load volatile i32, i32 *%ptr3
  %val4 = load volatile i32, i32 *%ptr4
  %val5 = load volatile i32, i32 *%ptr5
  %val6 = load volatile i32, i32 *%ptr6
  %val7 = load volatile i32, i32 *%ptr7
  %val8 = load volatile i32, i32 *%ptr8

  call void @foo()

  store volatile i32 %val0, i32 *%ptr0
  store volatile i32 %val1, i32 *%ptr1
  store volatile i32 %val2, i32 *%ptr2
  store volatile i32 %val3, i32 *%ptr3
  store volatile i32 %val4, i32 *%ptr4
  store volatile i32 %val5, i32 *%ptr5
  store volatile i32 %val6, i32 *%ptr6
  store volatile i32 %val7, i32 *%ptr7
  store volatile i32 %val8, i32 *%ptr8
  ret void
}

; Check that LRL and STRL are not converted.
define dso_local void @f8() {
; CHECK-LABEL: f8:
; CHECK-NOT: mvc
; CHECK: br %r14
  %val0 = load i32, i32 *@g0
  %val1 = load i32, i32 *@g1
  %val2 = load i32, i32 *@g2
  %val3 = load i32, i32 *@g3
  %val4 = load i32, i32 *@g4
  %val5 = load i32, i32 *@g5
  %val6 = load i32, i32 *@g6
  %val7 = load i32, i32 *@g7
  %val8 = load i32, i32 *@g8
  %val9 = load i32, i32 *@g9

  call void @foo()

  store i32 %val0, i32 *@g0
  store i32 %val1, i32 *@g1
  store i32 %val2, i32 *@g2
  store i32 %val3, i32 *@g3
  store i32 %val4, i32 *@g4
  store i32 %val5, i32 *@g5
  store i32 %val6, i32 *@g6
  store i32 %val7, i32 *@g7
  store i32 %val8, i32 *@g8
  store i32 %val9, i32 *@g9
  ret void
}

; Likewise LGRL and STGRL.
define dso_local void @f9() {
; CHECK-LABEL: f9:
; CHECK-NOT: mvc
; CHECK: br %r14
  %val0 = load i64, i64 *@h0
  %val1 = load i64, i64 *@h1
  %val2 = load i64, i64 *@h2
  %val3 = load i64, i64 *@h3
  %val4 = load i64, i64 *@h4
  %val5 = load i64, i64 *@h5
  %val6 = load i64, i64 *@h6
  %val7 = load i64, i64 *@h7
  %val8 = load i64, i64 *@h8
  %val9 = load i64, i64 *@h9

  call void @foo()

  store i64 %val0, i64 *@h0
  store i64 %val1, i64 *@h1
  store i64 %val2, i64 *@h2
  store i64 %val3, i64 *@h3
  store i64 %val4, i64 *@h4
  store i64 %val5, i64 *@h5
  store i64 %val6, i64 *@h6
  store i64 %val7, i64 *@h7
  store i64 %val8, i64 *@h8
  store i64 %val9, i64 *@h9
  ret void
}

; This showed a problem with the way stack coloring updated instructions.
; The copy from %val9 to %newval8 can be done using an MVC, which then
; has two frame index operands.  Stack coloring chose a valid renumbering
; [FI0, FI1] -> [FI1, FI2], but applied it in the form FI0 -> FI1 -> FI2,
; so that both operands ended up being the same.
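; (Applied stepwise, FI0 -> FI1 first rewrote the FI0 operand to FI1, and
; FI1 -> FI2 then caught both FI1 operands, leaving an MVC whose source and
; destination were the same slot.)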
define dso_local void @f10() {
; CHECK-LABEL: f10:
; CHECK: lgrl [[REG:%r[0-9]+]], h9
; CHECK: stg [[REG]], [[VAL9:[0-9]+]](%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[NEWVAL8:[0-9]+]](8,%r15), [[VAL9]](%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: lg [[REG:%r[0-9]+]], [[NEWVAL8]](%r15)
; CHECK: stgrl [[REG]], h8
entry:
  %val8 = load volatile i64, i64 *@h8
  %val0 = load volatile i64, i64 *@h0
  %val1 = load volatile i64, i64 *@h1
  %val2 = load volatile i64, i64 *@h2
  %val3 = load volatile i64, i64 *@h3
  %val4 = load volatile i64, i64 *@h4
  %val5 = load volatile i64, i64 *@h5
  %val6 = load volatile i64, i64 *@h6
  %val7 = load volatile i64, i64 *@h7
  %val9 = load volatile i64, i64 *@h9

  call void @foo()

  store volatile i64 %val0, i64 *@h0
  store volatile i64 %val1, i64 *@h1
  store volatile i64 %val2, i64 *@h2
  store volatile i64 %val3, i64 *@h3
  store volatile i64 %val4, i64 *@h4
  store volatile i64 %val5, i64 *@h5
  store volatile i64 %val6, i64 *@h6
  store volatile i64 %val7, i64 *@h7

  %check = load volatile i64, i64 *@h0
  %cond = icmp eq i64 %check, 0
  br i1 %cond, label %skip, label %fallthru

fallthru:
  call void @foo()

  store volatile i64 %val0, i64 *@h0
  store volatile i64 %val1, i64 *@h1
  store volatile i64 %val2, i64 *@h2
  store volatile i64 %val3, i64 *@h3
  store volatile i64 %val4, i64 *@h4
  store volatile i64 %val5, i64 *@h5
  store volatile i64 %val6, i64 *@h6
  store volatile i64 %val7, i64 *@h7
  store volatile i64 %val8, i64 *@h8
  br label %skip

skip:
  %newval8 = phi i64 [ %val8, %entry ], [ %val9, %fallthru ]
  call void @foo()

  store volatile i64 %val0, i64 *@h0
  store volatile i64 %val1, i64 *@h1
  store volatile i64 %val2, i64 *@h2
  store volatile i64 %val3, i64 *@h3
  store volatile i64 %val4, i64 *@h4
  store volatile i64 %val5, i64 *@h5
  store volatile i64 %val6, i64 *@h6
  store volatile i64 %val7, i64 *@h7
  store volatile i64 %newval8, i64 *@h8
  store volatile i64 %val9, i64 *@h9
  ret void
}

; This used to generate a no-op MVC.  It is very sensitive to spill heuristics.
define dso_local void @f11() {
; CHECK-LABEL: f11:
; CHECK-NOT: mvc [[OFFSET:[0-9]+]](8,%r15), [[OFFSET]](%r15)
; CHECK: br %r14
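; (Reusing the [[OFFSET]] binding means this only rejects an MVC whose source
; and destination are the same stack slot, i.e. a self-copy.)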
entry:
  %val0 = load volatile i64, i64 *@h0
  %val1 = load volatile i64, i64 *@h1
  %val2 = load volatile i64, i64 *@h2
  %val3 = load volatile i64, i64 *@h3
  %val4 = load volatile i64, i64 *@h4
  %val5 = load volatile i64, i64 *@h5
  %val6 = load volatile i64, i64 *@h6
  %val7 = load volatile i64, i64 *@h7

  %altval0 = load volatile i64, i64 *@h0
  %altval1 = load volatile i64, i64 *@h1

  call void @foo()

  store volatile i64 %val0, i64 *@h0
  store volatile i64 %val1, i64 *@h1
  store volatile i64 %val2, i64 *@h2
  store volatile i64 %val3, i64 *@h3
  store volatile i64 %val4, i64 *@h4
  store volatile i64 %val5, i64 *@h5
  store volatile i64 %val6, i64 *@h6
  store volatile i64 %val7, i64 *@h7

  %check = load volatile i64, i64 *@h0
  %cond = icmp eq i64 %check, 0
  br i1 %cond, label %a1, label %b1

a1:
  call void @foo()
  br label %join1

b1:
  store volatile i64 %altval0, i64 *@h0
  br label %join1

join1:
  %newval0 = phi i64 [ %val0, %a1 ], [ %altval0, %b1 ]

  call void @foo()

  store volatile i64 %val1, i64 *@h1
  store volatile i64 %val2, i64 *@h2
  store volatile i64 %val3, i64 *@h3
  store volatile i64 %val4, i64 *@h4
  store volatile i64 %val5, i64 *@h5
  store volatile i64 %val6, i64 *@h6
  store volatile i64 %val7, i64 *@h7
  br i1 %cond, label %a2, label %b2

a2:
  call void @foo()
  br label %join2

b2:
  store volatile i64 %altval1, i64 *@h1
  br label %join2

join2:
  %newval1 = phi i64 [ %val1, %a2 ], [ %altval1, %b2 ]

  call void @foo()

  store volatile i64 %val2, i64 *@h2
  store volatile i64 %val3, i64 *@h3
  store volatile i64 %val4, i64 *@h4
  store volatile i64 %val5, i64 *@h5
  store volatile i64 %val6, i64 *@h6
  store volatile i64 %val7, i64 *@h7

  call void @foo()

  store volatile i64 %newval0, i64 *@h0
  store volatile i64 %newval1, i64 *@h1
  store volatile i64 %val2, i64 *@h2
  store volatile i64 %val3, i64 *@h3
  store volatile i64 %val4, i64 *@h4
  store volatile i64 %val5, i64 *@h5
  store volatile i64 %val6, i64 *@h6
  store volatile i64 %val7, i64 *@h7
  ret void
}