1 ; Test that loops with sufficient registers do not spill to or reload from
2 ; the stack. These cases include calls, so the GR128 / FP128 registers must
3 ; be part of the callee-saved registers list in order to avoid spilling /
4 ; reloading.
6 ; RUN: llc -switch-peel-threshold=101 < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Anonymous struct types used by @fun0. %0 is the pointee type of @fun0's
; parameter, and %1 is the type of the first argument of the indirect call in
; @fun0. In the visible code the remaining types (%2 - %8) are referenced only
; through fields of %0 and of each other.
8 %0 = type { %0*, %0*, %0*, i32, %1*, i64, i64, i64, i64, i64, i64, %2, %5, %7 }
9 %1 = type { i32, i32, i32 (%1*, i64, i32)*, i32 (%1*, i64, i64, i32, i8**)*, i32 (%1*, i64, i64, i64, i32)*, i32 (%1*)*, void (i8*)*, i8*, i8* }
10 %2 = type { i64, i64, %3** }
11 %3 = type { %4*, i64 }
12 %4 = type { i64, i8* }
13 %5 = type { i64, i64, %6** }
14 %6 = type { i64, %4*, i32, i64, i8* }
15 %7 = type { i64, i64, %8** }
16 %8 = type { i64, i64*, i64*, %4*, i64, i32*, %5, i32, i64, i64 }
; Intrinsic called from @fun0; the first pointer is write-only, the second is
; read-only.
18 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
; @fun0: nested-loop case. Block %14 heads the outer loop (re-entered from
; %31) and block %24 heads the inner loop (it branches back to itself). The
; visible body contains llvm.memcpy calls, an indirect call and a 64-bit udiv.
; The FileCheck directives directly below find label .LBB0_4, then the depth-2
; inner loop header, and then reject any "16-byte Folded Spill" or
; "16-byte Folded Reload" in the output that follows.
; NOTE(review): no positive directive follows the two negative ones before the
; .LBB1_2 label directive in @fun1, so they appear to cover more than just the
; inner loop - confirm this is intended.
20 define void @fun0(%0*) {
21 ; CHECK-LABEL: .LBB0_4
22 ; CHECK: => This Inner Loop Header: Depth=2
23 ; CHECK-NOT: 16-byte Folded Spill
24 ; CHECK-NOT: 16-byte Folded Reload
26 %2 = load i64, i64* undef, align 8
29 %5 = load i64, i64* undef, align 8
30 switch i32 undef, label %36 [
39 ; <label>:6: ; preds = %1
42 ; <label>:7: ; preds = %1
45 ; <label>:8: ; preds = %1
48 ; <label>:9: ; preds = %1
51 ; <label>:10: ; preds = %1
54 ; <label>:11: ; preds = %1
57 ; <label>:12: ; preds = %7, %6
; Address of field 5 of the %0 argument; it is reloaded at the top of every
; outer-loop iteration.
58 %13 = getelementptr inbounds %0, %0* %0, i64 0, i32 5
61 ; <label>:14: ; preds = %31, %12
; Outer loop header. %16 is the value carried around the loop; it is advanced
; to %35 in block %31.
62 %15 = phi i64 [ undef, %31 ], [ %5, %12 ]
63 %16 = phi i64 [ %35, %31 ], [ undef, %12 ]
64 %17 = load i64, i64* %13, align 8
; %19 = unsigned minimum of %15 and %17.
65 %18 = icmp ult i64 %15, %17
66 %19 = select i1 %18, i64 %15, i64 %17
; NOTE(review): presumably this 64-bit udiv is what requires a GR128 register
; pair on SystemZ - confirm.
67 %20 = udiv i64 %19, %4
; %22 = unsigned maximum of %20 and 1.
68 %21 = icmp ugt i64 %20, 1
69 %22 = select i1 %21, i64 %20, i64 1
73 ; <label>:24: ; preds = %24, %14
; Inner loop: one memcpy call per iteration; exits to %31 when %27 reaches 0.
74 %25 = phi i64 [ %23, %14 ], [ %27, %24 ]
75 call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* nonnull undef, i64 %4, i1 false)
76 %26 = getelementptr inbounds i8, i8* null, i64 %4
77 store i8* %26, i8** undef, align 8
79 %28 = icmp eq i64 %27, 0
80 br i1 %28, label %31, label %24
82 ; <label>:29: ; preds = %24
83 br i1 undef, label %31, label %30
85 ; <label>:30: ; preds = %29
86 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %26, i8* nonnull undef, i64 %4, i1 false)
89 ; <label>:31: ; preds = %30, %29
; Outer loop latch: an indirect call through an undef callee, then either
; leave to %36 or go back to the outer header %14.
90 %32 = call signext i32 undef(%1* undef, i64 %16, i32 signext 8)
91 %33 = icmp eq i64 undef, 0
92 %34 = select i1 %33, i64 0, i64 %19
93 %35 = add i64 %34, %16
94 br i1 %33, label %36, label %14
96 ; <label>:36: ; preds = %31, %1
; Intrinsic called by @fun1, once in its loop block (%2) and once in its exit
; block (%7).
100 declare fp128 @llvm.pow.f128(fp128, fp128)
102 define void @fun1(fp128*) {
103 ; CHECK-LABEL: .LBB1_2
104 ; CHECK: =>This Inner Loop Header: Depth=1
105 ; CHECK-NOT: 16-byte Folded Spill
106 ; CHECK-NOT: 16-byte Folded Reload
107 ; CHECK-LABEL: .LBB1_3
109 br i1 undef, label %7, label %2
111 ; <label>:2: ; preds = %2, %1
112 %3 = phi fp128 [ %5, %2 ], [ 0xL00000000000000000000000000000000, %1 ]
113 %4 = tail call fp128 @llvm.pow.f128(fp128 0xL00000000000000000000000000000000, fp128 0xL00000000000000000000000000000000) #2
114 %5 = fadd fp128 %3, %4
115 %6 = icmp eq i64 undef, 0
116 br i1 %6, label %7, label %2
118 ; <label>:7: ; preds = %2, %1
119 %8 = phi fp128 [ 0xL00000000000000000000000000000000, %1 ], [ %5, %2 ]
120 %9 = fadd fp128 0xL00000000000000000000000000000000, %8
121 %10 = fadd fp128 0xL00000000000000000000000000000000, %9
122 %11 = fadd fp128 0xL00000000000000000000000000000000, %10
123 %12 = tail call fp128 @llvm.pow.f128(fp128 %11, fp128 0xL00000000000000000000000000000000) #2
124 store fp128 %12, fp128* %0, align 8