1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3 ; RUN: -mcpu=pwr9 < %s | FileCheck %s
5 define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7) {
8 ; CHECK-NEXT: cmpd 5, 7
9 ; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
10 ; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
11 ; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
12 ; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
13 ; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
14 ; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
15 ; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
16 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
17 ; CHECK-NEXT: bge 0, .LBB0_6
18 ; CHECK-NEXT: # %bb.1: # %.preheader
19 ; CHECK-NEXT: addi 30, 5, 1
20 ; CHECK-NEXT: addi 29, 5, 3
21 ; CHECK-NEXT: addi 28, 5, 2
22 ; CHECK-NEXT: mulld 12, 8, 5
23 ; CHECK-NEXT: addi 3, 3, 16
24 ; CHECK-NEXT: mulld 0, 9, 8
25 ; CHECK-NEXT: sldi 11, 10, 3
26 ; CHECK-NEXT: mulld 30, 8, 30
27 ; CHECK-NEXT: mulld 29, 8, 29
28 ; CHECK-NEXT: mulld 8, 8, 28
29 ; CHECK-NEXT: b .LBB0_3
30 ; CHECK-NEXT: .p2align 4
31 ; CHECK-NEXT: .LBB0_2:
32 ; CHECK-NEXT: add 5, 5, 9
33 ; CHECK-NEXT: add 12, 12, 0
34 ; CHECK-NEXT: add 30, 30, 0
35 ; CHECK-NEXT: add 29, 29, 0
36 ; CHECK-NEXT: add 8, 8, 0
37 ; CHECK-NEXT: cmpd 5, 7
38 ; CHECK-NEXT: bge 0, .LBB0_6
39 ; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1
40 ; CHECK-NEXT: # Child Loop BB0_5 Depth 2
41 ; CHECK-NEXT: sub 28, 5, 10
42 ; CHECK-NEXT: cmpd 6, 28
43 ; CHECK-NEXT: bge 0, .LBB0_2
44 ; CHECK-NEXT: # %bb.4:
45 ; CHECK-NEXT: add 26, 6, 12
46 ; CHECK-NEXT: add 25, 6, 30
47 ; CHECK-NEXT: add 24, 6, 29
48 ; CHECK-NEXT: add 23, 6, 8
49 ; CHECK-NEXT: sldi 27, 6, 3
50 ; CHECK-NEXT: sldi 26, 26, 3
51 ; CHECK-NEXT: sldi 25, 25, 3
52 ; CHECK-NEXT: sldi 24, 24, 3
53 ; CHECK-NEXT: sldi 23, 23, 3
54 ; CHECK-NEXT: add 27, 4, 27
55 ; CHECK-NEXT: add 26, 3, 26
56 ; CHECK-NEXT: add 25, 3, 25
57 ; CHECK-NEXT: add 24, 3, 24
58 ; CHECK-NEXT: add 23, 3, 23
59 ; CHECK-NEXT: .p2align 5
60 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1
61 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2
62 ; CHECK-NEXT: lfd 0, 0(27)
63 ; CHECK-NEXT: lfd 1, -16(26)
64 ; CHECK-NEXT: add 6, 6, 10
65 ; CHECK-NEXT: cmpd 6, 28
66 ; CHECK-NEXT: xsadddp 0, 0, 1
67 ; CHECK-NEXT: lfd 1, -8(26)
68 ; CHECK-NEXT: xsadddp 0, 0, 1
69 ; CHECK-NEXT: lfd 1, 0(26)
70 ; CHECK-NEXT: xsadddp 0, 0, 1
71 ; CHECK-NEXT: lfd 1, 8(26)
72 ; CHECK-NEXT: add 26, 26, 11
73 ; CHECK-NEXT: xsadddp 0, 0, 1
74 ; CHECK-NEXT: lfd 1, -16(25)
75 ; CHECK-NEXT: xsadddp 0, 0, 1
76 ; CHECK-NEXT: lfd 1, -8(25)
77 ; CHECK-NEXT: xsadddp 0, 0, 1
78 ; CHECK-NEXT: lfd 1, 0(25)
79 ; CHECK-NEXT: xsadddp 0, 0, 1
80 ; CHECK-NEXT: lfd 1, 8(25)
81 ; CHECK-NEXT: add 25, 25, 11
82 ; CHECK-NEXT: xsadddp 0, 0, 1
83 ; CHECK-NEXT: lfd 1, -16(23)
84 ; CHECK-NEXT: xsadddp 0, 0, 1
85 ; CHECK-NEXT: lfd 1, -8(23)
86 ; CHECK-NEXT: xsadddp 0, 0, 1
87 ; CHECK-NEXT: lfd 1, 0(23)
88 ; CHECK-NEXT: xsadddp 0, 0, 1
89 ; CHECK-NEXT: lfd 1, 8(23)
90 ; CHECK-NEXT: add 23, 23, 11
91 ; CHECK-NEXT: xsadddp 0, 0, 1
92 ; CHECK-NEXT: lfd 1, -16(24)
93 ; CHECK-NEXT: xsadddp 0, 0, 1
94 ; CHECK-NEXT: lfd 1, -8(24)
95 ; CHECK-NEXT: xsadddp 0, 0, 1
96 ; CHECK-NEXT: lfd 1, 0(24)
97 ; CHECK-NEXT: xsadddp 0, 0, 1
98 ; CHECK-NEXT: lfd 1, 8(24)
99 ; CHECK-NEXT: add 24, 24, 11
100 ; CHECK-NEXT: xsadddp 0, 0, 1
101 ; CHECK-NEXT: stfd 0, 0(27)
102 ; CHECK-NEXT: add 27, 27, 11
103 ; CHECK-NEXT: blt 0, .LBB0_5
104 ; CHECK-NEXT: b .LBB0_2
105 ; CHECK-NEXT: .LBB0_6:
106 ; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
107 ; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
108 ; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
109 ; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload
110 ; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload
111 ; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload
112 ; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload
113 ; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload
115 %9 = icmp slt i64 %2, %4
116 br i1 %9, label %10, label %97
118 10: ; preds = %8, %93
119 %11 = phi i64 [ %95, %93 ], [ %2, %8 ]
120 %12 = phi i64 [ %94, %93 ], [ %3, %8 ]
121 %13 = sub nsw i64 %11, %7
122 %14 = icmp slt i64 %12, %13
123 br i1 %14, label %15, label %93
126 %16 = mul nsw i64 %11, %5
127 %17 = add nsw i64 %11, 1
128 %18 = mul nsw i64 %17, %5
129 %19 = add nsw i64 %11, 2
130 %20 = mul nsw i64 %19, %5
131 %21 = add nsw i64 %11, 3
132 %22 = mul nsw i64 %21, %5
135 23: ; preds = %15, %23
136 %24 = phi i64 [ %12, %15 ], [ %91, %23 ]
137 %25 = getelementptr inbounds double, double* %1, i64 %24
138 %26 = load double, double* %25, align 8
139 %27 = add nsw i64 %24, %16
140 %28 = getelementptr inbounds double, double* %0, i64 %27
141 %29 = load double, double* %28, align 8
142 %30 = fadd double %26, %29
143 %31 = add nsw i64 %27, 1
144 %32 = getelementptr inbounds double, double* %0, i64 %31
145 %33 = load double, double* %32, align 8
146 %34 = fadd double %30, %33
147 %35 = add nsw i64 %27, 2
148 %36 = getelementptr inbounds double, double* %0, i64 %35
149 %37 = load double, double* %36, align 8
150 %38 = fadd double %34, %37
151 %39 = add nsw i64 %27, 3
152 %40 = getelementptr inbounds double, double* %0, i64 %39
153 %41 = load double, double* %40, align 8
154 %42 = fadd double %38, %41
155 %43 = add nsw i64 %24, %18
156 %44 = getelementptr inbounds double, double* %0, i64 %43
157 %45 = load double, double* %44, align 8
158 %46 = fadd double %42, %45
159 %47 = add nsw i64 %43, 1
160 %48 = getelementptr inbounds double, double* %0, i64 %47
161 %49 = load double, double* %48, align 8
162 %50 = fadd double %46, %49
163 %51 = add nsw i64 %43, 2
164 %52 = getelementptr inbounds double, double* %0, i64 %51
165 %53 = load double, double* %52, align 8
166 %54 = fadd double %50, %53
167 %55 = add nsw i64 %43, 3
168 %56 = getelementptr inbounds double, double* %0, i64 %55
169 %57 = load double, double* %56, align 8
170 %58 = fadd double %54, %57
171 %59 = add nsw i64 %24, %20
172 %60 = getelementptr inbounds double, double* %0, i64 %59
173 %61 = load double, double* %60, align 8
174 %62 = fadd double %58, %61
175 %63 = add nsw i64 %59, 1
176 %64 = getelementptr inbounds double, double* %0, i64 %63
177 %65 = load double, double* %64, align 8
178 %66 = fadd double %62, %65
179 %67 = add nsw i64 %59, 2
180 %68 = getelementptr inbounds double, double* %0, i64 %67
181 %69 = load double, double* %68, align 8
182 %70 = fadd double %66, %69
183 %71 = add nsw i64 %59, 3
184 %72 = getelementptr inbounds double, double* %0, i64 %71
185 %73 = load double, double* %72, align 8
186 %74 = fadd double %70, %73
187 %75 = add nsw i64 %24, %22
188 %76 = getelementptr inbounds double, double* %0, i64 %75
189 %77 = load double, double* %76, align 8
190 %78 = fadd double %74, %77
191 %79 = add nsw i64 %75, 1
192 %80 = getelementptr inbounds double, double* %0, i64 %79
193 %81 = load double, double* %80, align 8
194 %82 = fadd double %78, %81
195 %83 = add nsw i64 %75, 2
196 %84 = getelementptr inbounds double, double* %0, i64 %83
197 %85 = load double, double* %84, align 8
198 %86 = fadd double %82, %85
199 %87 = add nsw i64 %75, 3
200 %88 = getelementptr inbounds double, double* %0, i64 %87
201 %89 = load double, double* %88, align 8
202 %90 = fadd double %86, %89
203 store double %90, double* %25, align 8
204 %91 = add nsw i64 %24, %7
205 %92 = icmp slt i64 %91, %13
206 br i1 %92, label %23, label %93
208 93: ; preds = %23, %10
209 %94 = phi i64 [ %12, %10 ], [ %91, %23 ]
210 %95 = add nsw i64 %11, %6
211 %96 = icmp slt i64 %95, %4
212 br i1 %96, label %10, label %97
214 97: ; preds = %93, %8