1 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
2 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
3 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
4 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
6 ; The following tests use the balance-fp-ops feature, and should be independent of the target CPU.
9 ; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
10 ; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
12 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
13 ; our test strategy is to:
14 ; * Force the pass to always perform register swapping even if the dest register is
15 ; already of the correct color (-aarch64-a57-fp-load-balancing-force-all).
16 ; * Force the pass to ignore all hints it obtained from regalloc
17 ; (-aarch64-a57-fp-load-balancing-override), and run it twice: once where it always hints even (=1) and once where it always hints odd (=2).
19 ; We then use regex magic to check that in the two cases the register allocation is
20 ; different; this is what gives us the testing coverage and distinguishes cases where
21 ; the pass has done some work versus accidental regalloc.
; Module-level target description. The first four llc invocations at the top
; of this file select a CPU with -mcpu and pass no -mtriple, while the
; balance-fp-ops invocations pass -mtriple explicitly.
23 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
24 target triple = "aarch64"
26 ; Non-overlapping groups - shouldn't need any changing at all.
29 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
30 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; f1: two FP multiply/add chains that do not overlap in the IR.
; The first chain is built from p[0..4] and its result is stored to q[0]
; before the second chain (p[5..7], reusing the earlier p[0] load) starts;
; the second result goes to q[1].
36 define void @f1(ptr nocapture readonly %p, ptr nocapture %q) #0 {
; Load p[0] .. p[4] as doubles.
38 %0 = load double, ptr %p, align 8
39 %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
40 %1 = load double, ptr %arrayidx1, align 8
41 %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
42 %2 = load double, ptr %arrayidx2, align 8
43 %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
44 %3 = load double, ptr %arrayidx3, align 8
45 %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
46 %4 = load double, ptr %arrayidx4, align 8
; First chain: each fadd/fsub consumes the previous accumulator value
; (%add -> %add6 -> %sub -> %add9).
47 %mul = fmul fast double %0, %1
48 %add = fadd fast double %mul, %4
49 %mul5 = fmul fast double %1, %2
50 %add6 = fadd fast double %mul5, %add
51 %mul7 = fmul fast double %1, %3
52 %sub = fsub fast double %add6, %mul7
53 %mul8 = fmul fast double %2, %3
54 %add9 = fadd fast double %mul8, %sub
; The first chain ends here: its result is written to q[0].
55 store double %add9, ptr %q, align 8
; Load p[5] .. p[7] for the second chain.
56 %arrayidx11 = getelementptr inbounds double, ptr %p, i64 5
57 %5 = load double, ptr %arrayidx11, align 8
58 %arrayidx12 = getelementptr inbounds double, ptr %p, i64 6
59 %6 = load double, ptr %arrayidx12, align 8
60 %arrayidx13 = getelementptr inbounds double, ptr %p, i64 7
61 %7 = load double, ptr %arrayidx13, align 8
; Second chain (%mul15 -> %add17 -> %add19); note that %mul16 reuses %0.
62 %mul15 = fmul fast double %6, %7
63 %mul16 = fmul fast double %0, %5
64 %add17 = fadd fast double %mul16, %mul15
65 %mul18 = fmul fast double %5, %6
66 %add19 = fadd fast double %mul18, %add17
; The second result is written to q[1].
67 %arrayidx20 = getelementptr inbounds double, ptr %q, i64 1
68 store double %add19, ptr %arrayidx20, align 8
72 ; Overlapping groups - coloring needed.
75 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
76 ; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
77 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
78 ; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
84 ; CHECK: stp [[x]], [[y]]
; f2: two FP chains whose instructions are interleaved in the IR.
; Chain A: %mul -> %add -> %add10 -> %sub -> %add17, stored to q[0].
; Chain B: %mul8 -> %add12 -> %add15, stored to q[1].
; All eight inputs are loaded up front and both results are stored at the end,
; so both chains are in flight at the same time.
86 define void @f2(ptr nocapture readonly %p, ptr nocapture %q) #0 {
; Load p[0] .. p[7] as doubles.
88 %0 = load double, ptr %p, align 8
89 %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
90 %1 = load double, ptr %arrayidx1, align 8
91 %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
92 %2 = load double, ptr %arrayidx2, align 8
93 %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
94 %3 = load double, ptr %arrayidx3, align 8
95 %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
96 %4 = load double, ptr %arrayidx4, align 8
97 %arrayidx5 = getelementptr inbounds double, ptr %p, i64 5
98 %5 = load double, ptr %arrayidx5, align 8
99 %arrayidx6 = getelementptr inbounds double, ptr %p, i64 6
100 %6 = load double, ptr %arrayidx6, align 8
101 %arrayidx7 = getelementptr inbounds double, ptr %p, i64 7
102 %7 = load double, ptr %arrayidx7, align 8
; Chain A starts (%mul, %add); chain B starts with %mul8.
103 %mul = fmul fast double %0, %1
104 %add = fadd fast double %mul, %7
105 %mul8 = fmul fast double %5, %6
; From here on the two chains alternate: A (%add10), B (%add12), A (%sub),
; B (%add15), A (%add17).
106 %mul9 = fmul fast double %1, %2
107 %add10 = fadd fast double %mul9, %add
108 %mul11 = fmul fast double %3, %4
109 %add12 = fadd fast double %mul11, %mul8
110 %mul13 = fmul fast double %1, %3
111 %sub = fsub fast double %add10, %mul13
112 %mul14 = fmul fast double %4, %5
113 %add15 = fadd fast double %mul14, %add12
114 %mul16 = fmul fast double %2, %3
115 %add17 = fadd fast double %mul16, %sub
; Chain A result -> q[0], chain B result -> q[1].
116 store double %add17, ptr %q, align 8
117 %arrayidx19 = getelementptr inbounds double, ptr %q, i64 1
118 store double %add15, ptr %arrayidx19, align 8
122 ; Dest register is live on block exit - fixup needed.
125 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
126 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
129 ; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; f3: a single FP chain whose final value (%add9) is not consumed in the block
; that computes it. The store of %add9 lives in if.end, which is reached both
; directly from the entry code and through if.then (which calls @g first).
132 define void @f3(ptr nocapture readonly %p, ptr nocapture %q) #0 {
; Load p[0] .. p[4] as doubles.
134 %0 = load double, ptr %p, align 8
135 %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
136 %1 = load double, ptr %arrayidx1, align 8
137 %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
138 %2 = load double, ptr %arrayidx2, align 8
139 %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
140 %3 = load double, ptr %arrayidx3, align 8
141 %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
142 %4 = load double, ptr %arrayidx4, align 8
; The chain: %add -> %add6 -> %sub -> %add9.
143 %mul = fmul fast double %0, %1
144 %add = fadd fast double %mul, %4
145 %mul5 = fmul fast double %1, %2
146 %add6 = fadd fast double %mul5, %add
147 %mul7 = fmul fast double %1, %3
148 %sub = fsub fast double %add6, %mul7
149 %mul8 = fmul fast double %2, %3
150 %add9 = fadd fast double %mul8, %sub
; Branch on p[3] == 0.0; %add9 is still unused at this point, so it is live
; across the branch.
151 %cmp = fcmp oeq double %3, 0.000000e+00
152 br i1 %cmp, label %if.then, label %if.end

154 if.then: ; preds = %entry
; Call with no arguments on the p[3] == 0.0 path only.
155 tail call void @g() #2

158 if.end: ; preds = %if.then, %entry
; First and only use of the chain result.
159 store double %add9, ptr %q, align 8
; External function declared with a variable argument list; called without
; arguments from the if.then blocks of f3 and f5.
163 declare void @g(...) #1
165 ; Single precision version of f2.
168 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
169 ; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
170 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
171 ; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
177 ; CHECK: stp [[x]], [[y]]
; f4: same instruction sequence as f2, using float (align 4) instead of double.
; Chain A: %mul -> %add -> %add10 -> %sub -> %add17, stored to q[0].
; Chain B: %mul8 -> %add12 -> %add15, stored to q[1].
179 define void @f4(ptr nocapture readonly %p, ptr nocapture %q) #0 {
; Load p[0] .. p[7] as floats.
181 %0 = load float, ptr %p, align 4
182 %arrayidx1 = getelementptr inbounds float, ptr %p, i64 1
183 %1 = load float, ptr %arrayidx1, align 4
184 %arrayidx2 = getelementptr inbounds float, ptr %p, i64 2
185 %2 = load float, ptr %arrayidx2, align 4
186 %arrayidx3 = getelementptr inbounds float, ptr %p, i64 3
187 %3 = load float, ptr %arrayidx3, align 4
188 %arrayidx4 = getelementptr inbounds float, ptr %p, i64 4
189 %4 = load float, ptr %arrayidx4, align 4
190 %arrayidx5 = getelementptr inbounds float, ptr %p, i64 5
191 %5 = load float, ptr %arrayidx5, align 4
192 %arrayidx6 = getelementptr inbounds float, ptr %p, i64 6
193 %6 = load float, ptr %arrayidx6, align 4
194 %arrayidx7 = getelementptr inbounds float, ptr %p, i64 7
195 %7 = load float, ptr %arrayidx7, align 4
; Chain A starts (%mul, %add); chain B starts with %mul8.
196 %mul = fmul fast float %0, %1
197 %add = fadd fast float %mul, %7
198 %mul8 = fmul fast float %5, %6
; The two chains alternate from here: A (%add10), B (%add12), A (%sub),
; B (%add15), A (%add17).
199 %mul9 = fmul fast float %1, %2
200 %add10 = fadd fast float %mul9, %add
201 %mul11 = fmul fast float %3, %4
202 %add12 = fadd fast float %mul11, %mul8
203 %mul13 = fmul fast float %1, %3
204 %sub = fsub fast float %add10, %mul13
205 %mul14 = fmul fast float %4, %5
206 %add15 = fadd fast float %mul14, %add12
207 %mul16 = fmul fast float %2, %3
208 %add17 = fadd fast float %mul16, %sub
; Chain A result -> q[0], chain B result -> q[1].
209 store float %add17, ptr %q, align 4
210 %arrayidx19 = getelementptr inbounds float, ptr %q, i64 1
211 store float %add15, ptr %arrayidx19, align 4
215 ; Single precision version of f3
218 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
219 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
222 ; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; f5: same instruction sequence as f3, using float (align 4) instead of double.
; The chain result %add9 is only stored in if.end, after the conditional
; branch and, on the if.then path, after the call to @g.
225 define void @f5(ptr nocapture readonly %p, ptr nocapture %q) #0 {
; Load p[0] .. p[4] as floats.
227 %0 = load float, ptr %p, align 4
228 %arrayidx1 = getelementptr inbounds float, ptr %p, i64 1
229 %1 = load float, ptr %arrayidx1, align 4
230 %arrayidx2 = getelementptr inbounds float, ptr %p, i64 2
231 %2 = load float, ptr %arrayidx2, align 4
232 %arrayidx3 = getelementptr inbounds float, ptr %p, i64 3
233 %3 = load float, ptr %arrayidx3, align 4
234 %arrayidx4 = getelementptr inbounds float, ptr %p, i64 4
235 %4 = load float, ptr %arrayidx4, align 4
; The chain: %add -> %add6 -> %sub -> %add9.
236 %mul = fmul fast float %0, %1
237 %add = fadd fast float %mul, %4
238 %mul5 = fmul fast float %1, %2
239 %add6 = fadd fast float %mul5, %add
240 %mul7 = fmul fast float %1, %3
241 %sub = fsub fast float %add6, %mul7
242 %mul8 = fmul fast float %2, %3
243 %add9 = fadd fast float %mul8, %sub
; Branch on p[3] == 0.0; %add9 has no use yet, so it is live across the branch.
244 %cmp = fcmp oeq float %3, 0.000000e+00
245 br i1 %cmp, label %if.then, label %if.end

247 if.then: ; preds = %entry
; Call with no arguments on the p[3] == 0.0 path only.
248 tail call void @g() #2

251 if.end: ; preds = %if.then, %entry
; First and only use of the chain result.
252 store float %add9, ptr %q, align 4
256 ; Test that regmask clobbering stops a chain sequence.
259 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
260 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
263 ; CHECK: fmadd d0, {{.*}}, [[x]]
; f6: a single FP chain whose result is passed to an external call.
; %add9 is the only argument of @hh, and the value stored to q is the call's
; return value rather than the chain result itself.
267 define void @f6(ptr nocapture readonly %p, ptr nocapture %q) #0 {
; Load p[0] .. p[4] as doubles.
269 %0 = load double, ptr %p, align 8
270 %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
271 %1 = load double, ptr %arrayidx1, align 8
272 %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
273 %2 = load double, ptr %arrayidx2, align 8
274 %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
275 %3 = load double, ptr %arrayidx3, align 8
276 %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
277 %4 = load double, ptr %arrayidx4, align 8
; The chain: %add -> %add6 -> %sub -> %add9.
278 %mul = fmul fast double %0, %1
279 %add = fadd fast double %mul, %4
280 %mul5 = fmul fast double %1, %2
281 %add6 = fadd fast double %mul5, %add
282 %mul7 = fmul fast double %1, %3
283 %sub = fsub fast double %add6, %mul7
284 %mul8 = fmul fast double %2, %3
285 %add9 = fadd fast double %mul8, %sub
; The call consumes the chain result; its return value is what gets stored.
286 %call = tail call double @hh(double %add9) #2
287 store double %call, ptr %q, align 8
; External function taking and returning a double; called from f6 with the
; result of its FP chain.
291 declare double @hh(double) #1
293 ; Check that we correctly deal with repeated operands.
294 ; The following testcase creates:
295 ; %d1 = FADDDrr killed %d0, %d0
296 ; We'll get a crash if we naively look at the first operand, remove it
297 ; from the substitution list then look at the second operand.
299 ; CHECK: fmadd [[x:d[0-9]+]]
300 ; CHECK: fadd d1, [[x]], [[x]]
; f7: a single FP chain followed by an fadd that uses the chain result as both
; of its operands. That sum is passed as the second argument of @hhh; the first
; argument is the constant 0.0.
302 define void @f7(ptr nocapture readonly %p, ptr nocapture %q) #0 {
; Load p[0] .. p[4] as doubles.
304 %0 = load double, ptr %p, align 8
305 %arrayidx1 = getelementptr inbounds double, ptr %p, i64 1
306 %1 = load double, ptr %arrayidx1, align 8
307 %arrayidx2 = getelementptr inbounds double, ptr %p, i64 2
308 %2 = load double, ptr %arrayidx2, align 8
309 %arrayidx3 = getelementptr inbounds double, ptr %p, i64 3
310 %3 = load double, ptr %arrayidx3, align 8
311 %arrayidx4 = getelementptr inbounds double, ptr %p, i64 4
312 %4 = load double, ptr %arrayidx4, align 8
; The chain: %add -> %add6 -> %sub -> %add9.
313 %mul = fmul fast double %0, %1
314 %add = fadd fast double %mul, %4
315 %mul5 = fmul fast double %1, %2
316 %add6 = fadd fast double %mul5, %add
317 %mul7 = fmul fast double %1, %3
318 %sub = fsub fast double %add6, %mul7
319 %mul8 = fmul fast double %2, %3
320 %add9 = fadd fast double %mul8, %sub
; Repeated operand: %add9 appears twice in the same instruction.
321 %add10 = fadd fast double %add9, %add9
; %q is not written in the lines shown here; the sum is only passed to @hhh.
322 call void @hhh(double 0.0, double %add10)
; External function taking two doubles; called from f7. Unlike @g and @hh it
; carries no attribute group.
326 declare void @hhh(double, double)
; Attribute groups referenced above:
;   #0 - on every function definition in this file (f1 .. f7)
;   #1 - on the declarations of @g and @hh; same string attributes as #0 but
;        without nounwind
;   #2 - on the call sites of @g and @hh
328 attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
329 attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
330 attributes #2 = { nounwind }