1 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
2 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
5 ; Also run with -schedule-ppc-vsx-fma-mutation-early as a stress test for the
6 ; live-interval-updating logic.
7 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -schedule-ppc-vsx-fma-mutation-early
; Module-level target description: the leading "E" in the datalayout string
; selects big-endian layout, and the triple names 64-bit PowerPC Linux.
8 target datalayout = "E-m:e-i64:64-n32:64"
9 target triple = "powerpc64-unknown-linux-gnu"
; test1: two scalar FMAs, b*c+a and b*e+a, that share the multiplicand %b and
; the addend %a. The results are stored to d[0] and d[1].
; The default-prefix run expects one M-type FMA (xsmaddmdp) and one A-type FMA
; (xsmaddadp); the fast-isel run expects an fmr copy of the addend and A-type
; FMAs only.
11 define void @test1(double %a, double %b, double %c, double %e, double* nocapture %d) #0 {
13 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
14 store double %0, double* %d, align 8
15 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
16 %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
17 store double %1, double* %arrayidx1, align 8
; Anchor the default-prefix checks to this function so the DAG matches below
; cannot be satisfied by output belonging to a different function.
; CHECK-LABEL: @test1
21 ; CHECK-DAG: li [[C1:[0-9]+]], 8
22 ; CHECK-DAG: xsmaddmdp 3, 2, 1
23 ; CHECK-DAG: xsmaddadp 1, 2, 4
24 ; CHECK-DAG: stxsdx 3, 0, 7
25 ; CHECK-DAG: stxsdx 1, 7, [[C1]]
28 ; CHECK-FISL-LABEL: @test1
29 ; CHECK-FISL-DAG: fmr 0, 1
30 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
31 ; CHECK-FISL-DAG: stxsdx 0, 0, 7
32 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 4
33 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
34 ; CHECK-FISL-DAG: stxsdx 1, 7, [[C1]]
; test2: three scalar FMAs, b*c+a, b*e+a and b*f+a, that all share the
; multiplicand %b and the addend %a. The results are stored to d[0], d[1] and
; d[2].
; Only the two offset loads are actively checked for the default prefix; the
; FMA/store expectations are parked under a non-matching prefix (see the FIXME).
38 define void @test2(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
40 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
41 store double %0, double* %d, align 8
42 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
43 %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
44 store double %1, double* %arrayidx1, align 8
45 %2 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
46 %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
47 store double %2, double* %arrayidx2, align 8
; Anchor the default-prefix checks to this function so the DAG matches below
; cannot be satisfied by output belonging to a different function.
; CHECK-LABEL: @test2
51 ; CHECK-DAG: li [[C1:[0-9]+]], 8
52 ; CHECK-DAG: li [[C2:[0-9]+]], 16
53 ; FIXME: We no longer get this because of copy ordering at the MI level.
54 ; CHECX-DAG: xsmaddmdp 3, 2, 1
55 ; CHECX-DAG: xsmaddmdp 4, 2, 1
56 ; CHECX-DAG: xsmaddadp 1, 2, 5
57 ; CHECX-DAG: stxsdx 3, 0, 8
58 ; CHECX-DAG: stxsdx 4, 8, [[C1]]
59 ; CHECX-DAG: stxsdx 1, 8, [[C2]]
62 ; CHECK-FISL-LABEL: @test2
63 ; CHECK-FISL-DAG: fmr 0, 1
64 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
65 ; CHECK-FISL-DAG: stxsdx 0, 0, 8
66 ; CHECK-FISL-DAG: fmr 0, 1
67 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 4
68 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
69 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
70 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
71 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
72 ; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; test3: four scalar FMAs in which one result feeds another. %1 = b*e+a is
; used as the addend of %2 = b*c+%1 and is also stored itself, after all other
; stores. Store order is d[0], d[3], d[2], d[1].
76 define void @test3(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
78 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
79 store double %0, double* %d, align 8
80 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
81 %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
82 %arrayidx1 = getelementptr inbounds double, double* %d, i64 3
83 store double %2, double* %arrayidx1, align 8
84 %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
85 %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
86 store double %3, double* %arrayidx2, align 8
87 %arrayidx3 = getelementptr inbounds double, double* %d, i64 1
88 store double %1, double* %arrayidx3, align 8
; Anchor the default-prefix checks to this function so the DAG matches below
; cannot be satisfied by output belonging to a different function.
; CHECK-LABEL: @test3
92 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
93 ; CHECK-DAG: li [[C1:[0-9]+]], 24
94 ; CHECK-DAG: li [[C2:[0-9]+]], 16
95 ; CHECK-DAG: li [[C3:[0-9]+]], 8
96 ; CHECK-DAG: xsmaddmdp 4, 2, 1
97 ; CHECK-DAG: xsmaddadp 1, 2, 5
99 ; Note: We could convert this next FMA to M-type as well, but it would require
100 ; re-ordering the instructions.
101 ; CHECK-DAG: xsmaddadp [[F1]], 2, 3
103 ; CHECK-DAG: xsmaddmdp 3, 2, 4
104 ; CHECK-DAG: stxsdx [[F1]], 0, 8
105 ; CHECK-DAG: stxsdx 3, 8, [[C1]]
106 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
107 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
110 ; CHECK-FISL-LABEL: @test3
111 ; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
112 ; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
113 ; CHECK-FISL-DAG: fmr 4, [[F1]]
114 ; CHECK-FISL-DAG: xsmaddadp 4, 2, 3
115 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
116 ; CHECK-FISL-DAG: stxsdx 4, 8, [[C1]]
117 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
118 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
119 ; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
120 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
121 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
; test4: the same four scalar FMAs as test3, but the intermediate result
; %1 = b*e+a is stored to d[1] before it is consumed as the addend of
; %2 = b*c+%1. Store order is d[0], d[1], d[3], d[2].
125 define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
127 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
128 store double %0, double* %d, align 8
129 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
130 %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
131 store double %1, double* %arrayidx1, align 8
132 %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
133 %arrayidx3 = getelementptr inbounds double, double* %d, i64 3
134 store double %2, double* %arrayidx3, align 8
135 %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
136 %arrayidx4 = getelementptr inbounds double, double* %d, i64 2
137 store double %3, double* %arrayidx4, align 8
; Expected output of the default (non-fast-isel) run.
140 ; CHECK-LABEL: @test4
141 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
142 ; CHECK-DAG: li [[C1:[0-9]+]], 8
143 ; CHECK-DAG: li [[C2:[0-9]+]], 16
144 ; CHECK-DAG: xsmaddmdp 4, 2, 1
146 ; Note: We could convert this next FMA to M-type as well, but it would require
147 ; re-ordering the instructions.
148 ; CHECK-DAG: xsmaddadp 1, 2, 5
150 ; CHECK-DAG: xsmaddadp [[F1]], 2, 3
151 ; CHECK-DAG: stxsdx [[F1]], 0, 8
152 ; CHECK-DAG: stxsdx 4, 8, [[C1]]
153 ; CHECK-DAG: li [[C3:[0-9]+]], 24
154 ; CHECK-DAG: xsmaddadp 4, 2, 3
155 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
156 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
; Expected output of the -fast-isel -O0 run: only A-type FMAs, with fmr copies
; of the addend.
159 ; CHECK-FISL-LABEL: @test4
160 ; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
161 ; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 3
162 ; CHECK-FISL-DAG: stxsdx 0, 0, 8
163 ; CHECK-FISL-DAG: fmr [[F1]], 1
164 ; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
165 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
166 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
167 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
168 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
169 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
170 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
171 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
172 ; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; Scalar double-precision fused multiply-add intrinsic called by test1-test4;
; every call site passes the addend as the third operand.
176 declare double @llvm.fma.f64(double, double, double) #0
; testv1: <2 x double> counterpart of test1. Two vector FMAs, b*c+a and b*e+a,
; share the multiplicand %b and the addend %a; the results are stored to d[0]
; and d[1] (byte offset 16).
; The store checks use the [[C1]] register captured by the preceding li check
; rather than capturing it again, so they verify that the store is indexed by
; the register that was loaded with the offset.
178 define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
180 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
181 store <2 x double> %0, <2 x double>* %d, align 8
182 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
183 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
184 store <2 x double> %1, <2 x double>* %arrayidx1, align 8
187 ; CHECK-LABEL: @testv1
188 ; CHECK-DAG: xvmaddmdp 36, 35, 34
189 ; CHECK-DAG: xvmaddadp 34, 35, 37
190 ; CHECK-DAG: li [[C1:[0-9]+]], 16
191 ; CHECK-DAG: stxvd2x 36, 0, 3
192 ; CHECK-DAG: stxvd2x 34, 3, [[C1]]
195 ; CHECK-FISL-LABEL: @testv1
196 ; CHECK-FISL-DAG: xxlor 0, 34, 34
197 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
198 ; CHECK-FISL-DAG: stxvd2x 0, 0, 3
199 ; CHECK-FISL-DAG: xvmaddadp 34, 35, 37
200 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
201 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C1]]
; testv2: <2 x double> counterpart of test2. Three vector FMAs, b*c+a, b*e+a
; and b*f+a, share the multiplicand %b and the addend %a; the results are
; stored to d[0], d[1] (byte offset 16) and d[2] (byte offset 32).
; The store checks use the [[C1]]/[[C2]] registers captured by the li checks
; rather than capturing them again, so they verify that each store is indexed
; by the register that was loaded with its offset. The parked default-prefix
; expectations are corrected the same way so they are right when re-enabled.
205 define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
207 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
208 store <2 x double> %0, <2 x double>* %d, align 8
209 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
210 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
211 store <2 x double> %1, <2 x double>* %arrayidx1, align 8
212 %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
213 %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
214 store <2 x double> %2, <2 x double>* %arrayidx2, align 8
217 ; CHECK-LABEL: @testv2
218 ; FIXME: We currently don't get this because of copy ordering on the MI level.
219 ; CHECX-DAG: xvmaddmdp 36, 35, 34
220 ; CHECX-DAG: xvmaddmdp 37, 35, 34
221 ; CHECX-DAG: li [[C1:[0-9]+]], 16
222 ; CHECX-DAG: li [[C2:[0-9]+]], 32
223 ; CHECX-DAG: xvmaddadp 34, 35, 38
224 ; CHECX-DAG: stxvd2x 36, 0, 3
225 ; CHECX-DAG: stxvd2x 37, 3, [[C1]]
226 ; CHECX-DAG: stxvd2x 34, 3, [[C2]]
229 ; CHECK-FISL-LABEL: @testv2
230 ; CHECK-FISL-DAG: xxlor 0, 34, 34
231 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
232 ; CHECK-FISL-DAG: stxvd2x 0, 0, 3
233 ; CHECK-FISL-DAG: xxlor 0, 34, 34
234 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
235 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
236 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
237 ; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
238 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
239 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; testv3: <2 x double> counterpart of test3. %1 = b*e+a is used as the addend
; of %2 = b*c+%1 and is also stored itself, after all other stores. Store
; order is d[0], d[3], d[2], d[1].
243 define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
245 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
246 store <2 x double> %0, <2 x double>* %d, align 8
247 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
248 %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
249 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
250 store <2 x double> %2, <2 x double>* %arrayidx1, align 8
251 %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
252 %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
253 store <2 x double> %3, <2 x double>* %arrayidx2, align 8
254 %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
255 store <2 x double> %1, <2 x double>* %arrayidx3, align 8
258 ; Note: There is some unavoidable changeability in this variant. If the
259 ; FMAs are reordered differently, the algorithm can pick a different
260 ; multiplicand to destroy, changing the register assignment. There isn't
261 ; a good way to express this possibility, so hopefully this doesn't change
; too often.
; Expected output of the default (non-fast-isel) run.
264 ; CHECK-LABEL: @testv3
265 ; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
266 ; CHECK-DAG: li [[C1:[0-9]+]], 48
267 ; CHECK-DAG: li [[C2:[0-9]+]], 32
268 ; CHECK-DAG: xvmaddmdp 37, 35, 34
269 ; CHECK-DAG: li [[C3:[0-9]+]], 16
271 ; Note: We could convert this next FMA to M-type as well, but it would require
272 ; re-ordering the instructions.
273 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
275 ; CHECK-DAG: xvmaddmdp 36, 35, 37
276 ; CHECK-DAG: xvmaddadp 34, 35, 38
277 ; CHECK-DAG: stxvd2x 32, 0, 3
278 ; CHECK-DAG: stxvd2x 36, 3, [[C1]]
279 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
280 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
; Expected output of the -fast-isel -O0 run: only A-type FMAs, with xxlor
; copies of the addend.
283 ; CHECK-FISL-LABEL: @testv3
284 ; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
285 ; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
286 ; CHECK-FISL-DAG: stxvd2x [[V1]], 0, 3
287 ; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
288 ; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
289 ; CHECK-FISL-DAG: xxlor [[V3:[0-9]+]], 0, 0
290 ; CHECK-FISL-DAG: xvmaddadp [[V3]], 35, 36
291 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 48
292 ; CHECK-FISL-DAG: stxvd2x [[V3]], 3, [[C1]]
293 ; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
294 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
295 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
296 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 16
297 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
; testv4: <2 x double> counterpart of test4. The intermediate result
; %1 = b*e+a is stored to d[1] before it is consumed as the addend of
; %2 = b*c+%1. Store order is d[0], d[1], d[3], d[2].
301 define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
303 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
304 store <2 x double> %0, <2 x double>* %d, align 8
305 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
306 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
307 store <2 x double> %1, <2 x double>* %arrayidx1, align 8
308 %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
309 %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
310 store <2 x double> %2, <2 x double>* %arrayidx3, align 8
311 %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
312 %arrayidx4 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
313 store <2 x double> %3, <2 x double>* %arrayidx4, align 8
; Expected output of the default (non-fast-isel) run.
316 ; CHECK-LABEL: @testv4
317 ; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
318 ; CHECK-DAG: xvmaddmdp 37, 35, 34
319 ; CHECK-DAG: li [[C1:[0-9]+]], 16
320 ; CHECK-DAG: li [[C2:[0-9]+]], 32
321 ; CHECK-DAG: xvmaddadp 34, 35, 38
323 ; Note: We could convert this next FMA to M-type as well, but it would require
324 ; re-ordering the instructions.
325 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
327 ; CHECK-DAG: stxvd2x 32, 0, 3
328 ; CHECK-DAG: stxvd2x 37, 3, [[C1]]
329 ; CHECK-DAG: li [[C3:[0-9]+]], 48
330 ; CHECK-DAG: xvmaddadp 37, 35, 36
331 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
332 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; Expected output of the -fast-isel -O0 run: only A-type FMAs, with xxlor
; copies of the addend.
335 ; CHECK-FISL-LABEL: @testv4
336 ; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
337 ; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
338 ; CHECK-FISL-DAG: stxvd2x 0, 0, 3
339 ; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
340 ; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
341 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
342 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
343 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
344 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 48
345 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
346 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
347 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
348 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; Two-element double-precision vector FMA intrinsic called by testv1-testv4.
352 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; Attribute group #0 is applied to every function and intrinsic declaration
; in this file.
; NOTE(review): the test functions store through %d yet carry readnone via
; this group - confirm that is intentional.
354 attributes #0 = { nounwind readnone }