1 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
2 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
; test1sp: two single-precision FMAs that share the multiplicand %b and the
; addend %a (llvm.fma(x, y, z) computes x*y + z):
;   d[0] = fma(b, c, a)
;   d[1] = fma(b, e, a)
; The default-prefix checks expect one xsmaddmsp and one xsmaddasp.
; The FISL-prefixed checks (the -fast-isel -O0 run) expect an fmr of
; register 1 into register 0 and two xsmaddasp instructions.
5 define void @test1sp(float %a, float %b, float %c, float %e, ptr nocapture %d) #0 {
7 %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
8 store float %0, ptr %d, align 4
9 %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
10 %arrayidx1 = getelementptr inbounds float, ptr %d, i64 1
11 store float %1, ptr %arrayidx1, align 4
; Expected output of the default run. [[C1]] captures the register holding
; the byte offset 4 used by the second store.
14 ; CHECK-LABEL: @test1sp
15 ; CHECK-DAG: li [[C1:[0-9]+]], 4
16 ; CHECK-DAG: xsmaddmsp 3, 2, 1
17 ; CHECK-DAG: xsmaddasp 1, 2, 4
18 ; CHECK-DAG: stxsspx 3, 0, 7
19 ; CHECK-DAG: stxsspx 1, 7, [[C1]]
; Expected output of the fast-isel run.
22 ; CHECK-FISL-LABEL: @test1sp
23 ; CHECK-FISL-DAG: fmr 0, 1
24 ; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
25 ; CHECK-FISL-DAG: stxsspx 0, 0, 7
26 ; CHECK-FISL-DAG: xsmaddasp 1, 2, 4
27 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
28 ; CHECK-FISL-DAG: stxsspx 1, 7, [[C1]]
; test2sp: three single-precision FMAs that all share the multiplicand %b and
; the addend %a:
;   d[0] = fma(b, c, a)
;   d[1] = fma(b, e, a)
;   d[2] = fma(b, f, a)
; In the default run only the two li offset checks are active. The FMA and
; store expectations use the CHECX spelling, which no RUN line selects as a
; prefix, so they are currently not verified (see the FIXME below).
; The FISL-prefixed checks expect two fmr copies and three xsmaddasp.
32 define void @test2sp(float %a, float %b, float %c, float %e, float %f, ptr nocapture %d) #0 {
34 %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
35 store float %0, ptr %d, align 4
36 %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
37 %arrayidx1 = getelementptr inbounds float, ptr %d, i64 1
38 store float %1, ptr %arrayidx1, align 4
39 %2 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
40 %arrayidx2 = getelementptr inbounds float, ptr %d, i64 2
41 store float %2, ptr %arrayidx2, align 4
; Expected output of the default run. [[C1]] and [[C2]] capture the registers
; holding the byte offsets 4 and 8.
44 ; CHECK-LABEL: @test2sp
45 ; CHECK-DAG: li [[C1:[0-9]+]], 4
46 ; CHECK-DAG: li [[C2:[0-9]+]], 8
47 ; FIXME: We now miss this because of copy ordering at the MI level.
48 ; CHECX-DAG: xsmaddmsp 3, 2, 1
49 ; CHECX-DAG: xsmaddmsp 4, 2, 1
50 ; CHECX-DAG: xsmaddasp 1, 2, 5
51 ; CHECX-DAG: stxsspx 3, 0, 8
52 ; CHECX-DAG: stxsspx 4, 8, [[C1]]
53 ; CHECX-DAG: stxsspx 1, 8, [[C2]]
; Expected output of the fast-isel run.
56 ; CHECK-FISL-LABEL: @test2sp
57 ; CHECK-FISL-DAG: fmr 0, 1
58 ; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
59 ; CHECK-FISL-DAG: stxsspx 0, 0, 8
60 ; CHECK-FISL-DAG: fmr 0, 1
61 ; CHECK-FISL-DAG: xsmaddasp 0, 2, 4
62 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
63 ; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
64 ; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
65 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
66 ; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
; test3sp: four single-precision FMAs where one result feeds another as its
; addend, and that intermediate result is stored last:
;   %0 = fma(b, c, a)   -> d[0]
;   %1 = fma(b, e, a)   -> d[1]  (stored at the end of the function)
;   %2 = fma(b, c, %1)  -> d[3]
;   %3 = fma(b, f, a)   -> d[2]
; The default-prefix checks expect an fmr, two xsmaddmsp and two xsmaddasp.
; The FISL-prefixed checks expect two fmr copies and three xsmaddasp.
70 define void @test3sp(float %a, float %b, float %c, float %e, float %f, ptr nocapture %d) #0 {
72 %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
73 store float %0, ptr %d, align 4
74 %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
75 %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
76 %arrayidx1 = getelementptr inbounds float, ptr %d, i64 3
77 store float %2, ptr %arrayidx1, align 4
78 %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
79 %arrayidx2 = getelementptr inbounds float, ptr %d, i64 2
80 store float %3, ptr %arrayidx2, align 4
81 %arrayidx3 = getelementptr inbounds float, ptr %d, i64 1
82 store float %1, ptr %arrayidx3, align 4
; Expected output of the default run. [[F1]] captures the destination of the
; fmr copy of register 1; [[C1]], [[C2]] and [[C3]] capture the registers
; holding the byte offsets 12, 8 and 4.
85 ; CHECK-LABEL: @test3sp
86 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
87 ; CHECK-DAG: li [[C1:[0-9]+]], 12
88 ; CHECK-DAG: li [[C2:[0-9]+]], 8
89 ; CHECK-DAG: li [[C3:[0-9]+]], 4
90 ; CHECK-DAG: xsmaddmsp 4, 2, 1
91 ; CHECK-DAG: xsmaddasp 1, 2, 5
93 ; Note: We could convert this next FMA to M-type as well, but it would require
94 ; re-ordering the instructions.
95 ; CHECK-DAG: xsmaddasp [[F1]], 2, 3
97 ; CHECK-DAG: xsmaddmsp 3, 2, 4
98 ; CHECK-DAG: stxsspx [[F1]], 0, 8
99 ; CHECK-DAG: stxsspx 3, 8, [[C1]]
100 ; CHECK-DAG: stxsspx 1, 8, [[C2]]
101 ; CHECK-DAG: stxsspx 4, 8, [[C3]]
; Expected output of the fast-isel run.
104 ; CHECK-FISL-LABEL: @test3sp
105 ; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
106 ; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
107 ; CHECK-FISL-DAG: fmr 4, [[F1]]
108 ; CHECK-FISL-DAG: xsmaddasp 4, 2, 3
109 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
110 ; CHECK-FISL-DAG: stxsspx 4, 8, [[C1]]
111 ; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
112 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
113 ; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
114 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
115 ; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
; test4sp: the same four FMAs as test3sp, but the intermediate result %1 is
; stored to d[1] right after it is computed, before it is reused as an addend:
;   %0 = fma(b, c, a)   -> d[0]
;   %1 = fma(b, e, a)   -> d[1]
;   %2 = fma(b, c, %1)  -> d[3]
;   %3 = fma(b, f, a)   -> d[2]
; The default-prefix checks expect an fmr, one xsmaddmsp and three xsmaddasp.
; The FISL-prefixed checks expect two fmr copies and four xsmaddasp.
119 define void @test4sp(float %a, float %b, float %c, float %e, float %f, ptr nocapture %d) #0 {
121 %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
122 store float %0, ptr %d, align 4
123 %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
124 %arrayidx1 = getelementptr inbounds float, ptr %d, i64 1
125 store float %1, ptr %arrayidx1, align 4
126 %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
127 %arrayidx3 = getelementptr inbounds float, ptr %d, i64 3
128 store float %2, ptr %arrayidx3, align 4
129 %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
130 %arrayidx4 = getelementptr inbounds float, ptr %d, i64 2
131 store float %3, ptr %arrayidx4, align 4
; Expected output of the default run. [[F1]] captures the destination of the
; fmr copy of register 1; [[C1]], [[C2]] and [[C3]] capture the registers
; holding the byte offsets 4, 8 and 12.
134 ; CHECK-LABEL: @test4sp
135 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
136 ; CHECK-DAG: li [[C1:[0-9]+]], 4
137 ; CHECK-DAG: li [[C2:[0-9]+]], 8
138 ; CHECK-DAG: xsmaddmsp 4, 2, 1
140 ; Note: We could convert this next FMA to M-type as well, but it would require
141 ; re-ordering the instructions.
142 ; CHECK-DAG: xsmaddasp 1, 2, 5
144 ; CHECK-DAG: xsmaddasp [[F1]], 2, 3
145 ; CHECK-DAG: stxsspx [[F1]], 0, 8
146 ; CHECK-DAG: stxsspx 4, 8, [[C1]]
147 ; CHECK-DAG: li [[C3:[0-9]+]], 12
148 ; CHECK-DAG: xsmaddasp 4, 2, 3
149 ; CHECK-DAG: stxsspx 4, 8, [[C3]]
150 ; CHECK-DAG: stxsspx 1, 8, [[C2]]
; Expected output of the fast-isel run. Note that here [[C3]] names the
; offset-4 register and [[C1]] the offset-12 register, unlike the default run.
153 ; CHECK-FISL-LABEL: @test4sp
154 ; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
155 ; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 3
156 ; CHECK-FISL-DAG: stxsspx 0, 0, 8
157 ; CHECK-FISL-DAG: fmr [[F1]], 1
158 ; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
159 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
160 ; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
161 ; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
162 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
163 ; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
164 ; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
165 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
166 ; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
; Declaration of the float FMA intrinsic called by the test functions in this
; file; it takes three float operands and returns a float.
170 declare float @llvm.fma.f32(float, float, float) #0