1 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
2 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s
; Declarations of the llvm.sqrt intrinsic overloads called by the tests below:
; scalar float/double plus the 2-, 4- and 8-lane float and 2- and 4-lane
; double vector forms. All of them carry attribute group #0.
4 declare float @llvm.sqrt.f32(float) #0
5 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
6 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
7 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
8 declare double @llvm.sqrt.f64(double) #0
9 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
10 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0
; Scalar float square root through a `fast` call to llvm.sqrt.f32.
; The default-prefix directives below expect an frsqrte estimate, an fmul that
; squares the estimate, an frsqrts consuming that square, then one further
; three-operand frsqrts. The negative directive rejects any additional frsqrts
; before the fcmp against #0 that ends the sequence.
12 define float @fsqrt(float %a) #0 {
13 %1 = tail call fast float @llvm.sqrt.f32(float %a)
22 ; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
23 ; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
24 ; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
25 ; CHECK: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}}
26 ; CHECK-NOT: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}}
27 ; CHECK: fcmp {{s[0-7]}}, #0
; <2 x float> square root through a `fast` call to llvm.sqrt.v2f32.
; Default-prefix expectation on .2s registers: frsqrte, fmul squaring the
; estimate, an frsqrts consuming the square, one further three-operand frsqrts
; and no more after it, then an fcmeq against #0.
30 define <2 x float> @f2sqrt(<2 x float> %a) #0 {
31 %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
34 ; FAULT-LABEL: f2sqrt:
38 ; CHECK-LABEL: f2sqrt:
40 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
41 ; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
42 ; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
43 ; CHECK: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}}
44 ; CHECK-NOT: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}}
45 ; CHECK: fcmeq {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, #0
; <4 x float> square root through a `fast` call to llvm.sqrt.v4f32.
; Default-prefix expectation on .4s registers: frsqrte, fmul squaring the
; estimate, an frsqrts consuming the square, one further three-operand frsqrts
; and no more after it, then an fcmeq against #0.
48 define <4 x float> @f4sqrt(<4 x float> %a) #0 {
49 %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
52 ; FAULT-LABEL: f4sqrt:
56 ; CHECK-LABEL: f4sqrt:
58 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
59 ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
60 ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
61 ; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
62 ; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
63 ; CHECK: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0
; <8 x float> square root through a `fast` call to llvm.sqrt.v8f32.
; The default-prefix directives expect two back-to-back sequences on .4s
; registers, each of the form frsqrte / fmul / frsqrts / frsqrts / fcmeq #0.
; Only the second sequence carries a negative directive forbidding a further
; frsqrts before its fcmeq.
; NOTE(review): presumably one sequence per 128-bit half of the 8-lane
; vector; confirm against llc output.
66 define <8 x float> @f8sqrt(<8 x float> %a) #0 {
67 %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
70 ; FAULT-LABEL: f8sqrt:
75 ; CHECK-LABEL: f8sqrt:
77 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
78 ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
79 ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
80 ; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
81 ; CHECK: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0
82 ; CHECK: frsqrte [[RC:v[0-7]\.4s]]
83 ; CHECK-NEXT: fmul [[RD:v[0-7]\.4s]], [[RC]], [[RC]]
84 ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RD]]
85 ; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
86 ; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
87 ; CHECK: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0
; Scalar double square root through a `fast` call to llvm.sqrt.f64.
; Same shape as the float case, but the default-prefix directives require two
; further three-operand frsqrts after the first (three frsqrts in total) and
; reject a fourth before the fcmp against #0.
90 define double @dsqrt(double %a) #0 {
91 %1 = tail call fast double @llvm.sqrt.f64(double %a)
100 ; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
101 ; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
102 ; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
103 ; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}}
104 ; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}}
105 ; CHECK-NOT: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}}
106 ; CHECK: fcmp {{d[0-7]}}, #0
; <2 x double> square root through a `fast` call to llvm.sqrt.v2f64.
; Default-prefix expectation on .2d registers: frsqrte, fmul squaring the
; estimate, three frsqrts in total with no fourth, then an fcmeq against #0.
109 define <2 x double> @d2sqrt(<2 x double> %a) #0 {
110 %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
113 ; FAULT-LABEL: d2sqrt:
117 ; CHECK-LABEL: d2sqrt:
119 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
120 ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
121 ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
122 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
123 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
124 ; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
125 ; CHECK: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0
; <4 x double> square root through a `fast` call to llvm.sqrt.v4f64.
; The default-prefix directives expect two back-to-back sequences on .2d
; registers. Each is frsqrte, fmul squaring the estimate, three frsqrts with
; no fourth, and an fcmeq against #0.
; NOTE(review): presumably one sequence per 128-bit half of the 4-lane
; vector; confirm against llc output.
128 define <4 x double> @d4sqrt(<4 x double> %a) #0 {
129 %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
132 ; FAULT-LABEL: d4sqrt:
137 ; CHECK-LABEL: d4sqrt:
139 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
140 ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
141 ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
142 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
143 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
144 ; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
145 ; CHECK: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0
146 ; CHECK: frsqrte [[RC:v[0-7]\.2d]]
147 ; CHECK-NEXT: fmul [[RD:v[0-7]\.2d]], [[RC]], [[RC]]
148 ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RD]]
149 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
150 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
151 ; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
152 ; CHECK: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0
; Scalar float reciprocal square root: a `fast` fdiv of 1.0 by the result of
; a `fast` call to llvm.sqrt.f32.
; Default-prefix expectation: frsqrte, fmul squaring the estimate, two frsqrts
; in total with no third, and no fcmp against #0 anywhere after them (unlike
; the plain square-root tests, which do expect that compare).
155 define float @frsqrt(float %a) #0 {
156 %1 = tail call fast float @llvm.sqrt.f32(float %a)
157 %2 = fdiv fast float 1.000000e+00, %1
160 ; FAULT-LABEL: frsqrt:
164 ; CHECK-LABEL: frsqrt:
166 ; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
167 ; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
168 ; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
169 ; CHECK: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}}
170 ; CHECK-NOT: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}}
171 ; CHECK-NOT: fcmp {{s[0-7]}}, #0
; <2 x float> reciprocal square root: a `fast` fdiv of a splat of 1.0 by the
; result of a `fast` call to llvm.sqrt.v2f32.
; Default-prefix expectation on .2s registers: frsqrte, fmul squaring the
; estimate, two frsqrts in total with no third, and no fcmeq against #0.
174 define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
175 %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
176 %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
179 ; FAULT-LABEL: f2rsqrt:
183 ; CHECK-LABEL: f2rsqrt:
185 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
186 ; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
187 ; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
188 ; CHECK: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}}
189 ; CHECK-NOT: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}}
190 ; CHECK-NOT: fcmeq {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, #0
; <4 x float> reciprocal square root: a `fast` fdiv of a splat of 1.0 by the
; result of a `fast` call to llvm.sqrt.v4f32.
; Default-prefix expectation on .4s registers: frsqrte, fmul squaring the
; estimate, two frsqrts in total with no third, and no fcmeq against #0.
193 define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
194 %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
195 %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
198 ; FAULT-LABEL: f4rsqrt:
202 ; CHECK-LABEL: f4rsqrt:
204 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
205 ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
206 ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
207 ; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
208 ; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
209 ; CHECK-NOT: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0
; <8 x float> reciprocal square root: a `fast` fdiv of a splat of 1.0 by the
; result of a `fast` call to llvm.sqrt.v8f32.
; Only the frsqrte is pinned to the line right after the previous match; the
; fmul and frsqrts directives may match later lines. Four frsqrts matches are
; required in total, a fifth is rejected, and no fcmeq against #0 may follow.
; NOTE(review): the relaxed ordering and doubled frsqrts count presumably
; reflect two interleaved .4s halves; confirm against llc output.
212 define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
213 %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
214 %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
217 ; FAULT-LABEL: f8rsqrt:
222 ; CHECK-LABEL: f8rsqrt:
224 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
225 ; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
226 ; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
227 ; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
228 ; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
229 ; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
230 ; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}}
231 ; CHECK-NOT: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0
; Scalar double reciprocal square root: a `fast` fdiv of 1.0 by the result of
; a `fast` call to llvm.sqrt.f64.
; Default-prefix expectation: frsqrte, fmul squaring the estimate, three
; frsqrts in total with no fourth, and no fcmp against #0 after them.
; The final negative pattern accepts any of d0-d7 rather than only d0, so a
; guard compare is rejected whichever register it uses. This matches the
; pattern style of the float variant of this test.
234 define double @drsqrt(double %a) #0 {
235 %1 = tail call fast double @llvm.sqrt.f64(double %a)
236 %2 = fdiv fast double 1.000000e+00, %1
239 ; FAULT-LABEL: drsqrt:
243 ; CHECK-LABEL: drsqrt:
245 ; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
246 ; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
247 ; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
248 ; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}}
249 ; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}}
250 ; CHECK-NOT: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}}
251 ; CHECK-NOT: fcmp {{d[0-7]}}, #0
; <2 x double> reciprocal square root: a `fast` fdiv of a splat of 1.0 by the
; result of a `fast` call to llvm.sqrt.v2f64.
; Default-prefix expectation on .2d registers: frsqrte, fmul squaring the
; estimate, three frsqrts in total with no fourth, and no fcmeq against #0.
254 define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
255 %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
256 %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
259 ; FAULT-LABEL: d2rsqrt:
263 ; CHECK-LABEL: d2rsqrt:
265 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
266 ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
267 ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
268 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
269 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
270 ; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
271 ; CHECK-NOT: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0
; <4 x double> reciprocal square root: a `fast` fdiv of a splat of 1.0 by the
; result of a `fast` call to llvm.sqrt.v4f64.
; Only the frsqrte is pinned to the line right after the previous match; the
; fmul and frsqrts directives may match later lines. Six frsqrts matches are
; required in total, a seventh is rejected, and no fcmeq against #0 may follow.
; NOTE(review): the relaxed ordering and doubled frsqrts count presumably
; reflect two interleaved .2d halves; confirm against llc output.
274 define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
275 %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
276 %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
279 ; FAULT-LABEL: d4rsqrt:
284 ; CHECK-LABEL: d4rsqrt:
286 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
287 ; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
288 ; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
289 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
290 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
291 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
292 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
293 ; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
294 ; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}}
295 ; CHECK-NOT: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0
; Attribute group #0, referenced by every declaration and definition visible
; in this file: nounwind plus the string attribute "unsafe-fp-math"="true".
298 attributes #0 = { nounwind "unsafe-fp-math"="true" }