1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
3 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s
; Declarations of the llvm.sqrt intrinsic overloads called by the tests in this
; file: scalar float/double plus the 2-, 4- and 8-lane float and 2- and 4-lane
; double vector forms. Each declaration is tagged with attribute group #0.
5 declare float @llvm.sqrt.f32(float) #0
6 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
7 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
8 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
9 declare double @llvm.sqrt.f64(double) #0
10 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
11 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0
; fsqrt: `fast` scalar f32 llvm.sqrt under attribute group #0.
; FAULT prefix (-use-reciprocal-square-root): a single fsqrt is expected.
; CHECK prefix (+use-reciprocal-square-root): an frsqrte estimate refined by
; two frsqrts steps, followed by an fcmp against #0.0 and an fcsel.
13 define float @fsqrt(float %a) #0 {
16 ; FAULT-NEXT: fsqrt s0, s0
21 ; CHECK-NEXT: frsqrte s1, s0
22 ; CHECK-NEXT: fmul s2, s1, s1
23 ; CHECK-NEXT: frsqrts s2, s0, s2
24 ; CHECK-NEXT: fmul s1, s1, s2
25 ; CHECK-NEXT: fmul s2, s1, s1
26 ; CHECK-NEXT: frsqrts s2, s0, s2
27 ; CHECK-NEXT: fmul s2, s2, s0
28 ; CHECK-NEXT: fmul s1, s1, s2
29 ; CHECK-NEXT: fcmp s0, #0.0
30 ; CHECK-NEXT: fcsel s0, s0, s1, eq
32 %1 = tail call fast float @llvm.sqrt.f32(float %a)
; fsqrt_ieee_denorms: same `fast` scalar f32 llvm.sqrt call as the plain f32
; case, but the function uses attribute group #1 instead of #0.
; FAULT expects a single fsqrt; CHECK expects frsqrte, two frsqrts refinement
; steps, then fcmp against #0.0 and fcsel.
36 define float @fsqrt_ieee_denorms(float %a) #1 {
37 ; FAULT-LABEL: fsqrt_ieee_denorms:
39 ; FAULT-NEXT: fsqrt s0, s0
42 ; CHECK-LABEL: fsqrt_ieee_denorms:
44 ; CHECK-NEXT: frsqrte s1, s0
45 ; CHECK-NEXT: fmul s2, s1, s1
46 ; CHECK-NEXT: frsqrts s2, s0, s2
47 ; CHECK-NEXT: fmul s1, s1, s2
48 ; CHECK-NEXT: fmul s2, s1, s1
49 ; CHECK-NEXT: frsqrts s2, s0, s2
50 ; CHECK-NEXT: fmul s2, s2, s0
51 ; CHECK-NEXT: fmul s1, s1, s2
52 ; CHECK-NEXT: fcmp s0, #0.0
53 ; CHECK-NEXT: fcsel s0, s0, s1, eq
55 %1 = tail call fast float @llvm.sqrt.f32(float %a)
; f2sqrt: `fast` llvm.sqrt on <2 x float> (attribute group #0).
; FAULT expects one vector fsqrt on the .2s arrangement.
; CHECK expects frsqrte, two frsqrts refinement steps, then a vector fcmeq
; against #0.0 and a bif on the .8b arrangement.
59 define <2 x float> @f2sqrt(<2 x float> %a) #0 {
60 ; FAULT-LABEL: f2sqrt:
62 ; FAULT-NEXT: fsqrt v0.2s, v0.2s
65 ; CHECK-LABEL: f2sqrt:
67 ; CHECK-NEXT: frsqrte v1.2s, v0.2s
68 ; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
69 ; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
70 ; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
71 ; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
72 ; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
73 ; CHECK-NEXT: fmul v2.2s, v2.2s, v0.2s
74 ; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
75 ; CHECK-NEXT: fcmeq v2.2s, v0.2s, #0.0
76 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
78 %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
; f4sqrt: `fast` llvm.sqrt on <4 x float> (attribute group #0).
; FAULT expects one vector fsqrt on the .4s arrangement.
; CHECK expects frsqrte, two frsqrts refinement steps, then a vector fcmeq
; against #0.0 and a bif on the .16b arrangement.
82 define <4 x float> @f4sqrt(<4 x float> %a) #0 {
83 ; FAULT-LABEL: f4sqrt:
85 ; FAULT-NEXT: fsqrt v0.4s, v0.4s
88 ; CHECK-LABEL: f4sqrt:
90 ; CHECK-NEXT: frsqrte v1.4s, v0.4s
91 ; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
92 ; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
93 ; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
94 ; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
95 ; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
96 ; CHECK-NEXT: fmul v2.4s, v2.4s, v0.4s
97 ; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
98 ; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0
99 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
101 %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
; f8sqrt: `fast` llvm.sqrt on <8 x float> (attribute group #0). The expected
; code handles the value as two .4s registers, v0 and v1.
; FAULT expects one fsqrt per register.
; CHECK expects the frsqrte / two-frsqrts / fcmeq / bif sequence first for v0
; and then, in full, again for v1.
105 define <8 x float> @f8sqrt(<8 x float> %a) #0 {
106 ; FAULT-LABEL: f8sqrt:
108 ; FAULT-NEXT: fsqrt v0.4s, v0.4s
109 ; FAULT-NEXT: fsqrt v1.4s, v1.4s
112 ; CHECK-LABEL: f8sqrt:
114 ; CHECK-NEXT: frsqrte v2.4s, v0.4s
115 ; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
116 ; CHECK-NEXT: frsqrts v3.4s, v0.4s, v3.4s
117 ; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
118 ; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
119 ; CHECK-NEXT: frsqrts v3.4s, v0.4s, v3.4s
120 ; CHECK-NEXT: fmul v3.4s, v3.4s, v0.4s
121 ; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
122 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, #0.0
123 ; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
124 ; CHECK-NEXT: frsqrte v2.4s, v1.4s
125 ; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
126 ; CHECK-NEXT: frsqrts v3.4s, v1.4s, v3.4s
127 ; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
128 ; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s
129 ; CHECK-NEXT: frsqrts v3.4s, v1.4s, v3.4s
130 ; CHECK-NEXT: fmul v3.4s, v3.4s, v1.4s
131 ; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
132 ; CHECK-NEXT: fcmeq v3.4s, v1.4s, #0.0
133 ; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
135 %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
; dsqrt: `fast` scalar f64 llvm.sqrt (attribute group #0).
; FAULT expects a single fsqrt on d0.
; CHECK expects frsqrte followed by three frsqrts refinement steps (one more
; than the f32 tests in this file), then fcmp against #0.0 and fcsel.
139 define double @dsqrt(double %a) #0 {
140 ; FAULT-LABEL: dsqrt:
142 ; FAULT-NEXT: fsqrt d0, d0
145 ; CHECK-LABEL: dsqrt:
147 ; CHECK-NEXT: frsqrte d1, d0
148 ; CHECK-NEXT: fmul d2, d1, d1
149 ; CHECK-NEXT: frsqrts d2, d0, d2
150 ; CHECK-NEXT: fmul d1, d1, d2
151 ; CHECK-NEXT: fmul d2, d1, d1
152 ; CHECK-NEXT: frsqrts d2, d0, d2
153 ; CHECK-NEXT: fmul d1, d1, d2
154 ; CHECK-NEXT: fmul d2, d1, d1
155 ; CHECK-NEXT: frsqrts d2, d0, d2
156 ; CHECK-NEXT: fmul d2, d2, d0
157 ; CHECK-NEXT: fmul d1, d1, d2
158 ; CHECK-NEXT: fcmp d0, #0.0
159 ; CHECK-NEXT: fcsel d0, d0, d1, eq
161 %1 = tail call fast double @llvm.sqrt.f64(double %a)
; dsqrt_ieee_denorms: `fast` scalar f64 llvm.sqrt, with the function using
; attribute group #1 instead of #0.
; FAULT expects a single fsqrt; CHECK expects frsqrte, three frsqrts refinement
; steps, then fcmp against #0.0 and fcsel.
165 define double @dsqrt_ieee_denorms(double %a) #1 {
166 ; FAULT-LABEL: dsqrt_ieee_denorms:
168 ; FAULT-NEXT: fsqrt d0, d0
171 ; CHECK-LABEL: dsqrt_ieee_denorms:
173 ; CHECK-NEXT: frsqrte d1, d0
174 ; CHECK-NEXT: fmul d2, d1, d1
175 ; CHECK-NEXT: frsqrts d2, d0, d2
176 ; CHECK-NEXT: fmul d1, d1, d2
177 ; CHECK-NEXT: fmul d2, d1, d1
178 ; CHECK-NEXT: frsqrts d2, d0, d2
179 ; CHECK-NEXT: fmul d1, d1, d2
180 ; CHECK-NEXT: fmul d2, d1, d1
181 ; CHECK-NEXT: frsqrts d2, d0, d2
182 ; CHECK-NEXT: fmul d2, d2, d0
183 ; CHECK-NEXT: fmul d1, d1, d2
184 ; CHECK-NEXT: fcmp d0, #0.0
185 ; CHECK-NEXT: fcsel d0, d0, d1, eq
187 %1 = tail call fast double @llvm.sqrt.f64(double %a)
; d2sqrt: `fast` llvm.sqrt on <2 x double> (attribute group #0).
; FAULT expects one vector fsqrt on the .2d arrangement.
; CHECK expects frsqrte, three frsqrts refinement steps, then a vector fcmeq
; against #0.0 and a bif on the .16b arrangement.
191 define <2 x double> @d2sqrt(<2 x double> %a) #0 {
192 ; FAULT-LABEL: d2sqrt:
194 ; FAULT-NEXT: fsqrt v0.2d, v0.2d
197 ; CHECK-LABEL: d2sqrt:
199 ; CHECK-NEXT: frsqrte v1.2d, v0.2d
200 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
201 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
202 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
203 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
204 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
205 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
206 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
207 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
208 ; CHECK-NEXT: fmul v2.2d, v2.2d, v0.2d
209 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
210 ; CHECK-NEXT: fcmeq v2.2d, v0.2d, #0.0
211 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
213 %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
; d4sqrt: `fast` llvm.sqrt on <4 x double> (attribute group #0). The expected
; code handles the value as two .2d registers, v0 and v1.
; FAULT expects one fsqrt per register.
; CHECK expects the frsqrte / three-frsqrts / fcmeq / bif sequence first for v0
; and then, in full, again for v1.
217 define <4 x double> @d4sqrt(<4 x double> %a) #0 {
218 ; FAULT-LABEL: d4sqrt:
220 ; FAULT-NEXT: fsqrt v0.2d, v0.2d
221 ; FAULT-NEXT: fsqrt v1.2d, v1.2d
224 ; CHECK-LABEL: d4sqrt:
226 ; CHECK-NEXT: frsqrte v2.2d, v0.2d
227 ; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
228 ; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d
229 ; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
230 ; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
231 ; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d
232 ; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
233 ; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
234 ; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d
235 ; CHECK-NEXT: fmul v3.2d, v3.2d, v0.2d
236 ; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
237 ; CHECK-NEXT: fcmeq v3.2d, v0.2d, #0.0
238 ; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
239 ; CHECK-NEXT: frsqrte v2.2d, v1.2d
240 ; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
241 ; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d
242 ; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
243 ; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
244 ; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d
245 ; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
246 ; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d
247 ; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d
248 ; CHECK-NEXT: fmul v3.2d, v3.2d, v1.2d
249 ; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
250 ; CHECK-NEXT: fcmeq v3.2d, v1.2d, #0.0
251 ; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
253 %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
; frsqrt: scalar f32 reciprocal square root, written as a `fast` fdiv of 1.0 by
; a `fast` llvm.sqrt (attribute group #0).
; FAULT expects fsqrt, an fmov of #1.0 and an fdiv.
; CHECK expects frsqrte plus two frsqrts refinement steps and a final fmul;
; no fcmp/fcsel appears in the expected sequence.
257 define float @frsqrt(float %a) #0 {
258 ; FAULT-LABEL: frsqrt:
260 ; FAULT-NEXT: fsqrt s0, s0
261 ; FAULT-NEXT: fmov s1, #1.00000000
262 ; FAULT-NEXT: fdiv s0, s1, s0
265 ; CHECK-LABEL: frsqrt:
267 ; CHECK-NEXT: frsqrte s1, s0
268 ; CHECK-NEXT: fmul s2, s1, s1
269 ; CHECK-NEXT: frsqrts s2, s0, s2
270 ; CHECK-NEXT: fmul s1, s1, s2
271 ; CHECK-NEXT: fmul s2, s1, s1
272 ; CHECK-NEXT: frsqrts s0, s0, s2
273 ; CHECK-NEXT: fmul s0, s1, s0
275 %1 = tail call fast float @llvm.sqrt.f32(float %a)
276 %2 = fdiv fast float 1.000000e+00, %1
; f2rsqrt: <2 x float> reciprocal square root, written as a `fast` fdiv of a
; splat of 1.0 by a `fast` llvm.sqrt (attribute group #0).
; FAULT expects vector fsqrt, an fmov of #1.0 and an fdiv on .2s.
; CHECK expects frsqrte plus two frsqrts refinement steps and a final fmul;
; no fcmeq/bif appears in the expected sequence.
280 define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
281 ; FAULT-LABEL: f2rsqrt:
283 ; FAULT-NEXT: fsqrt v0.2s, v0.2s
284 ; FAULT-NEXT: fmov v1.2s, #1.00000000
285 ; FAULT-NEXT: fdiv v0.2s, v1.2s, v0.2s
288 ; CHECK-LABEL: f2rsqrt:
290 ; CHECK-NEXT: frsqrte v1.2s, v0.2s
291 ; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
292 ; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
293 ; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
294 ; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
295 ; CHECK-NEXT: frsqrts v0.2s, v0.2s, v2.2s
296 ; CHECK-NEXT: fmul v0.2s, v1.2s, v0.2s
298 %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
299 %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
; f4rsqrt: <4 x float> reciprocal square root, written as a `fast` fdiv of a
; splat of 1.0 by a `fast` llvm.sqrt (attribute group #0).
; FAULT expects vector fsqrt, an fmov of #1.0 and an fdiv on .4s.
; CHECK expects frsqrte plus two frsqrts refinement steps and a final fmul;
; no fcmeq/bif appears in the expected sequence.
303 define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
304 ; FAULT-LABEL: f4rsqrt:
306 ; FAULT-NEXT: fsqrt v0.4s, v0.4s
307 ; FAULT-NEXT: fmov v1.4s, #1.00000000
308 ; FAULT-NEXT: fdiv v0.4s, v1.4s, v0.4s
311 ; CHECK-LABEL: f4rsqrt:
313 ; CHECK-NEXT: frsqrte v1.4s, v0.4s
314 ; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
315 ; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
316 ; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
317 ; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
318 ; CHECK-NEXT: frsqrts v0.4s, v0.4s, v2.4s
319 ; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
321 %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
322 %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
; f8rsqrt: <8 x float> reciprocal square root, written as a `fast` fdiv of a
; splat of 1.0 by a `fast` llvm.sqrt (attribute group #0). The expected code
; handles the value as two .4s registers, v0 and v1.
; FAULT expects two fsqrt instructions, one fmov of #1.0 into v2, and two
; fdivs that both use v2 as the dividend.
; CHECK expects two frsqrte estimates, each refined by two frsqrts steps, with
; the instructions of the two chains interleaved.
326 define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
327 ; FAULT-LABEL: f8rsqrt:
329 ; FAULT-NEXT: fsqrt v1.4s, v1.4s
330 ; FAULT-NEXT: fsqrt v0.4s, v0.4s
331 ; FAULT-NEXT: fmov v2.4s, #1.00000000
332 ; FAULT-NEXT: fdiv v0.4s, v2.4s, v0.4s
333 ; FAULT-NEXT: fdiv v1.4s, v2.4s, v1.4s
336 ; CHECK-LABEL: f8rsqrt:
338 ; CHECK-NEXT: frsqrte v2.4s, v0.4s
339 ; CHECK-NEXT: fmul v4.4s, v2.4s, v2.4s
340 ; CHECK-NEXT: frsqrte v3.4s, v1.4s
341 ; CHECK-NEXT: frsqrts v4.4s, v0.4s, v4.4s
342 ; CHECK-NEXT: fmul v2.4s, v2.4s, v4.4s
343 ; CHECK-NEXT: fmul v4.4s, v3.4s, v3.4s
344 ; CHECK-NEXT: frsqrts v4.4s, v1.4s, v4.4s
345 ; CHECK-NEXT: fmul v3.4s, v3.4s, v4.4s
346 ; CHECK-NEXT: fmul v4.4s, v2.4s, v2.4s
347 ; CHECK-NEXT: frsqrts v0.4s, v0.4s, v4.4s
348 ; CHECK-NEXT: fmul v4.4s, v3.4s, v3.4s
349 ; CHECK-NEXT: frsqrts v1.4s, v1.4s, v4.4s
350 ; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s
351 ; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s
353 %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
354 %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
; drsqrt: scalar f64 reciprocal square root, written as a `fast` fdiv of 1.0 by
; a `fast` llvm.sqrt (attribute group #0).
; FAULT expects fsqrt, an fmov of #1.0 and an fdiv.
; CHECK expects frsqrte plus three frsqrts refinement steps and a final fmul;
; no fcmp/fcsel appears in the expected sequence.
358 define double @drsqrt(double %a) #0 {
359 ; FAULT-LABEL: drsqrt:
361 ; FAULT-NEXT: fsqrt d0, d0
362 ; FAULT-NEXT: fmov d1, #1.00000000
363 ; FAULT-NEXT: fdiv d0, d1, d0
366 ; CHECK-LABEL: drsqrt:
368 ; CHECK-NEXT: frsqrte d1, d0
369 ; CHECK-NEXT: fmul d2, d1, d1
370 ; CHECK-NEXT: frsqrts d2, d0, d2
371 ; CHECK-NEXT: fmul d1, d1, d2
372 ; CHECK-NEXT: fmul d2, d1, d1
373 ; CHECK-NEXT: frsqrts d2, d0, d2
374 ; CHECK-NEXT: fmul d1, d1, d2
375 ; CHECK-NEXT: fmul d2, d1, d1
376 ; CHECK-NEXT: frsqrts d0, d0, d2
377 ; CHECK-NEXT: fmul d0, d1, d0
379 %1 = tail call fast double @llvm.sqrt.f64(double %a)
380 %2 = fdiv fast double 1.000000e+00, %1
; d2rsqrt: <2 x double> reciprocal square root, written as a `fast` fdiv of a
; splat of 1.0 by a `fast` llvm.sqrt (attribute group #0).
; FAULT expects vector fsqrt, an fmov of #1.0 and an fdiv on .2d.
; CHECK expects frsqrte plus three frsqrts refinement steps and a final fmul;
; no fcmeq/bif appears in the expected sequence.
384 define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
385 ; FAULT-LABEL: d2rsqrt:
387 ; FAULT-NEXT: fsqrt v0.2d, v0.2d
388 ; FAULT-NEXT: fmov v1.2d, #1.00000000
389 ; FAULT-NEXT: fdiv v0.2d, v1.2d, v0.2d
392 ; CHECK-LABEL: d2rsqrt:
394 ; CHECK-NEXT: frsqrte v1.2d, v0.2d
395 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
396 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
397 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
398 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
399 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
400 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
401 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
402 ; CHECK-NEXT: frsqrts v0.2d, v0.2d, v2.2d
403 ; CHECK-NEXT: fmul v0.2d, v1.2d, v0.2d
405 %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
406 %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
; d4rsqrt: <4 x double> reciprocal square root, written as a `fast` fdiv of a
; splat of 1.0 by a `fast` llvm.sqrt (attribute group #0). The expected code
; handles the value as two .2d registers, v0 and v1.
; FAULT expects two fsqrt instructions, one fmov of #1.0 into v2, and two
; fdivs that both use v2 as the dividend.
; CHECK expects two frsqrte estimates, each refined by three frsqrts steps,
; with the instructions of the two chains interleaved.
410 define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
411 ; FAULT-LABEL: d4rsqrt:
413 ; FAULT-NEXT: fsqrt v1.2d, v1.2d
414 ; FAULT-NEXT: fsqrt v0.2d, v0.2d
415 ; FAULT-NEXT: fmov v2.2d, #1.00000000
416 ; FAULT-NEXT: fdiv v0.2d, v2.2d, v0.2d
417 ; FAULT-NEXT: fdiv v1.2d, v2.2d, v1.2d
420 ; CHECK-LABEL: d4rsqrt:
422 ; CHECK-NEXT: frsqrte v2.2d, v0.2d
423 ; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d
424 ; CHECK-NEXT: frsqrte v3.2d, v1.2d
425 ; CHECK-NEXT: frsqrts v4.2d, v0.2d, v4.2d
426 ; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d
427 ; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d
428 ; CHECK-NEXT: frsqrts v4.2d, v1.2d, v4.2d
429 ; CHECK-NEXT: fmul v3.2d, v3.2d, v4.2d
430 ; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d
431 ; CHECK-NEXT: frsqrts v4.2d, v0.2d, v4.2d
432 ; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d
433 ; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d
434 ; CHECK-NEXT: frsqrts v4.2d, v1.2d, v4.2d
435 ; CHECK-NEXT: fmul v3.2d, v3.2d, v4.2d
436 ; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d
437 ; CHECK-NEXT: frsqrts v0.2d, v0.2d, v4.2d
438 ; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d
439 ; CHECK-NEXT: frsqrts v1.2d, v1.2d, v4.2d
440 ; CHECK-NEXT: fmul v0.2d, v2.2d, v0.2d
441 ; CHECK-NEXT: fmul v1.2d, v3.2d, v1.2d
443 %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
444 %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
; sqrt_fdiv_common_operand: computes %x / sqrt(%x) with `fast` on both the
; llvm.sqrt call and the fdiv. The function is marked nounwind and uses no
; attribute group.
; FAULT expects only an fsqrt of d0 (no fdiv is listed).
; CHECK expects frsqrte plus three frsqrts refinement steps and a final
; fmul of d0 by the refined estimate; no fcmp/fcsel is listed.
448 define double @sqrt_fdiv_common_operand(double %x) nounwind {
449 ; FAULT-LABEL: sqrt_fdiv_common_operand:
451 ; FAULT-NEXT: fsqrt d0, d0
454 ; CHECK-LABEL: sqrt_fdiv_common_operand:
456 ; CHECK-NEXT: frsqrte d1, d0
457 ; CHECK-NEXT: fmul d2, d1, d1
458 ; CHECK-NEXT: frsqrts d2, d0, d2
459 ; CHECK-NEXT: fmul d1, d1, d2
460 ; CHECK-NEXT: fmul d2, d1, d1
461 ; CHECK-NEXT: frsqrts d2, d0, d2
462 ; CHECK-NEXT: fmul d1, d1, d2
463 ; CHECK-NEXT: fmul d2, d1, d1
464 ; CHECK-NEXT: frsqrts d2, d0, d2
465 ; CHECK-NEXT: fmul d1, d1, d2
466 ; CHECK-NEXT: fmul d0, d0, d1
468 %sqrt = call fast double @llvm.sqrt.f64(double %x)
469 %r = fdiv fast double %x, %sqrt
; sqrt_fdiv_common_operand_vec: <2 x double> form of %x / sqrt(%x). Here the
; llvm.sqrt call carries no fast-math flags and the fdiv carries only
; `arcp nsz reassoc`, not the full `fast` set.
; FAULT expects only a vector fsqrt (no fdiv is listed).
; CHECK expects frsqrte plus three frsqrts refinement steps and a final
; fmul of v0 by the refined estimate.
473 define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
474 ; FAULT-LABEL: sqrt_fdiv_common_operand_vec:
476 ; FAULT-NEXT: fsqrt v0.2d, v0.2d
479 ; CHECK-LABEL: sqrt_fdiv_common_operand_vec:
481 ; CHECK-NEXT: frsqrte v1.2d, v0.2d
482 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
483 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
484 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
485 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
486 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
487 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
488 ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
489 ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
490 ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
491 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
493 %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
494 %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt
; sqrt_fdiv_common_operand_extra_use: %x / sqrt(%x) where the sqrt result has
; a second use: it is also stored through %p.
; FAULT expects an fsqrt followed by a store of d0 to [x0].
; CHECK expects frsqrte plus three frsqrts refinement steps; the value stored
; to [x0] is the fcsel result (guarded by the fcmp against #0.0), after which
; d1 is moved into d0.
498 define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwind {
499 ; FAULT-LABEL: sqrt_fdiv_common_operand_extra_use:
501 ; FAULT-NEXT: fsqrt d0, d0
502 ; FAULT-NEXT: str d0, [x0]
505 ; CHECK-LABEL: sqrt_fdiv_common_operand_extra_use:
507 ; CHECK-NEXT: frsqrte d1, d0
508 ; CHECK-NEXT: fmul d2, d1, d1
509 ; CHECK-NEXT: frsqrts d2, d0, d2
510 ; CHECK-NEXT: fmul d1, d1, d2
511 ; CHECK-NEXT: fmul d2, d1, d1
512 ; CHECK-NEXT: frsqrts d2, d0, d2
513 ; CHECK-NEXT: fmul d1, d1, d2
514 ; CHECK-NEXT: fmul d2, d1, d1
515 ; CHECK-NEXT: frsqrts d2, d0, d2
516 ; CHECK-NEXT: fmul d1, d1, d2
517 ; CHECK-NEXT: fcmp d0, #0.0
518 ; CHECK-NEXT: fmul d1, d0, d1
519 ; CHECK-NEXT: fcsel d0, d0, d1, eq
520 ; CHECK-NEXT: str d0, [x0]
521 ; CHECK-NEXT: fmov d0, d1
523 %sqrt = call fast double @llvm.sqrt.f64(double %x)
524 store double %sqrt, double* %p
525 %r = fdiv fast double %x, %sqrt
; sqrt_simplify_before_recip_3_uses: one `fast` sqrt is the divisor of three
; `fast` fdivs: 1.0/sqrt (stored to %p1), 42.0/sqrt (stored to %p2) and
; %x/sqrt (returned).
; FAULT expects one fsqrt and two fdivs; the constant 4631107791820423168 moved
; through x8 is the IEEE-754 bit pattern of 42.0.
; CHECK lists no fsqrt or fdiv: frsqrte plus three frsqrts refinement steps
; produce d1, which is then multiplied by the 42.0 constant and by d0.
529 define double @sqrt_simplify_before_recip_3_uses(double %x, double* %p1, double* %p2) nounwind {
530 ; FAULT-LABEL: sqrt_simplify_before_recip_3_uses:
532 ; FAULT-NEXT: mov x8, #4631107791820423168
533 ; FAULT-NEXT: fsqrt d0, d0
534 ; FAULT-NEXT: fmov d1, #1.00000000
535 ; FAULT-NEXT: fmov d2, x8
536 ; FAULT-NEXT: fdiv d1, d1, d0
537 ; FAULT-NEXT: fdiv d2, d2, d0
538 ; FAULT-NEXT: str d1, [x0]
539 ; FAULT-NEXT: str d2, [x1]
542 ; CHECK-LABEL: sqrt_simplify_before_recip_3_uses:
544 ; CHECK-NEXT: frsqrte d1, d0
545 ; CHECK-NEXT: fmul d2, d1, d1
546 ; CHECK-NEXT: frsqrts d2, d0, d2
547 ; CHECK-NEXT: fmul d1, d1, d2
548 ; CHECK-NEXT: fmul d2, d1, d1
549 ; CHECK-NEXT: frsqrts d2, d0, d2
550 ; CHECK-NEXT: fmul d1, d1, d2
551 ; CHECK-NEXT: fmul d2, d1, d1
552 ; CHECK-NEXT: mov x8, #4631107791820423168
553 ; CHECK-NEXT: frsqrts d2, d0, d2
554 ; CHECK-NEXT: fmul d1, d1, d2
555 ; CHECK-NEXT: fmov d2, x8
556 ; CHECK-NEXT: fmul d2, d1, d2
557 ; CHECK-NEXT: fmul d0, d0, d1
558 ; CHECK-NEXT: str d1, [x0]
559 ; CHECK-NEXT: str d2, [x1]
561 %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
562 %rsqrt = fdiv fast double 1.0, %sqrt
563 %r = fdiv fast double 42.0, %sqrt
564 %sqrt_fast = fdiv fast double %x, %sqrt
565 store double %rsqrt, double* %p1, align 8
566 store double %r, double* %p2, align 8
567 ret double %sqrt_fast
; sqrt_simplify_before_recip_3_uses_order: three `fast` fdivs by one `fast`
; sqrt, with the %x/sqrt division appearing first in the IR, followed by
; 42.0/sqrt (stored to %p1) and 43.0/sqrt (stored to %p2).
; FAULT expects one fsqrt and two fdivs; the two constants are built in x8
; (42.0) and in x9 via mov + movk (43.0).
; CHECK lists no fsqrt or fdiv: frsqrte plus three frsqrts refinement steps
; produce d1, which is multiplied by d0 and by each of the two constants.
570 define double @sqrt_simplify_before_recip_3_uses_order(double %x, double* %p1, double* %p2) nounwind {
571 ; FAULT-LABEL: sqrt_simplify_before_recip_3_uses_order:
573 ; FAULT-NEXT: mov x9, #140737488355328
574 ; FAULT-NEXT: mov x8, #4631107791820423168
575 ; FAULT-NEXT: movk x9, #16453, lsl #48
576 ; FAULT-NEXT: fsqrt d0, d0
577 ; FAULT-NEXT: fmov d1, x8
578 ; FAULT-NEXT: fmov d2, x9
579 ; FAULT-NEXT: fdiv d1, d1, d0
580 ; FAULT-NEXT: fdiv d2, d2, d0
581 ; FAULT-NEXT: str d1, [x0]
582 ; FAULT-NEXT: str d2, [x1]
585 ; CHECK-LABEL: sqrt_simplify_before_recip_3_uses_order:
587 ; CHECK-NEXT: frsqrte d1, d0
588 ; CHECK-NEXT: fmul d3, d1, d1
589 ; CHECK-NEXT: frsqrts d3, d0, d3
590 ; CHECK-NEXT: fmul d1, d1, d3
591 ; CHECK-NEXT: fmul d3, d1, d1
592 ; CHECK-NEXT: frsqrts d3, d0, d3
593 ; CHECK-NEXT: mov x8, #4631107791820423168
594 ; CHECK-NEXT: fmul d1, d1, d3
595 ; CHECK-NEXT: fmov d2, x8
596 ; CHECK-NEXT: mov x8, #140737488355328
597 ; CHECK-NEXT: fmul d3, d1, d1
598 ; CHECK-NEXT: movk x8, #16453, lsl #48
599 ; CHECK-NEXT: frsqrts d3, d0, d3
600 ; CHECK-NEXT: fmul d1, d1, d3
601 ; CHECK-NEXT: fmov d3, x8
602 ; CHECK-NEXT: fmul d0, d0, d1
603 ; CHECK-NEXT: fmul d2, d1, d2
604 ; CHECK-NEXT: fmul d1, d1, d3
605 ; CHECK-NEXT: str d2, [x0]
606 ; CHECK-NEXT: str d1, [x1]
608 %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
609 %sqrt_fast = fdiv fast double %x, %sqrt
610 %r1 = fdiv fast double 42.0, %sqrt
611 %r2 = fdiv fast double 43.0, %sqrt
612 store double %r1, double* %p1, align 8
613 store double %r2, double* %p2, align 8
614 ret double %sqrt_fast
; sqrt_simplify_before_recip_4_uses: one `fast` sqrt is the divisor of four
; `fast` fdivs: 1.0/sqrt (stored to %p1), 42.0/sqrt (stored to %p2),
; 43.0/sqrt (stored to %p3) and %x/sqrt (returned).
; FAULT expects one fsqrt, a single fdiv producing the reciprocal in d1, and
; two fmuls of d1 by the 42.0 and 43.0 constants.
; CHECK expects frsqrte plus three frsqrts refinement steps; unlike the
; three-use tests, the listed sequence also contains fcmp/fcsel and ends the
; returned value's computation with an fdiv of d0 by the fcsel result.
618 define double @sqrt_simplify_before_recip_4_uses(double %x, double* %p1, double* %p2, double* %p3) nounwind {
619 ; FAULT-LABEL: sqrt_simplify_before_recip_4_uses:
621 ; FAULT-NEXT: mov x8, #4631107791820423168
622 ; FAULT-NEXT: fmov d2, x8
623 ; FAULT-NEXT: mov x8, #140737488355328
624 ; FAULT-NEXT: fsqrt d0, d0
625 ; FAULT-NEXT: fmov d1, #1.00000000
626 ; FAULT-NEXT: movk x8, #16453, lsl #48
627 ; FAULT-NEXT: fdiv d1, d1, d0
628 ; FAULT-NEXT: fmov d3, x8
629 ; FAULT-NEXT: fmul d2, d1, d2
630 ; FAULT-NEXT: fmul d3, d1, d3
631 ; FAULT-NEXT: str d1, [x0]
632 ; FAULT-NEXT: str d2, [x1]
633 ; FAULT-NEXT: str d3, [x2]
636 ; CHECK-LABEL: sqrt_simplify_before_recip_4_uses:
638 ; CHECK-NEXT: frsqrte d1, d0
639 ; CHECK-NEXT: fmul d3, d1, d1
640 ; CHECK-NEXT: frsqrts d3, d0, d3
641 ; CHECK-NEXT: fmul d1, d1, d3
642 ; CHECK-NEXT: fmul d3, d1, d1
643 ; CHECK-NEXT: frsqrts d3, d0, d3
644 ; CHECK-NEXT: fmul d1, d1, d3
645 ; CHECK-NEXT: mov x8, #4631107791820423168
646 ; CHECK-NEXT: fmul d3, d1, d1
647 ; CHECK-NEXT: fmov d2, x8
648 ; CHECK-NEXT: mov x8, #140737488355328
649 ; CHECK-NEXT: frsqrts d3, d0, d3
650 ; CHECK-NEXT: movk x8, #16453, lsl #48
651 ; CHECK-NEXT: fmul d1, d1, d3
652 ; CHECK-NEXT: fcmp d0, #0.0
653 ; CHECK-NEXT: fmov d4, x8
654 ; CHECK-NEXT: fmul d3, d0, d1
655 ; CHECK-NEXT: fmul d2, d1, d2
656 ; CHECK-NEXT: fmul d4, d1, d4
657 ; CHECK-NEXT: str d1, [x0]
658 ; CHECK-NEXT: fcsel d1, d0, d3, eq
659 ; CHECK-NEXT: fdiv d0, d0, d1
660 ; CHECK-NEXT: str d2, [x1]
661 ; CHECK-NEXT: str d4, [x2]
663 %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
664 %rsqrt = fdiv fast double 1.0, %sqrt
665 %r1 = fdiv fast double 42.0, %sqrt
666 %r2 = fdiv fast double 43.0, %sqrt
667 %sqrt_fast = fdiv fast double %x, %sqrt
668 store double %rsqrt, double* %p1, align 8
669 store double %r1, double* %p2, align 8
670 store double %r2, double* %p3, align 8
671 ret double %sqrt_fast
; Attribute groups referenced by the functions in this file.
; #0 sets only "unsafe-fp-math"="true"; #1 sets the same and additionally
; "denormal-fp-math"="ieee" (used by the *_ieee_denorms tests).
674 attributes #0 = { "unsafe-fp-math"="true" }
675 attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }