; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare double @__sqrt_finite(double)
declare float @__sqrtf_finite(float)
declare x86_fp80 @__sqrtl_finite(x86_fp80)
declare float @llvm.sqrt.f32(float)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
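
; These tests cover lowering of sqrt libcalls and @llvm.sqrt.* intrinsics
; both with and without reciprocal square-root estimates; attribute #0
; (at the end of the file) disables the estimates and #1 enables them.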

define double @finite_f64_no_estimate(double %d) #0 {
; SSE-LABEL: finite_f64_no_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f64_no_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call double @__sqrt_finite(double %d) #2
  ret double %call
}

; No estimates for doubles.
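; Attribute #1 requests estimate codegen below, but x86 has rsqrt estimate
; instructions only for f32, so a real sqrtsd is still expected for f64.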

define double @finite_f64_estimate(double %d) #1 {
; SSE-LABEL: finite_f64_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f64_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call double @__sqrt_finite(double %d) #2
  ret double %call
}

define float @finite_f32_no_estimate(float %f) #0 {
; SSE-LABEL: finite_f32_no_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f32_no_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call float @__sqrtf_finite(float %f) #2
  ret float %call
}
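
; With estimates enabled, sqrt(x) is lowered as x * rsqrt(x) refined by one
; Newton-Raphson step: for e = rsqrt(x), the sequence below computes
; (x*e) * -0.5 * ((x*e)*e - 3.0), loading the -0.5 and -3.0 constants from
; the constant pool. The trailing cmpeqss/andnps selects 0.0 when x == 0.0,
; because 0 * rsqrt(0) would otherwise yield NaN.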

define float @finite_f32_estimate(float %f) #1 {
; SSE-LABEL: finite_f32_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    rsqrtss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    mulss %xmm1, %xmm2
; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    mulss %xmm2, %xmm3
; SSE-NEXT:    mulss %xmm1, %xmm2
; SSE-NEXT:    addss {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqss %xmm1, %xmm0
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f32_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call float @__sqrtf_finite(float %f) #2
  ret float %call
}

define x86_fp80 @finite_f80_no_estimate(x86_fp80 %ld) #0 {
; CHECK-LABEL: finite_f80_no_estimate:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fsqrt
; CHECK-NEXT:    retq
  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
  ret x86_fp80 %call
}

; Don't die on the impossible.
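; x87 has no rsqrt estimate instruction, so with attribute #1 the f80 sqrt
; must simply fall back to fsqrt rather than crash instruction selection.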

define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 {
; CHECK-LABEL: finite_f80_estimate_but_no:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fsqrt
; CHECK-NEXT:    retq
  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
  ret x86_fp80 %call
}

define float @f32_no_estimate(float %x) #0 {
; SSE-LABEL: f32_no_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtss %xmm0, %xmm1
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f32_no_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call float @llvm.sqrt.f32(float %x)
  %div = fdiv fast float 1.0, %sqrt
  ret float %div
}
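
; A reciprocal square root uses the estimate directly: for e = rsqrt(x), one
; Newton-Raphson step computes e * -0.5 * (x*e*e - 3.0). No zero guard is
; needed here: the 'fast' flag on the fdiv already excludes the infinite
; result that 1.0/sqrt(0.0) would produce.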

define float @f32_estimate(float %x) #1 {
; SSE-LABEL: f32_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    rsqrtss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm2
; SSE-NEXT:    mulss %xmm0, %xmm2
; SSE-NEXT:    addss {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulss {{.*}}(%rip), %xmm1
; SSE-NEXT:    mulss %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f32_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call float @llvm.sqrt.f32(float %x)
  %div = fdiv fast float 1.0, %sqrt
  ret float %div
}

define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; SSE-LABEL: v4f32_no_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtps %xmm0, %xmm1
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; SSE-NEXT:    divps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: v4f32_no_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtps %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; AVX-NEXT:    vdivps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <4 x float> %div
}

define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
; SSE-LABEL: v4f32_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    rsqrtps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm2
; SSE-NEXT:    mulps %xmm0, %xmm2
; SSE-NEXT:    addps {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulps {{.*}}(%rip), %xmm1
; SSE-NEXT:    mulps %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: v4f32_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vrsqrtps %xmm0, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmulps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <4 x float> %div
}
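
; For 8 x float, SSE must split the 256-bit operation into two 128-bit
; halves (two sqrtps/divps or rsqrtps chains), while AVX operates on the
; full vector in ymm registers.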

define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; SSE-LABEL: v8f32_no_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtps %xmm1, %xmm2
; SSE-NEXT:    sqrtps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    divps %xmm3, %xmm0
; SSE-NEXT:    divps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: v8f32_no_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtps %ymm0, %ymm0
; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; AVX-NEXT:    vdivps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <8 x float> %div
}

define <8 x float> @v8f32_estimate(<8 x float> %x) #1 {
; SSE-LABEL: v8f32_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    rsqrtps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm2
; SSE-NEXT:    mulps %xmm0, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00]
; SSE-NEXT:    addps %xmm0, %xmm2
; SSE-NEXT:    mulps %xmm4, %xmm2
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    rsqrtps %xmm1, %xmm5
; SSE-NEXT:    movaps %xmm5, %xmm3
; SSE-NEXT:    mulps %xmm3, %xmm3
; SSE-NEXT:    mulps %xmm1, %xmm3
; SSE-NEXT:    addps %xmm0, %xmm3
; SSE-NEXT:    mulps %xmm4, %xmm3
; SSE-NEXT:    mulps %xmm5, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: v8f32_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vrsqrtps %ymm0, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm1, %ymm2
; AVX-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <8 x float> %div
}
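
; In the "reciprocal-estimates" attribute string, a "!" prefix disables the
; named estimate (here scalar/vector f32 sqrt and division), a bare name
; enables it, and a ":N" suffix can override the refinement step count.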

attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
attributes #2 = { nounwind readnone }