1 //===-- Optimized PolyEval implementations for x86_64 --------- C++ -----*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_POLYEVAL_H
10 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_POLYEVAL_H
12 #include "src/__support/common.h"
13 #include "src/__support/macros/properties/architectures.h"
15 #if !defined(LIBC_TARGET_ARCH_IS_X86_64)
16 #error "Invalid include"
19 #include <immintrin.h>
21 namespace __llvm_libc
{
25 // polyeval(x, a0, a1, a2, a3) = a3*x^3 + a2*x^2 + a1*x + a0
27 LIBC_INLINE
float polyeval(float x
, float a0
, float a1
, float a2
, float a3
) {
28 __m128 xmm
= _mm_set1_ps(x
); // NOLINT
29 __m128 a13
= _mm_set_ps(0.0f
, x
, a3
, a1
); // NOLINT
30 __m128 a02
= _mm_set_ps(0.0f
, 0.0f
, a2
, a0
); // NOLINT
31 // r = (0, x^2, a3*x + a2, a1*x + a0)
32 __m128 r
= _mm_fmadd_ps(a13
, xmm
, a02
); // NOLINT
33 // result = (a3*x + a2) * x^2 + (a1*x + a0)
34 return fma(r
[2], r
[1], r
[0]);
38 LIBC_INLINE
double polyeval(double x
, double a0
, double a1
, double a2
,
40 __m256d xmm
= _mm256_set1_pd(x
); // NOLINT
41 __m256d a13
= _mm256_set_pd(0.0, x
, a3
, a1
); // NOLINT
42 __m256d a02
= _mm256_set_pd(0.0, 0.0, a2
, a0
); // NOLINT
43 // r = (0, x^2, a3*x + a2, a1*x + a0)
44 __m256d r
= _mm256_fmadd_pd(a13
, xmm
, a02
); // NOLINT
45 // result = (a3*x + a2) * x^2 + (a1*x + a0)
46 return fma(r
[2], r
[1], r
[0]);
49 // Quintic polynomials:
50 // polyeval(x, a0, a1, a2, a3, a4, a5) = a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 +
53 LIBC_INLINE
float polyeval(float x
, float a0
, float a1
, float a2
, float a3
,
55 __m128 xmm
= _mm_set1_ps(x
); // NOLINT
56 __m128 a25
= _mm_set_ps(0.0f
, x
, a5
, a2
); // NOLINT
57 __m128 a14
= _mm_set_ps(0.0f
, 0.0f
, a4
, a1
); // NOLINT
58 __m128 a03
= _mm_set_ps(0.0f
, 0.0f
, a3
, a0
); // NOLINT
59 // r1 = (0, x^2, a5*x + a4, a2*x + a1)
60 __m128 r1
= _mm_fmadd_ps(a25
, xmm
, a14
); // NOLINT
61 // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0
62 __m128 r2
= _mm_fmadd_ps(r1
, xmm
, a03
); // NOLINT
63 // result = ((a5*x + a4)*x + a3) * x^3 + ((a2*x + a1)*x + a0)
64 return fma(r2
[2], r2
[1], r2
[0]);
68 LIBC_INLINE
double polyeval(double x
, double a0
, double a1
, double a2
,
69 double a3
, double a4
, double a5
) {
70 __m256d xmm
= _mm256_set1_pd(x
); // NOLINT
71 __m256d a25
= _mm256_set_pd(0.0, x
, a5
, a2
); // NOLINT
72 __m256d a14
= _mm256_set_pd(0.0, 0.0, a4
, a1
); // NOLINT
73 __m256d a03
= _mm256_set_pd(0.0, 0.0, a3
, a0
); // NOLINT
74 // r1 = (0, x^2, a5*x + a4, a2*x + a1)
75 __m256d r1
= _mm256_fmadd_pd(a25
, xmm
, a14
); // NOLINT
76 // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0
77 __m256d r2
= _mm256_fmadd_pd(r1
, xmm
, a03
); // NOLINT
78 // result = ((a5*x + a4)*x + a3) * x^3 + ((a2*x + a1)*x + a0)
79 return fma(r2
[2], r2
[1], r2
[0]);
83 } // namespace __llvm_libc
85 #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_POLYEVAL_H