Improve accuracy of SIMD exp for small args
gromacs.git: src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx_simd_float.h
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H
#define GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H

#include "config.h"

// Assert is buggy on xlc with high optimization, so we skip it for QPX
#include <cmath>
#include <cstddef>
#include <cstdint>

#ifdef __clang__
#include <qpxmath.h>
#endif

#include "gromacs/math/utilities.h"
#include "gromacs/utility/basedefinitions.h"
namespace gmx
{

class SimdFloat
{
    public:
        SimdFloat() {}

        SimdFloat(float f) : simdInternal_(vec_splats(f)) {}

        // Internal utility constructor to simplify return statements
        SimdFloat(vector4double simd) : simdInternal_(simd) {}

        vector4double  simdInternal_;
};

class SimdFInt32
{
    public:
        SimdFInt32() {}

        SimdFInt32(std::int32_t i)
        {
            GMX_ALIGNED(int, GMX_SIMD_FINT32_WIDTH) idata[GMX_SIMD_FINT32_WIDTH];

            idata[0]      = i;
            simdInternal_ = vec_splat(vec_ldia(0, idata), 0);
        }

        // Internal utility constructor to simplify return statements
        SimdFInt32(vector4double simd) : simdInternal_(simd) {}

        vector4double  simdInternal_;
};

class SimdFBool
{
    public:
        SimdFBool() {}

        SimdFBool(bool b) : simdInternal_(vec_splats(b ? 1.0 : -1.0)) {}

        // Internal utility constructor to simplify return statements
        SimdFBool(vector4double simd) : simdInternal_(simd) {}

        vector4double  simdInternal_;
};
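
// A brief sketch of the boolean convention assumed throughout this header: QPX
// carries truth values in floating-point lanes, and the way vec_sel() is used
// below implies that non-negative selector lanes pick the second value operand.
// The 1.0/-1.0 splats in the SimdFBool constructor above follow that convention,
// e.g. (selectByMask()/selectByNotMask() are defined further down in this file):
//
//     SimdFloat x(3.0f);
//     SimdFloat r1 = selectByMask(x, SimdFBool(true));   // every lane keeps 3.0
//     SimdFloat r2 = selectByMask(x, SimdFBool(false));  // every lane becomes 0.0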
static inline SimdFloat gmx_simdcall
simdLoad(const float *m)
{
#ifdef NDEBUG
    return {
               vec_ld(0, const_cast<float *>(m))
    };
#else
    return {
               vec_lda(0, const_cast<float *>(m))
    };
#endif
}

static inline void gmx_simdcall
store(float *m, SimdFloat a)
{
#ifdef NDEBUG
    vec_st(a.simdInternal_, 0, m);
#else
    vec_sta(a.simdInternal_, 0, m);
#endif
}
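
// A minimal usage sketch of the load/store pair above (the buffer is hypothetical,
// not part of this header). Both functions assume SIMD-aligned memory; the intent
// of the NDEBUG switch appears to be that release builds use the plain
// vec_ld()/vec_st() forms, while debug builds use vec_lda()/vec_sta(), which fault
// on misaligned addresses so that bad pointers are caught early.
//
//     GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH) mem[GMX_SIMD_FLOAT_WIDTH] = {1, 2, 3, 4};
//     SimdFloat v = simdLoad(mem);   // four packed floats
//     store(mem, v + v);             // mem now holds {2, 4, 6, 8}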
static inline SimdFloat gmx_simdcall
setZeroF()
{
    return {
               vec_splats(0.0)
    };
}

static inline SimdFInt32 gmx_simdcall
simdLoadFI(const std::int32_t * m)
{
#ifdef NDEBUG
    return {
               vec_ldia(0, const_cast<int *>(m))
    };
#else
    return {
               vec_ldiaa(0, const_cast<int *>(m))
    };
#endif
}

static inline void gmx_simdcall
store(std::int32_t * m, SimdFInt32 a)
{
    vec_st(a.simdInternal_, 0, m);
}

static inline SimdFInt32 gmx_simdcall
setZeroFI()
{
    return {
               vec_splats(0.0)
    };
}
static inline SimdFloat gmx_simdcall
operator+(SimdFloat a, SimdFloat b)
{
    return {
               vec_add(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator-(SimdFloat a, SimdFloat b)
{
    return {
               vec_sub(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator-(SimdFloat x)
{
    return {
               vec_neg(x.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator*(SimdFloat a, SimdFloat b)
{
    return {
               vec_mul(a.simdInternal_, b.simdInternal_)
    };
}
// The four fused operations below map onto the QPX multiply-add family:
// fma(a,b,c) = a*b + c, fms(a,b,c) = a*b - c, fnma(a,b,c) = -a*b + c, fnms(a,b,c) = -a*b - c.
static inline SimdFloat gmx_simdcall
fma(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fms(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fnma(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fnms(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
               vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}
// rsqrt() and rcp() only return the hardware estimates of 1/sqrt(x) and 1/x;
// callers are expected to refine them (e.g. with Newton-Raphson iterations).
static inline SimdFloat gmx_simdcall
rsqrt(SimdFloat x)
{
    return {
               vec_rsqrte(x.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
rcp(SimdFloat x)
{
    return {
               vec_re(x.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
{
    return {
               vec_add(a.simdInternal_, vec_sel(vec_splats(0.0), b.simdInternal_, m.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
{
    return {
               vec_sel(vec_splats(0.0), vec_mul(a.simdInternal_, b.simdInternal_), m.simdInternal_)
    };
}

static inline SimdFloat
maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
{
    return {
               vec_sel(vec_splats(0.0), vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_), m.simdInternal_)
    };
}

static inline SimdFloat
maskzRsqrt(SimdFloat x, SimdFBool m)
{
#ifndef NDEBUG
    // In debug builds, put 1.0 in the masked-out lanes so the estimate never sees
    // zero or negative arguments in lanes whose results are discarded anyway.
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
#endif
    return {
               vec_sel(vec_splats(0.0), vec_rsqrte(x.simdInternal_), m.simdInternal_)
    };
}

static inline SimdFloat
maskzRcp(SimdFloat x, SimdFBool m)
{
#ifndef NDEBUG
    // Same 1.0 substitution as in maskzRsqrt(): keep masked-out lanes harmless in debug builds.
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
#endif
    return {
               vec_sel(vec_splats(0.0), vec_re(x.simdInternal_), m.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
abs(SimdFloat x)
{
    return {
               vec_abs( x.simdInternal_ )
    };
}

static inline SimdFloat gmx_simdcall
max(SimdFloat a, SimdFloat b)
{
    return {
               vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(a.simdInternal_, b.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
min(SimdFloat a, SimdFloat b)
{
    return {
               vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(b.simdInternal_, a.simdInternal_))
    };
}
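
// max()/min() above have no single QPX instruction; they use the sign of the
// difference as the vec_sel() selector, which in scalar terms is roughly
//
//     max(a, b) = (a - b >= 0) ? a : b;
//     min(a, b) = (b - a >= 0) ? a : b;
//
// NaN inputs fall through to whatever vec_sub()/vec_sel() produce for them.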
static inline SimdFloat gmx_simdcall
round(SimdFloat x)
{
    // Note: It is critical to use vec_cfid(vec_ctid(x)) for the implementation
    // here, since vec_round() does not adhere to the FP control
    // word rounding scheme. We rely on float-to-float and float-to-integer
    // rounding being the same for half-way values in a few algorithms.
    return {
               vec_cfid(vec_ctid(x.simdInternal_))
    };
}
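
// Worked example of the half-way behaviour the comment above refers to: under the
// default round-to-nearest-even mode, both this round() and the cvtR2I() conversion
// below send 2.5 to 2 and 3.5 to 4, whereas a round-half-away implementation (which
// vec_round() may use) would send 2.5 to 3. The SIMD math routines require the
// float->float and float->int paths to agree on such values.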
static inline SimdFloat gmx_simdcall
trunc(SimdFloat x)
{
    return {
               vec_trunc(x.simdInternal_)
    };
}

static inline SimdFloat
frexp(SimdFloat value, SimdFInt32 * exponent)
{
    GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH) rdata[GMX_SIMD_FLOAT_WIDTH];
    GMX_ALIGNED(int, GMX_SIMD_FLOAT_WIDTH)   idata[GMX_SIMD_FLOAT_WIDTH];

    vec_st(value.simdInternal_, 0, rdata);

    for (std::size_t i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
    {
        rdata[i] = std::frexp(rdata[i], idata + i);
    }

    exponent->simdInternal_ = vec_ldia(0, idata);
    value.simdInternal_     = vec_ld(0, rdata);

    return value;
}

template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat
ldexp(SimdFloat value, SimdFInt32 exponent)
{
    GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH) rdata[GMX_SIMD_FLOAT_WIDTH];
    GMX_ALIGNED(int, GMX_SIMD_FLOAT_WIDTH)   idata[GMX_SIMD_FLOAT_WIDTH];

    vec_st(value.simdInternal_, 0, rdata);
    vec_st(exponent.simdInternal_, 0, idata);

    for (std::size_t i = 0; i < GMX_SIMD_FLOAT_WIDTH; i++)
    {
        rdata[i] = std::ldexp(rdata[i], idata[i]);
    }

    value.simdInternal_ = vec_ld(0, rdata);

    return value;
}
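
// The two routines above have no QPX equivalent, so they spill each lane through
// aligned scratch arrays and fall back to the scalar std::frexp/std::ldexp.
// A small round-trip sketch of what they compute per lane:
//
//     SimdFInt32 e;
//     SimdFloat  m = frexp(SimdFloat(6.0f), &e);   // m = 0.75, e = 3 in every lane
//     SimdFloat  x = ldexp(m, e);                  // 0.75 * 2^3 = 6.0 again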
static inline float gmx_simdcall
reduce(SimdFloat x)
{
    vector4double y = vec_sldw(x.simdInternal_, x.simdInternal_, 2);
    vector4double z;

    y = vec_add(y, x.simdInternal_);
    z = vec_sldw(y, y, 1);
    y = vec_add(y, z);
    return vec_extract(y, 0);
}
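
// The reduction above is two rotate-and-add steps; in scalar terms it returns
//
//     reduce({x0, x1, x2, x3}) = (x0 + x2) + (x1 + x3)
//
// i.e. the full horizontal sum, paired up so only log2(width) vector adds are needed.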
static inline SimdFBool gmx_simdcall
operator==(SimdFloat a, SimdFloat b)
{
    return {
               vec_cmpeq(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator!=(SimdFloat a, SimdFloat b)
{
    return {
               vec_not(vec_cmpeq(a.simdInternal_, b.simdInternal_))
    };
}

static inline SimdFBool gmx_simdcall
operator<(SimdFloat a, SimdFloat b)
{
    return {
               vec_cmplt(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator<=(SimdFloat a, SimdFloat b)
{
    return {
               vec_or(vec_cmplt(a.simdInternal_, b.simdInternal_), vec_cmpeq(a.simdInternal_, b.simdInternal_))
    };
}
static inline SimdFBool gmx_simdcall
operator&&(SimdFBool a, SimdFBool b)
{
    return {
               vec_and(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator||(SimdFBool a, SimdFBool b)
{
    return {
               vec_or(a.simdInternal_, b.simdInternal_)
    };
}

static inline bool gmx_simdcall
anyTrue(SimdFBool a)
{
    vector4double b = vec_sldw(a.simdInternal_, a.simdInternal_, 2);

    a.simdInternal_ = vec_or(a.simdInternal_, b);
    b               = vec_sldw(a.simdInternal_, a.simdInternal_, 1);
    b               = vec_or(a.simdInternal_, b);
    return (vec_extract(b, 0) > 0);
}
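
// anyTrue() uses the same rotate pattern as reduce(), but OR-combines the
// sign-encoded booleans, so lane 0 ends up positive (true is stored as 1.0)
// if any input lane was true; in scalar terms,
//
//     anyTrue({b0, b1, b2, b3}) = b0 || b1 || b2 || b3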
static inline SimdFloat gmx_simdcall
selectByMask(SimdFloat a, SimdFBool m)
{
    return {
               vec_sel(vec_splats(0.0), a.simdInternal_, m.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
selectByNotMask(SimdFloat a, SimdFBool m)
{
    return {
               vec_sel(a.simdInternal_, vec_splats(0.0), m.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
blend(SimdFloat a, SimdFloat b, SimdFBool sel)
{
    return {
               vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
    };
}
// Float-to-integer conversion using the current rounding mode
static inline SimdFInt32 gmx_simdcall
cvtR2I(SimdFloat a)
{
    return {
               vec_ctiw(a.simdInternal_)
    };
}

// Float-to-integer conversion with truncation (round toward zero)
static inline SimdFInt32 gmx_simdcall
cvttR2I(SimdFloat a)
{
    return {
               vec_ctiwz(a.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
cvtI2R(SimdFInt32 a)
{
    return {
               vec_cfid(a.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
copysign(SimdFloat x, SimdFloat y)
{
    return {
               vec_cpsgn(y.simdInternal_, x.simdInternal_)
    };
}
}      // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H