2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
41 #include "gromacs/math/utilities.h"
42 #include "gromacs/utility/basedefinitions.h"
44 #include "impl_ibm_vsx_definitions.h"
54 // gcc-4.9 does not recognize that we use the parameter
55 SimdFloat(float gmx_unused f
) : simdInternal_(vec_splats(f
)) {}
57 // Internal utility constructor to simplify return statements
58 SimdFloat(__vector
float simd
) : simdInternal_(simd
) {}
60 __vector
float simdInternal_
;
68 // gcc-4.9 does not recognize that we use the parameter
69 SimdFInt32(std::int32_t gmx_unused i
) : simdInternal_(vec_splats(i
)) {}
71 // Internal utility constructor to simplify return statements
72 SimdFInt32(__vector
signed int simd
) : simdInternal_(simd
) {}
74 __vector
signed int simdInternal_
;
82 SimdFBool(bool b
) : simdInternal_(reinterpret_cast<__vector vsxBool
int>(vec_splats( b
? 0xFFFFFFFF : 0))) {}
84 // Internal utility constructor to simplify return statements
85 SimdFBool(__vector vsxBool
int simd
) : simdInternal_(simd
) {}
87 __vector vsxBool
int simdInternal_
;
95 SimdFIBool(bool b
) : simdInternal_(reinterpret_cast<__vector vsxBool
int>(vec_splats( b
? 0xFFFFFFFF : 0))) {}
97 // Internal utility constructor to simplify return statements
98 SimdFIBool(__vector vsxBool
int simd
) : simdInternal_(simd
) {}
100 __vector vsxBool
int simdInternal_
;
103 // Note that the interfaces we use here have been a mess in xlc;
104 // currently version 13.1.5 is required.
106 static inline SimdFloat gmx_simdcall
107 simdLoad(const float *m
, SimdFloatTag
= {})
110 *reinterpret_cast<const __vector
float *>(m
)
114 static inline void gmx_simdcall
115 store(float *m
, SimdFloat a
)
117 *reinterpret_cast<__vector
float *>(m
) = a
.simdInternal_
;
120 static inline SimdFloat gmx_simdcall
121 simdLoadU(const float *m
, SimdFloatTag
= {})
125 *reinterpret_cast<const __vector
float *>(m
)
132 static inline void gmx_simdcall
133 storeU(float *m
, SimdFloat a
)
136 *reinterpret_cast<__vector
float *>(m
) = a
.simdInternal_
;
138 vec_xst(a
.simdInternal_
, 0, m
);
142 static inline SimdFloat gmx_simdcall
150 static inline SimdFInt32 gmx_simdcall
151 simdLoad(const std::int32_t * m
, SimdFInt32Tag
)
154 *reinterpret_cast<const __vector
int *>(m
)
158 static inline void gmx_simdcall
159 store(std::int32_t * m
, SimdFInt32 a
)
161 *reinterpret_cast<__vector
int *>(m
) = a
.simdInternal_
;
164 static inline SimdFInt32 gmx_simdcall
165 simdLoadU(const std::int32_t *m
, SimdFInt32Tag
)
169 *reinterpret_cast<const __vector
int *>(m
)
176 static inline void gmx_simdcall
177 storeU(std::int32_t * m
, SimdFInt32 a
)
180 *reinterpret_cast<__vector
int *>(m
) = a
.simdInternal_
;
182 vec_xst(a
.simdInternal_
, 0, m
);
186 static inline SimdFInt32 gmx_simdcall
190 vec_splats(static_cast<int>(0))
194 // gcc-4.9 does not detect that vec_extract() uses its argument
196 static inline std::int32_t gmx_simdcall
197 extract(SimdFInt32 gmx_unused a
)
199 return vec_extract(a
.simdInternal_
, index
);
202 static inline SimdFloat gmx_simdcall
203 operator&(SimdFloat a
, SimdFloat b
)
206 vec_and(a
.simdInternal_
, b
.simdInternal_
)
210 static inline SimdFloat gmx_simdcall
211 andNot(SimdFloat a
, SimdFloat b
)
214 vec_andc(b
.simdInternal_
, a
.simdInternal_
)
218 static inline SimdFloat gmx_simdcall
219 operator|(SimdFloat a
, SimdFloat b
)
222 vec_or(a
.simdInternal_
, b
.simdInternal_
)
226 static inline SimdFloat gmx_simdcall
227 operator^(SimdFloat a
, SimdFloat b
)
230 vec_xor(a
.simdInternal_
, b
.simdInternal_
)
234 static inline SimdFloat gmx_simdcall
235 operator+(SimdFloat a
, SimdFloat b
)
238 vec_add(a
.simdInternal_
, b
.simdInternal_
)
242 static inline SimdFloat gmx_simdcall
243 operator-(SimdFloat a
, SimdFloat b
)
246 vec_sub(a
.simdInternal_
, b
.simdInternal_
)
250 static inline SimdFloat gmx_simdcall
251 operator-(SimdFloat x
)
258 static inline SimdFloat gmx_simdcall
259 operator*(SimdFloat a
, SimdFloat b
)
262 vec_mul(a
.simdInternal_
, b
.simdInternal_
)
266 static inline SimdFloat gmx_simdcall
267 fma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
270 vec_madd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
274 static inline SimdFloat gmx_simdcall
275 fms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
278 vec_msub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
282 static inline SimdFloat gmx_simdcall
283 fnma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
286 vec_nmsub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
290 static inline SimdFloat gmx_simdcall
291 fnms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
294 vec_nmadd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
298 static inline SimdFloat gmx_simdcall
302 vec_rsqrte(x
.simdInternal_
)
306 static inline SimdFloat gmx_simdcall
310 vec_re(x
.simdInternal_
)
314 static inline SimdFloat gmx_simdcall
315 maskAdd(SimdFloat a
, SimdFloat b
, SimdFBool m
)
318 vec_add(a
.simdInternal_
, vec_and(b
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
)))
322 static inline SimdFloat gmx_simdcall
323 maskzMul(SimdFloat a
, SimdFloat b
, SimdFBool m
)
325 SimdFloat prod
= a
* b
;
328 vec_and(prod
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
))
332 static inline SimdFloat gmx_simdcall
333 maskzFma(SimdFloat a
, SimdFloat b
, SimdFloat c
, SimdFBool m
)
335 SimdFloat prod
= fma(a
, b
, c
);
338 vec_and(prod
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
))
342 static inline SimdFloat gmx_simdcall
343 maskzRsqrt(SimdFloat x
, SimdFBool m
)
346 x
.simdInternal_
= vec_sel(vec_splats(1.0f
), x
.simdInternal_
, m
.simdInternal_
);
349 vec_and(vec_rsqrte(x
.simdInternal_
), reinterpret_cast<__vector
float>(m
.simdInternal_
))
353 static inline SimdFloat gmx_simdcall
354 maskzRcp(SimdFloat x
, SimdFBool m
)
357 x
.simdInternal_
= vec_sel(vec_splats(1.0f
), x
.simdInternal_
, m
.simdInternal_
);
360 vec_and(vec_re(x
.simdInternal_
), reinterpret_cast<__vector
float>(m
.simdInternal_
))
364 static inline SimdFloat gmx_simdcall
368 vec_abs( x
.simdInternal_
)
372 static inline SimdFloat gmx_simdcall
373 max(SimdFloat a
, SimdFloat b
)
376 vec_max(a
.simdInternal_
, b
.simdInternal_
)
380 static inline SimdFloat gmx_simdcall
381 min(SimdFloat a
, SimdFloat b
)
384 vec_min(a
.simdInternal_
, b
.simdInternal_
)
388 static inline SimdFloat gmx_simdcall
392 vec_round( x
.simdInternal_
)
396 static inline SimdFloat gmx_simdcall
400 vec_trunc( x
.simdInternal_
)
404 static inline SimdFloat gmx_simdcall
405 frexp(SimdFloat value
, SimdFInt32
* exponent
)
407 const __vector
float exponentMask
= reinterpret_cast<__vector
float>(vec_splats(0x7F800000U
));
408 const __vector
signed int exponentBias
= vec_splats(126);
409 const __vector
float half
= vec_splats(0.5f
);
410 __vector
signed int iExponent
;
412 iExponent
= reinterpret_cast<__vector
signed int>(vec_and(value
.simdInternal_
, exponentMask
));
413 iExponent
= vec_sub(vec_sr(iExponent
, vec_splats(23U)), exponentBias
);
414 exponent
->simdInternal_
= iExponent
;
417 vec_or( vec_andc(value
.simdInternal_
, exponentMask
), half
)
421 template <MathOptimization opt
= MathOptimization::Safe
>
422 static inline SimdFloat gmx_simdcall
423 ldexp(SimdFloat value
, SimdFInt32 exponent
)
425 const __vector
signed int exponentBias
= vec_splats(127);
426 __vector
signed int iExponent
;
428 iExponent
= vec_add(exponent
.simdInternal_
, exponentBias
);
430 if (opt
== MathOptimization::Safe
)
432 // Make sure biased argument is not negative
433 iExponent
= vec_max(iExponent
, vec_splat_s32(0));
436 iExponent
= vec_sl( iExponent
, vec_splats(23U));
439 vec_mul(value
.simdInternal_
, reinterpret_cast<__vector
float>(iExponent
))
443 static inline float gmx_simdcall
446 const __vector
unsigned char perm1
= { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
447 const __vector
unsigned char perm2
= { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
449 x
.simdInternal_
= vec_add(x
.simdInternal_
, vec_perm(x
.simdInternal_
, x
.simdInternal_
, perm1
));
450 x
.simdInternal_
= vec_add(x
.simdInternal_
, vec_perm(x
.simdInternal_
, x
.simdInternal_
, perm2
));
451 return vec_extract(x
.simdInternal_
, 0);
454 static inline SimdFBool gmx_simdcall
455 operator==(SimdFloat a
, SimdFloat b
)
458 vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
)
462 static inline SimdFBool gmx_simdcall
463 operator!=(SimdFloat a
, SimdFloat b
)
466 vec_or(vec_cmpgt(a
.simdInternal_
, b
.simdInternal_
),
467 vec_cmplt(a
.simdInternal_
, b
.simdInternal_
))
471 static inline SimdFBool gmx_simdcall
472 operator<(SimdFloat a
, SimdFloat b
)
475 vec_cmplt(a
.simdInternal_
, b
.simdInternal_
)
479 static inline SimdFBool gmx_simdcall
480 operator<=(SimdFloat a
, SimdFloat b
)
483 vec_cmple(a
.simdInternal_
, b
.simdInternal_
)
487 static inline SimdFBool gmx_simdcall
488 testBits(SimdFloat a
)
491 vec_cmpgt( reinterpret_cast<__vector
unsigned int>(a
.simdInternal_
), vec_splats(0U))
495 static inline SimdFBool gmx_simdcall
496 operator&&(SimdFBool a
, SimdFBool b
)
499 vec_and(a
.simdInternal_
, b
.simdInternal_
)
503 static inline SimdFBool gmx_simdcall
504 operator||(SimdFBool a
, SimdFBool b
)
507 vec_or(a
.simdInternal_
, b
.simdInternal_
)
511 static inline bool gmx_simdcall
514 return vec_any_ne(a
.simdInternal_
, reinterpret_cast<__vector vsxBool
int>(vec_splats(0)));
517 static inline SimdFloat gmx_simdcall
518 selectByMask(SimdFloat a
, SimdFBool m
)
521 vec_and(a
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
))
525 static inline SimdFloat gmx_simdcall
526 selectByNotMask(SimdFloat a
, SimdFBool m
)
529 vec_andc(a
.simdInternal_
, reinterpret_cast<__vector
float>(m
.simdInternal_
))
533 static inline SimdFloat gmx_simdcall
534 blend(SimdFloat a
, SimdFloat b
, SimdFBool sel
)
537 vec_sel(a
.simdInternal_
, b
.simdInternal_
, sel
.simdInternal_
)
541 static inline SimdFInt32 gmx_simdcall
542 operator&(SimdFInt32 a
, SimdFInt32 b
)
545 vec_and(a
.simdInternal_
, b
.simdInternal_
)
549 static inline SimdFInt32 gmx_simdcall
550 andNot(SimdFInt32 a
, SimdFInt32 b
)
553 vec_andc(b
.simdInternal_
, a
.simdInternal_
)
557 static inline SimdFInt32 gmx_simdcall
558 operator|(SimdFInt32 a
, SimdFInt32 b
)
561 vec_or(a
.simdInternal_
, b
.simdInternal_
)
565 static inline SimdFInt32 gmx_simdcall
566 operator^(SimdFInt32 a
, SimdFInt32 b
)
569 vec_xor(a
.simdInternal_
, b
.simdInternal_
)
573 static inline SimdFInt32 gmx_simdcall
574 operator+(SimdFInt32 a
, SimdFInt32 b
)
577 vec_add(a
.simdInternal_
, b
.simdInternal_
)
581 static inline SimdFInt32 gmx_simdcall
582 operator-(SimdFInt32 a
, SimdFInt32 b
)
585 vec_sub(a
.simdInternal_
, b
.simdInternal_
)
589 static inline SimdFInt32 gmx_simdcall
590 operator*(SimdFInt32 a
, SimdFInt32 b
)
593 a
.simdInternal_
* b
.simdInternal_
597 static inline SimdFIBool gmx_simdcall
598 operator==(SimdFInt32 a
, SimdFInt32 b
)
601 vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
)
605 static inline SimdFIBool gmx_simdcall
606 testBits(SimdFInt32 a
)
609 vec_cmpgt( reinterpret_cast<__vector
unsigned int>(a
.simdInternal_
), vec_splats(0U))
613 static inline SimdFIBool gmx_simdcall
614 operator<(SimdFInt32 a
, SimdFInt32 b
)
617 vec_cmplt(a
.simdInternal_
, b
.simdInternal_
)
621 static inline SimdFIBool gmx_simdcall
622 operator&&(SimdFIBool a
, SimdFIBool b
)
625 vec_and(a
.simdInternal_
, b
.simdInternal_
)
629 static inline SimdFIBool gmx_simdcall
630 operator||(SimdFIBool a
, SimdFIBool b
)
633 vec_or(a
.simdInternal_
, b
.simdInternal_
)
637 static inline bool gmx_simdcall
638 anyTrue(SimdFIBool a
)
640 return vec_any_ne(a
.simdInternal_
, reinterpret_cast<__vector vsxBool
int>(vec_splats(0)));
643 static inline SimdFInt32 gmx_simdcall
644 selectByMask(SimdFInt32 a
, SimdFIBool m
)
647 vec_and(a
.simdInternal_
, reinterpret_cast<__vector
signed int>(m
.simdInternal_
))
651 static inline SimdFInt32 gmx_simdcall
652 selectByNotMask(SimdFInt32 a
, SimdFIBool m
)
655 vec_andc(a
.simdInternal_
, reinterpret_cast<__vector
signed int>(m
.simdInternal_
))
659 static inline SimdFInt32 gmx_simdcall
660 blend(SimdFInt32 a
, SimdFInt32 b
, SimdFIBool sel
)
663 vec_sel(a
.simdInternal_
, b
.simdInternal_
, sel
.simdInternal_
)
667 static inline SimdFInt32 gmx_simdcall
671 vec_cts(vec_round(a
.simdInternal_
), 0)
675 static inline SimdFInt32 gmx_simdcall
679 vec_cts(a
.simdInternal_
, 0)
683 static inline SimdFloat gmx_simdcall
687 vec_ctf(a
.simdInternal_
, 0)
691 static inline SimdFIBool gmx_simdcall
699 static inline SimdFBool gmx_simdcall
700 cvtIB2B(SimdFIBool a
)
707 static inline SimdFloat gmx_simdcall
708 copysign(SimdFloat x
, SimdFloat y
)
710 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
712 __asm__ ("xvcpsgnsp %x0,%x1,%x2" : "=wf" (res
) : "wf" (y
.simdInternal_
), "wf" (x
.simdInternal_
));
718 vec_cpsgn(y
.simdInternal_
, x
.simdInternal_
)
725 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H