Update instructions in containers.rst
[gromacs.git] / src / gromacs / simd / impl_ibm_vsx / impl_ibm_vsx_simd_float.h
blobab8932d0fcb6441dd52fbeb55e2857376014e976
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
38 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
40 #include "config.h"
42 #include "gromacs/math/utilities.h"
43 #include "gromacs/utility/basedefinitions.h"
45 #include "impl_ibm_vsx_definitions.h"
47 namespace gmx
50 class SimdFloat
52 public:
53 SimdFloat() {}
55 // gcc-4.9 does not recognize that we use the parameter
56 SimdFloat(float gmx_unused f) : simdInternal_(vec_splats(f)) {}
58 // Internal utility constructor to simplify return statements
59 SimdFloat(__vector float simd) : simdInternal_(simd) {}
61 __vector float simdInternal_;
64 class SimdFInt32
66 public:
67 SimdFInt32() {}
69 // gcc-4.9 does not recognize that we use the parameter
70 SimdFInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
72 // Internal utility constructor to simplify return statements
73 SimdFInt32(__vector signed int simd) : simdInternal_(simd) {}
75 __vector signed int simdInternal_;
78 class SimdFBool
80 public:
81 SimdFBool() {}
83 SimdFBool(bool b) :
84 simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
88 // Internal utility constructor to simplify return statements
89 SimdFBool(__vector vsxBool int simd) : simdInternal_(simd) {}
91 __vector vsxBool int simdInternal_;
94 class SimdFIBool
96 public:
97 SimdFIBool() {}
99 SimdFIBool(bool b) :
100 simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
104 // Internal utility constructor to simplify return statements
105 SimdFIBool(__vector vsxBool int simd) : simdInternal_(simd) {}
107 __vector vsxBool int simdInternal_;
110 // Note that the interfaces we use here have been a mess in xlc;
111 // currently version 13.1.5 is required.
113 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
115 return { *reinterpret_cast<const __vector float*>(m) };
118 static inline void gmx_simdcall store(float* m, SimdFloat a)
120 *reinterpret_cast<__vector float*>(m) = a.simdInternal_;
123 static inline SimdFloat gmx_simdcall simdLoadU(const float* m, SimdFloatTag = {})
125 return
127 #if __GNUC__ < 7
128 *reinterpret_cast<const __vector float*>(m)
129 #else
130 vec_xl(0, m)
131 #endif
135 static inline void gmx_simdcall storeU(float* m, SimdFloat a)
137 #if __GNUC__ < 7
138 *reinterpret_cast<__vector float*>(m) = a.simdInternal_;
139 #else
140 vec_xst(a.simdInternal_, 0, m);
141 #endif
144 static inline SimdFloat gmx_simdcall setZeroF()
146 return { vec_splats(0.0F) };
149 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
151 return { *reinterpret_cast<const __vector int*>(m) };
154 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
156 *reinterpret_cast<__vector int*>(m) = a.simdInternal_;
159 static inline SimdFInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdFInt32Tag)
161 return
163 #if __GNUC__ < 7
164 *reinterpret_cast<const __vector int*>(m)
165 #else
166 vec_xl(0, m)
167 #endif
171 static inline void gmx_simdcall storeU(std::int32_t* m, SimdFInt32 a)
173 #if __GNUC__ < 7
174 *reinterpret_cast<__vector int*>(m) = a.simdInternal_;
175 #else
176 vec_xst(a.simdInternal_, 0, m);
177 #endif
180 static inline SimdFInt32 gmx_simdcall setZeroFI()
182 return { vec_splats(static_cast<int>(0)) };
185 // gcc-4.9 does not detect that vec_extract() uses its argument
186 template<int index>
187 static inline std::int32_t gmx_simdcall extract(SimdFInt32 gmx_unused a)
189 return vec_extract(a.simdInternal_, index);
192 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
194 return { vec_and(a.simdInternal_, b.simdInternal_) };
197 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
199 return { vec_andc(b.simdInternal_, a.simdInternal_) };
202 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
204 return { vec_or(a.simdInternal_, b.simdInternal_) };
207 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
209 return { vec_xor(a.simdInternal_, b.simdInternal_) };
212 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
214 return { vec_add(a.simdInternal_, b.simdInternal_) };
217 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
219 return { vec_sub(a.simdInternal_, b.simdInternal_) };
222 static inline SimdFloat gmx_simdcall operator-(SimdFloat x)
224 return { -x.simdInternal_ };
227 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
229 return { vec_mul(a.simdInternal_, b.simdInternal_) };
232 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
234 return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
237 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
239 return { vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
242 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
244 return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
247 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
249 return { vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
252 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
254 return { vec_rsqrte(x.simdInternal_) };
257 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
259 return { vec_re(x.simdInternal_) };
262 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
264 return { vec_add(a.simdInternal_,
265 vec_and(b.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))) };
268 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
270 SimdFloat prod = a * b;
272 return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
275 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
277 SimdFloat prod = fma(a, b, c);
279 return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
282 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
284 #ifndef NDEBUG
285 x.simdInternal_ = vec_sel(vec_splats(1.0F), x.simdInternal_, m.simdInternal_);
286 #endif
287 return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
290 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
292 #ifndef NDEBUG
293 x.simdInternal_ = vec_sel(vec_splats(1.0F), x.simdInternal_, m.simdInternal_);
294 #endif
295 return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
298 static inline SimdFloat gmx_simdcall abs(SimdFloat x)
300 return { vec_abs(x.simdInternal_) };
303 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
305 return { vec_max(a.simdInternal_, b.simdInternal_) };
308 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
310 return { vec_min(a.simdInternal_, b.simdInternal_) };
313 static inline SimdFloat gmx_simdcall round(SimdFloat x)
315 return { vec_round(x.simdInternal_) };
318 static inline SimdFloat gmx_simdcall trunc(SimdFloat x)
320 return { vec_trunc(x.simdInternal_) };
323 template<MathOptimization opt = MathOptimization::Safe>
324 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
326 const __vector float exponentMask = reinterpret_cast<__vector float>(vec_splats(0x7F800000U));
327 const __vector signed int exponentBias = vec_splats(126);
328 const __vector float half = vec_splats(0.5F);
329 __vector signed int iExponent;
331 __vector vsxBool int valueIsZero =
332 vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector float>(vec_splats(0.0)));
334 iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
335 iExponent = vec_sub(vec_sr(iExponent, vec_splats(23U)), exponentBias);
336 iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero));
338 __vector float result = vec_or(vec_andc(value.simdInternal_, exponentMask), half);
339 result = vec_sel(result, value.simdInternal_, valueIsZero);
341 exponent->simdInternal_ = iExponent;
343 return { result };
346 template<MathOptimization opt = MathOptimization::Safe>
347 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
349 const __vector signed int exponentBias = vec_splats(127);
350 __vector signed int iExponent;
352 iExponent = vec_add(exponent.simdInternal_, exponentBias);
354 if (opt == MathOptimization::Safe)
356 // Make sure biased argument is not negative
357 iExponent = vec_max(iExponent, vec_splat_s32(0));
360 iExponent = vec_sl(iExponent, vec_splats(23U));
362 return { vec_mul(value.simdInternal_, reinterpret_cast<__vector float>(iExponent)) };
365 static inline float gmx_simdcall reduce(SimdFloat x)
367 const __vector unsigned char perm1 = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
368 const __vector unsigned char perm2 = { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
370 x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm1));
371 x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm2));
372 return vec_extract(x.simdInternal_, 0);
375 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
377 return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
380 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
382 return { vec_or(vec_cmpgt(a.simdInternal_, b.simdInternal_),
383 vec_cmplt(a.simdInternal_, b.simdInternal_)) };
386 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
388 return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
391 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
393 return { vec_cmple(a.simdInternal_, b.simdInternal_) };
396 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
398 return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
401 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
403 return { vec_and(a.simdInternal_, b.simdInternal_) };
406 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
408 return { vec_or(a.simdInternal_, b.simdInternal_) };
411 static inline bool gmx_simdcall anyTrue(SimdFBool a)
413 return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
416 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool m)
418 return { vec_and(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
421 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool m)
423 return { vec_andc(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
426 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
428 return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
431 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
433 return { vec_and(a.simdInternal_, b.simdInternal_) };
436 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
438 return { vec_andc(b.simdInternal_, a.simdInternal_) };
441 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
443 return { vec_or(a.simdInternal_, b.simdInternal_) };
446 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
448 return { vec_xor(a.simdInternal_, b.simdInternal_) };
451 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
453 return { vec_add(a.simdInternal_, b.simdInternal_) };
456 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
458 return { vec_sub(a.simdInternal_, b.simdInternal_) };
461 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
463 return { a.simdInternal_ * b.simdInternal_ };
466 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
468 return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
471 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
473 return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
476 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
478 return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
481 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
483 return { vec_and(a.simdInternal_, b.simdInternal_) };
486 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
488 return { vec_or(a.simdInternal_, b.simdInternal_) };
491 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
493 return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
496 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool m)
498 return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
501 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool m)
503 return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
506 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
508 return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
511 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
513 return { vec_cts(vec_round(a.simdInternal_), 0) };
516 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
518 return { vec_cts(a.simdInternal_, 0) };
521 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
523 return { vec_ctf(a.simdInternal_, 0) };
526 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
528 return { a.simdInternal_ };
531 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
533 return { a.simdInternal_ };
536 static inline SimdFloat gmx_simdcall copysign(SimdFloat x, SimdFloat y)
538 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
539 __vector float res;
540 __asm__("xvcpsgnsp %x0,%x1,%x2" : "=wf"(res) : "wf"(y.simdInternal_), "wf"(x.simdInternal_));
541 return { res };
542 #else
543 return { vec_cpsgn(y.simdInternal_, x.simdInternal_) };
544 #endif
547 } // namespace gmx
549 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H