Introduce SimulatorBuilder
[gromacs.git] / src / gromacs / simd / impl_ibm_vsx / impl_ibm_vsx_simd4_float.h
blob9abedb1b42c123bc5bca83ba386798dc02f4faee
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD4_FLOAT_H
37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD4_FLOAT_H
39 #include "config.h"
41 #include "gromacs/utility/basedefinitions.h"
43 #include "impl_ibm_vsx_definitions.h"
44 #include "impl_ibm_vsx_simd_float.h"
46 namespace gmx
49 class Simd4Float
51 public:
52 Simd4Float() {}
54 // gcc-4.9 does not recognize that we use the parameter
55 Simd4Float(float gmx_unused f) : simdInternal_(vec_splats(f)) {}
57 // Internal utility constructor to simplify return statements
58 Simd4Float(__vector float simd) : simdInternal_(simd) {}
60 __vector float simdInternal_;
63 class Simd4FBool
65 public:
66 Simd4FBool() {}
68 //! \brief Construct from scalar bool
69 Simd4FBool(bool b) : simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats( b ? 0xFFFFFFFF : 0))) {}
71 // Internal utility constructor to simplify return statements
72 Simd4FBool(__vector vsxBool int simd) : simdInternal_(simd) {}
74 __vector vsxBool int simdInternal_;
// The VSX load & store operations are a bit of a mess. The interface is different
// for xlc version 12, xlc version 13, and gcc. Long-term, IBM recommends
// simply using pointer dereferencing both for aligned and unaligned loads.
// That's nice, but unfortunately xlc still bugs out when the pointer is
// not aligned. Sticking to vec_xl/vec_xst isn't a solution either, since
// that appears to be buggy for some _aligned_ loads :-)
//
// For now, we use pointer dereferencing for all aligned load/stores, and
// for unaligned ones with gcc. On xlc we use vec_xlw4/vec_xstw4 for
// unaligned memory operations. The latest docs recommend using the overloaded
// vec_xl/vec_xst, but that is not supported on xlc version 12. We'll
// revisit things once xlc is a bit more stable - for now you probably want
// to stick to gcc...
//
// NOTE(review): the unaligned routines in this file select vec_xl/vec_xst when
// __GNUC__ >= 7 and fall back to pointer dereferencing otherwise; no
// vec_xlw4/vec_xstw4 call is present here, so the xlc remark above may be
// out of date - confirm before relying on it.
91 static inline Simd4Float gmx_simdcall
92 load4(const float *m)
94 return {
95 *reinterpret_cast<const __vector float *>(m)
99 static inline void gmx_simdcall
100 store4(float *m, Simd4Float a)
102 *reinterpret_cast<__vector float *>(m) = a.simdInternal_;
105 static inline Simd4Float gmx_simdcall
106 load4U(const float *m)
108 return {
109 #if __GNUC__ < 7
110 *reinterpret_cast<const __vector float *>(m)
111 #else
112 vec_xl(0, m)
113 #endif
117 static inline void gmx_simdcall
118 store4U(float *m, Simd4Float a)
120 #if __GNUC__ < 7
121 *reinterpret_cast<__vector float *>(m) = a.simdInternal_;
122 #else
123 vec_xst(a.simdInternal_, 0, m);
124 #endif
127 static inline Simd4Float gmx_simdcall
128 simd4SetZeroF()
130 return {
131 vec_splats(0.0f)
135 static inline Simd4Float gmx_simdcall
136 operator&(Simd4Float a, Simd4Float b)
138 return {
139 vec_and(a.simdInternal_, b.simdInternal_)
143 static inline Simd4Float gmx_simdcall
144 andNot(Simd4Float a, Simd4Float b)
146 return {
147 vec_andc(b.simdInternal_, a.simdInternal_)
151 static inline Simd4Float gmx_simdcall
152 operator|(Simd4Float a, Simd4Float b)
154 return {
155 vec_or(a.simdInternal_, b.simdInternal_)
159 static inline Simd4Float gmx_simdcall
160 operator^(Simd4Float a, Simd4Float b)
162 return {
163 vec_xor(a.simdInternal_, b.simdInternal_)
167 static inline Simd4Float gmx_simdcall
168 operator+(Simd4Float a, Simd4Float b)
170 return {
171 vec_add(a.simdInternal_, b.simdInternal_)
175 static inline Simd4Float gmx_simdcall
176 operator-(Simd4Float a, Simd4Float b)
178 return {
179 vec_sub(a.simdInternal_, b.simdInternal_)
183 static inline Simd4Float gmx_simdcall
184 operator-(Simd4Float x)
186 return {
187 -x.simdInternal_
191 static inline Simd4Float gmx_simdcall
192 operator*(Simd4Float a, Simd4Float b)
194 return {
195 vec_mul(a.simdInternal_, b.simdInternal_)
199 static inline Simd4Float gmx_simdcall
200 fma(Simd4Float a, Simd4Float b, Simd4Float c)
202 return {
203 vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
207 static inline Simd4Float gmx_simdcall
208 fms(Simd4Float a, Simd4Float b, Simd4Float c)
210 return {
211 vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
215 static inline Simd4Float gmx_simdcall
216 fnma(Simd4Float a, Simd4Float b, Simd4Float c)
218 return {
219 vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
223 static inline Simd4Float gmx_simdcall
224 fnms(Simd4Float a, Simd4Float b, Simd4Float c)
226 return {
227 vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
231 static inline Simd4Float gmx_simdcall
232 rsqrt(Simd4Float x)
234 return {
235 vec_rsqrte(x.simdInternal_)
239 static inline Simd4Float gmx_simdcall
240 abs(Simd4Float x)
242 return {
243 vec_abs( x.simdInternal_ )
247 static inline Simd4Float gmx_simdcall
248 max(Simd4Float a, Simd4Float b)
250 return {
251 vec_max(a.simdInternal_, b.simdInternal_)
255 static inline Simd4Float gmx_simdcall
256 min(Simd4Float a, Simd4Float b)
258 return {
259 vec_min(a.simdInternal_, b.simdInternal_)
263 static inline Simd4Float gmx_simdcall
264 round(Simd4Float x)
266 return {
267 vec_round( x.simdInternal_ )
271 static inline Simd4Float gmx_simdcall
272 trunc(Simd4Float x)
274 return {
275 vec_trunc( x.simdInternal_ )
279 static inline float gmx_simdcall
280 dotProduct(Simd4Float a, Simd4Float b)
282 const __vector unsigned char perm1 = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
283 const __vector unsigned char perm2 = { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
284 __vector float c = vec_mul(a.simdInternal_, b.simdInternal_);
285 __vector float sum;
286 sum = vec_add(c, vec_perm(c, c, perm1));
287 sum = vec_add(sum, vec_perm(c, c, perm2));
288 return vec_extract(sum, 0);
291 static inline void gmx_simdcall
292 transpose(Simd4Float * v0, Simd4Float * v1,
293 Simd4Float * v2, Simd4Float * v3)
295 __vector float t0 = vec_mergeh(v0->simdInternal_, v2->simdInternal_);
296 __vector float t1 = vec_mergel(v0->simdInternal_, v2->simdInternal_);
297 __vector float t2 = vec_mergeh(v1->simdInternal_, v3->simdInternal_);
298 __vector float t3 = vec_mergel(v1->simdInternal_, v3->simdInternal_);
299 v0->simdInternal_ = vec_mergeh(t0, t2);
300 v1->simdInternal_ = vec_mergel(t0, t2);
301 v2->simdInternal_ = vec_mergeh(t1, t3);
302 v3->simdInternal_ = vec_mergel(t1, t3);
305 static inline Simd4FBool gmx_simdcall
306 operator==(Simd4Float a, Simd4Float b)
308 return {
309 vec_cmpeq(a.simdInternal_, b.simdInternal_)
313 static inline Simd4FBool gmx_simdcall
314 operator!=(Simd4Float a, Simd4Float b)
316 return {
317 vec_or(vec_cmpgt(a.simdInternal_, b.simdInternal_),
318 vec_cmplt(a.simdInternal_, b.simdInternal_))
322 static inline Simd4FBool gmx_simdcall
323 operator<(Simd4Float a, Simd4Float b)
325 return {
326 vec_cmplt(a.simdInternal_, b.simdInternal_)
330 static inline Simd4FBool gmx_simdcall
331 operator<=(Simd4Float a, Simd4Float b)
333 return {
334 vec_cmple(a.simdInternal_, b.simdInternal_)
338 static inline Simd4FBool gmx_simdcall
339 operator&&(Simd4FBool a, Simd4FBool b)
341 return {
342 vec_and(a.simdInternal_, b.simdInternal_)
346 static inline Simd4FBool gmx_simdcall
347 operator||(Simd4FBool a, Simd4FBool b)
349 return {
350 vec_or(a.simdInternal_, b.simdInternal_)
354 static inline bool gmx_simdcall
355 anyTrue(Simd4FBool a)
357 return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
360 static inline Simd4Float gmx_simdcall
361 selectByMask(Simd4Float a, Simd4FBool m)
363 return {
364 vec_and(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
368 static inline Simd4Float gmx_simdcall
369 selectByNotMask(Simd4Float a, Simd4FBool m)
371 return {
372 vec_andc(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
376 static inline Simd4Float gmx_simdcall
377 blend(Simd4Float a, Simd4Float b, Simd4FBool sel)
379 return {
380 vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
384 static inline float gmx_simdcall
385 reduce(Simd4Float x)
387 const __vector unsigned char perm1 = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
388 const __vector unsigned char perm2 = { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
390 x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm1));
391 x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm2));
392 return vec_extract(x.simdInternal_, 0);
395 } // namespace gmx
397 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD4_FLOAT_H