2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H
37 #define GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H
41 // Assert is buggy on xlc with high optimization, so we skip it for QPX
49 #include "gromacs/math/utilities.h"
50 #include "gromacs/utility/basedefinitions.h"
60 SimdFloat(float f
) : simdInternal_(vec_splats(f
)) {}
62 // Internal utility constructor to simplify return statements
63 SimdFloat(vector4double simd
) : simdInternal_(simd
) {}
65 vector4double simdInternal_
;
73 SimdFInt32(std::int32_t i
)
75 GMX_ALIGNED(int, GMX_SIMD_FINT32_WIDTH
) idata
[GMX_SIMD_FINT32_WIDTH
];
77 simdInternal_
= vec_splat(vec_ldia(0, idata
), 0);
80 // Internal utility constructor to simplify return statements
81 SimdFInt32(vector4double simd
) : simdInternal_(simd
) {}
83 vector4double simdInternal_
;
91 SimdFBool(bool b
) : simdInternal_(vec_splats(b
? 1.0 : -1.0)) {}
93 // Internal utility constructor to simplify return statements
94 SimdFBool(vector4double simd
) : simdInternal_(simd
) {}
96 vector4double simdInternal_
;
// Load a SimdFloat from the float array m (constness is cast away for the intrinsic).
// NOTE(review): two alternative load expressions are visible below (vec_ld and
// vec_lda). The lines that choose between them, plus the braces and the return
// statement, are missing from this extract - restore them from the upstream file
// before changing this function. Presumably the vec_lda variant is the
// alignment-checking one; confirm against the IBM XL intrinsic reference.
99 static inline SimdFloat gmx_simdcall
100 simdLoad(const float *m
)
104 vec_ld(0, const_cast<float *>(m
))
108 vec_lda(0, const_cast<float *>(m
))
// Store the SimdFloat a to the float array m.
// NOTE(review): two alternative store statements are visible below (vec_st and
// vec_sta). The lines that choose between them and the enclosing braces are
// missing from this extract - restore them from the upstream file before
// changing this function.
113 static inline void gmx_simdcall
114 store(float *m
, SimdFloat a
)
117 vec_st(a
.simdInternal_
, 0, m
);
119 vec_sta(a
.simdInternal_
, 0, m
);
// NOTE(review): orphaned declaration head - the function name, parameter list and
// body that belong to this SimdFloat-returning function are missing from this extract.
123 static inline SimdFloat gmx_simdcall
// Load a SimdFInt32 from the integer array m (constness is cast away for the intrinsic).
// NOTE(review): two alternative load expressions are visible below (vec_ldia and
// vec_ldiaa). The lines that choose between them, plus the braces and the return
// statement, are missing from this extract - restore them from the upstream file
// before changing this function.
131 static inline SimdFInt32 gmx_simdcall
132 simdLoadFI(const std::int32_t * m
)
136 vec_ldia(0, const_cast<int *>(m
))
140 vec_ldiaa(0, const_cast<int *>(m
))
145 static inline void gmx_simdcall
146 store(std::int32_t * m
, SimdFInt32 a
)
148 vec_st(a
.simdInternal_
, 0, m
);
// NOTE(review): orphaned declaration head - the function name, parameter list and
// body that belong to this SimdFInt32-returning function are missing from this extract.
151 static inline SimdFInt32 gmx_simdcall
159 static inline SimdFloat gmx_simdcall
160 operator+(SimdFloat a
, SimdFloat b
)
163 vec_add(a
.simdInternal_
, b
.simdInternal_
)
167 static inline SimdFloat gmx_simdcall
168 operator-(SimdFloat a
, SimdFloat b
)
171 vec_sub(a
.simdInternal_
, b
.simdInternal_
)
175 static inline SimdFloat gmx_simdcall
176 operator-(SimdFloat x
)
179 vec_neg(x
.simdInternal_
)
183 static inline SimdFloat gmx_simdcall
184 operator*(SimdFloat a
, SimdFloat b
)
187 vec_mul(a
.simdInternal_
, b
.simdInternal_
)
191 static inline SimdFloat gmx_simdcall
192 fma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
195 vec_madd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
199 static inline SimdFloat gmx_simdcall
200 fms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
203 vec_msub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
207 static inline SimdFloat gmx_simdcall
208 fnma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
211 vec_nmsub(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
215 static inline SimdFloat gmx_simdcall
216 fnms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
219 vec_nmadd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
)
// Applies the QPX vec_rsqrte intrinsic to x.simdInternal_ (presumably a
// reciprocal-square-root estimate; confirm against the IBM XL intrinsic reference).
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
223 static inline SimdFloat gmx_simdcall
227 vec_rsqrte(x
.simdInternal_
)
// Applies the QPX vec_re intrinsic to x.simdInternal_ (presumably a reciprocal
// estimate; confirm against the IBM XL intrinsic reference).
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
231 static inline SimdFloat gmx_simdcall
235 vec_re(x
.simdInternal_
)
239 static inline SimdFloat gmx_simdcall
240 maskAdd(SimdFloat a
, SimdFloat b
, SimdFBool m
)
243 vec_add(a
.simdInternal_
, vec_sel(vec_splats(0.0), b
.simdInternal_
, m
.simdInternal_
))
247 static inline SimdFloat gmx_simdcall
248 maskzMul(SimdFloat a
, SimdFloat b
, SimdFBool m
)
251 vec_sel(vec_splats(0.0), vec_mul(a
.simdInternal_
, b
.simdInternal_
), m
.simdInternal_
)
255 static inline SimdFloat
256 maskzFma(SimdFloat a
, SimdFloat b
, SimdFloat c
, SimdFBool m
)
259 vec_sel(vec_splats(0.0), vec_madd(a
.simdInternal_
, b
.simdInternal_
, c
.simdInternal_
), m
.simdInternal_
)
// Masked vec_rsqrte: the final expression keeps vec_rsqrte(x) where m is set and
// yields 0.0 elsewhere. The first statement replaces the unselected elements of x
// by 1.0 before the estimate is taken, so the intrinsic never sees those inputs.
// NOTE(review): the embedded listing numbers jump 264 -> 267 -> 270, so lines are
// missing on both sides of the first statement (possibly a preprocessor guard), as
// are the braces and the return. Restore from the upstream file before editing.
263 static inline SimdFloat
264 maskzRsqrt(SimdFloat x
, SimdFBool m
)
267 x
.simdInternal_
= vec_sel(vec_splats(1.0), x
.simdInternal_
, m
.simdInternal_
);
270 vec_sel(vec_splats(0.0), vec_rsqrte(x
.simdInternal_
), m
.simdInternal_
)
// Masked vec_re: the final expression keeps vec_re(x) where m is set and yields
// 0.0 elsewhere. The first statement replaces the unselected elements of x by 1.0
// before the estimate is taken, so the intrinsic never sees those inputs.
// NOTE(review): the embedded listing numbers jump 275 -> 278 -> 281, so lines are
// missing on both sides of the first statement (possibly a preprocessor guard), as
// are the braces and the return. Restore from the upstream file before editing.
274 static inline SimdFloat
275 maskzRcp(SimdFloat x
, SimdFBool m
)
278 x
.simdInternal_
= vec_sel(vec_splats(1.0), x
.simdInternal_
, m
.simdInternal_
);
281 vec_sel(vec_splats(0.0), vec_re(x
.simdInternal_
), m
.simdInternal_
)
// Applies the QPX vec_abs intrinsic to x.simdInternal_.
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
285 static inline SimdFloat gmx_simdcall
289 vec_abs( x
.simdInternal_
)
293 static inline SimdFloat gmx_simdcall
294 max(SimdFloat a
, SimdFloat b
)
297 vec_sel(b
.simdInternal_
, a
.simdInternal_
, vec_sub(a
.simdInternal_
, b
.simdInternal_
))
301 static inline SimdFloat gmx_simdcall
302 min(SimdFloat a
, SimdFloat b
)
305 vec_sel(b
.simdInternal_
, a
.simdInternal_
, vec_sub(b
.simdInternal_
, a
.simdInternal_
))
// Rounds x by converting to integer and back: vec_cfid(vec_ctid(x.simdInternal_)).
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
309 static inline SimdFloat gmx_simdcall
312 // Note: It is critical to use vec_cfid(vec_ctid(a)) for the implementation
313 // here, since vec_round() does not adhere to the FP control
314 // word rounding scheme. We rely on float-to-float and float-to-integer
315 // rounding being the same for half-way values in a few algorithms.
317 vec_cfid(vec_ctid(x
.simdInternal_
))
// Applies the QPX vec_trunc intrinsic to x.simdInternal_.
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
321 static inline SimdFloat gmx_simdcall
325 vec_trunc(x
.simdInternal_
)
329 static inline SimdFloat
330 frexp(SimdFloat value
, SimdFInt32
* exponent
)
332 GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH
) rdata
[GMX_SIMD_FLOAT_WIDTH
];
333 GMX_ALIGNED(int, GMX_SIMD_FLOAT_WIDTH
) idata
[GMX_SIMD_FLOAT_WIDTH
];
335 vec_st(value
.simdInternal_
, 0, rdata
);
337 for (std::size_t i
= 0; i
< GMX_SIMD_FLOAT_WIDTH
; i
++)
339 rdata
[i
] = std::frexp(rdata
[i
], idata
+ i
);
342 exponent
->simdInternal_
= vec_ldia(0, idata
);
343 value
.simdInternal_
= vec_ld(0, rdata
);
348 template <MathOptimization opt
= MathOptimization::Safe
>
349 static inline SimdFloat
350 ldexp(SimdFloat value
, SimdFInt32 exponent
)
352 GMX_ALIGNED(float, GMX_SIMD_FLOAT_WIDTH
) rdata
[GMX_SIMD_FLOAT_WIDTH
];
353 GMX_ALIGNED(int, GMX_SIMD_FLOAT_WIDTH
) idata
[GMX_SIMD_FLOAT_WIDTH
];
355 vec_st(value
.simdInternal_
, 0, rdata
);
356 vec_st(exponent
.simdInternal_
, 0, idata
);
358 for (std::size_t i
= 0; i
< GMX_SIMD_FLOAT_WIDTH
; i
++)
360 rdata
[i
] = std::ldexp(rdata
[i
], idata
[i
]);
363 value
.simdInternal_
= vec_ld(0, rdata
);
// Horizontal reduction of x to a single float: x is combined with a copy of itself
// shifted by two elements (vec_sldw), and element 0 of the result is returned.
// NOTE(review): the line carrying the function name and parameter list and the
// braces are missing from this extract. In the visible lines z is assigned but
// never declared and never added into y, so as shown only one of the two folding
// steps reaches the result; the embedded listing numbers skip 372-373 and 376,
// where the declaration of z and a final vec_add(y, z) presumably belong. Restore
// from the upstream file before editing.
368 static inline float gmx_simdcall
371 vector4double y
= vec_sldw(x
.simdInternal_
, x
.simdInternal_
, 2);
374 y
= vec_add(y
, x
.simdInternal_
);
375 z
= vec_sldw(y
, y
, 1);
377 return vec_extract(y
, 0);
380 static inline SimdFBool gmx_simdcall
381 operator==(SimdFloat a
, SimdFloat b
)
384 vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
)
388 static inline SimdFBool gmx_simdcall
389 operator!=(SimdFloat a
, SimdFloat b
)
392 vec_not(vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
))
396 static inline SimdFBool gmx_simdcall
397 operator<(SimdFloat a
, SimdFloat b
)
400 vec_cmplt(a
.simdInternal_
, b
.simdInternal_
)
404 static inline SimdFBool gmx_simdcall
405 operator<=(SimdFloat a
, SimdFloat b
)
408 vec_or(vec_cmplt(a
.simdInternal_
, b
.simdInternal_
), vec_cmpeq(a
.simdInternal_
, b
.simdInternal_
))
412 static inline SimdFBool gmx_simdcall
413 operator&&(SimdFBool a
, SimdFBool b
)
416 vec_and(a
.simdInternal_
, b
.simdInternal_
)
420 static inline SimdFBool gmx_simdcall
421 operator||(SimdFBool a
, SimdFBool b
)
424 vec_or(a
.simdInternal_
, b
.simdInternal_
)
// Reduces the mask a to a single bool: the register is OR-ed with copies of itself
// shifted by two and then by one element (vec_sldw), so element 0 ends up holding
// the OR over all elements; the result is true when that element is greater than 0.
// NOTE(review): the line carrying the function name and parameter list and the
// braces are missing from this extract.
428 static inline bool gmx_simdcall
431 vector4double b
= vec_sldw(a
.simdInternal_
, a
.simdInternal_
, 2);
433 a
.simdInternal_
= vec_or(a
.simdInternal_
, b
);
434 b
= vec_sldw(a
.simdInternal_
, a
.simdInternal_
, 1);
435 b
= vec_or(a
.simdInternal_
, b
);
436 return (vec_extract(b
, 0) > 0);
439 static inline SimdFloat gmx_simdcall
440 selectByMask(SimdFloat a
, SimdFBool m
)
443 vec_sel(vec_splats(0.0), a
.simdInternal_
, m
.simdInternal_
)
447 static inline SimdFloat gmx_simdcall
448 selectByNotMask(SimdFloat a
, SimdFBool m
)
451 vec_sel(a
.simdInternal_
, vec_splats(0.0), m
.simdInternal_
)
455 static inline SimdFloat gmx_simdcall
456 blend(SimdFloat a
, SimdFloat b
, SimdFBool sel
)
459 vec_sel(a
.simdInternal_
, b
.simdInternal_
, sel
.simdInternal_
)
// Converts a.simdInternal_ to integers with the QPX vec_ctiw intrinsic
// (presumably using the current rounding mode; confirm against the IBM XL reference).
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
463 static inline SimdFInt32 gmx_simdcall
467 vec_ctiw(a
.simdInternal_
)
// Converts a.simdInternal_ to integers with the QPX vec_ctiwz intrinsic
// (presumably rounding toward zero; confirm against the IBM XL reference).
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
471 static inline SimdFInt32 gmx_simdcall
475 vec_ctiwz(a
.simdInternal_
)
// Converts the integer contents of a.simdInternal_ to floating point with the
// QPX vec_cfid intrinsic.
// NOTE(review): the line carrying the function name and parameter list, the braces
// and the return statement are missing from this extract.
479 static inline SimdFloat gmx_simdcall
483 vec_cfid(a
.simdInternal_
)
487 static inline SimdFloat gmx_simdcall
488 copysign(SimdFloat x
, SimdFloat y
)
491 vec_cpsgn(y
.simdInternal_
, x
.simdInternal_
)
497 #endif // GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_FLOAT_H