2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_H
36 #define GMX_SIMD_SCALAR_H
44 /*! \libinternal \file
46 * \brief Scalar float functions corresponding to GROMACS SIMD functions
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic function is in templated combined SIMD/non-SIMD code.
53 * There are a handful of limitations, in particular that it is impossible
54 * to overload the bitwise logical operators on built-in types.
56 * \author Erik Lindahl <erik.lindahl@gmail.com>
59 * \ingroup module_simd
65 /************************************************************************
66 * Single-precision floating point functions mimicking SIMD versions *
67 ************************************************************************/
69 /*! \brief Store contents of float variable to aligned memory m.
71 * \param[out] m Pointer to memory.
72 * \param a float variable to store
74 * \note This function might be superficially meaningless, but it helps us to
75 * write templated SIMD/non-SIMD code. For clarity it should not be used
78 static inline void store(float* m
, float a
)
83 /*! \brief Store contents of float variable to unaligned memory m.
85 * \param[out] m Pointer to memory, no alignment requirement.
86 * \param a float variable to store.
88 * \note This function might be superficially meaningless, but it helps us to
89 * write templated SIMD/non-SIMD code. For clarity it should not be used
92 static inline void storeU(float* m
, float a
)
97 // We cannot overload the logical operators and, or, andNot, xor for
100 /*! \brief Float Fused-multiply-add. Result is a*b + c.
107 * \note This function might be superficially meaningless, but it helps us to
108 * write templated SIMD/non-SIMD code. For clarity it should not be used
111 static inline float fma(float a
, float b
, float c
)
113 // Note that we purposely do not use the single-rounding std::fma
114 // as that can be very slow without hardware support
118 /*! \brief Float Fused-multiply-subtract. Result is a*b - c.
125 * \note This function might be superficially meaningless, but it helps us to
126 * write templated SIMD/non-SIMD code. For clarity it should not be used
129 static inline float fms(float a
, float b
, float c
)
134 /*! \brief Float Fused-negated-multiply-add. Result is -a*b + c.
141 * \note This function might be superficially meaningless, but it helps us to
142 * write templated SIMD/non-SIMD code. For clarity it should not be used
145 static inline float fnma(float a
, float b
, float c
)
150 /*! \brief Float Fused-negated-multiply-subtract. Result is -a*b - c.
157 * \note This function might be superficially meaningless, but it helps us to
158 * write templated SIMD/non-SIMD code. For clarity it should not be used
161 static inline float fnms(float a
, float b
, float c
)
166 /*! \brief Add two float variables, masked version.
171 * \return a+b where mask is true, a otherwise.
173 * \note This function might be superficially meaningless, but it helps us to
174 * write templated SIMD/non-SIMD code. For clarity it should not be used
177 static inline float maskAdd(float a
, float b
, float m
)
179 return a
+ (m
!= 0.0F
? b
: 0.0F
);
182 /*! \brief Multiply two float variables, masked version.
187 * \return a*b where mask is true, 0.0 otherwise.
189 * \note This function might be superficially meaningless, but it helps us to
190 * write templated SIMD/non-SIMD code. For clarity it should not be used
193 static inline float maskzMul(float a
, float b
, float m
)
195 return m
!= 0.0F
? (a
* b
) : 0.0F
;
198 /*! \brief Float fused multiply-add, masked version.
204 * \return a*b+c where mask is true, 0.0 otherwise.
206 * \note This function might be superficially meaningless, but it helps us to
207 * write templated SIMD/non-SIMD code. For clarity it should not be used
210 static inline float maskzFma(float a
, float b
, float c
, float m
)
212 return m
!= 0.0F
? (a
* b
+ c
) : 0.0F
;
215 /*! \brief Float 1.0/x, masked version.
217 * \param x Argument, x>0 for entries where mask is true.
219 * \return 1/x. The result for masked-out entries will be 0.0.
221 * \note This function might be superficially meaningless, but it helps us to
222 * write templated SIMD/non-SIMD code. For clarity it should not be used
225 static inline float gmx_simdcall
maskzRcp(float x
, float m
)
227 return m
!= 0.0F
? 1.0F
/ x
: 0.0F
;
230 /*! \brief Float Floating-point abs().
232 * \param a any floating point values
233 * \return abs(a) for each element.
235 * \note This function might be superficially meaningless, but it helps us to
236 * write templated SIMD/non-SIMD code. For clarity it should not be used
239 static inline float abs(float a
)
244 /*! \brief Set each float element to the largest from two variables.
246 * \param a Any floating-point value
247 * \param b Any floating-point value
248 * \return max(a,b) for each element.
250 * \note This function might be superficially meaningless, but it helps us to
251 * write templated SIMD/non-SIMD code. For clarity it should not be used
254 static inline float max(float a
, float b
)
256 return std::max(a
, b
);
259 /*! \brief Set each float element to the smallest from two variables.
261 * \param a Any floating-point value
262 * \param b Any floating-point value
263 * \return min(a,b) for each element.
265 * \note This function might be superficially meaningless, but it helps us to
266 * write templated SIMD/non-SIMD code. For clarity it should not be used
269 static inline float min(float a
, float b
)
271 return std::min(a
, b
);
274 /*! \brief Float round to nearest integer value (in floating-point format).
276 * \param a Any floating-point value
277 * \return The nearest integer, represented in floating-point format.
279 * \note This function might be superficially meaningless, but it helps us to
280 * write templated SIMD/non-SIMD code. For clarity it should not be used
283 static inline float round(float a
)
285 return std::round(a
);
288 /*! \brief Truncate float, i.e. round towards zero - common hardware instruction.
290 * \param a Any floating-point value
291 * \return Integer rounded towards zero, represented in floating-point format.
293 * \note This function might be superficially meaningless, but it helps us to
294 * write templated SIMD/non-SIMD code. For clarity it should not be used
297 static inline float trunc(float a
)
299 return std::trunc(a
);
302 /*! \brief Return sum of all elements in float variable (i.e., the variable itself).
304 * \param a variable to reduce/sum.
305 * \return The argument variable itself.
307 * \note This function might be superficially meaningless, but it helps us to
308 * write templated SIMD/non-SIMD code. For clarity it should not be used
311 static inline float reduce(float a
)
316 /*! \brief Bitwise andnot for two scalar float variables.
320 * \return (~data1) & data2
322 * \note This function might be superficially meaningless, but it helps us to
323 * write templated SIMD/non-SIMD code. For clarity it should not be used
326 static inline float andNot(float a
, float b
)
336 conv1
.i
= (~conv1
.i
) & conv2
.i
;
341 /*! \brief Return true if any bits are set in the float variable.
343 * This function is used to handle bitmasks, mainly for exclusions in the
344 * inner kernels. Note that it will return true even for -0.0f (sign bit set),
345 * so it is not identical to not-equal.
348 * \return True if any bit in a is nonzero.
350 * \note This function might be superficially meaningless, but it helps us to
351 * write templated SIMD/non-SIMD code. For clarity it should not be used
354 static inline bool testBits(float a
)
362 return (conv
.i
!= 0);
365 /*! \brief Returns if the boolean is true.
367 * \param a Logical variable.
368 * \return true if a is true, otherwise false.
370 * \note This function might be superficially meaningless, but it helps us to
371 * write templated SIMD/non-SIMD code. For clarity it should not be used
374 static inline bool anyTrue(bool a
)
379 /*! \brief Select from single precision variable where boolean is true.
381 * \param a Floating-point variable to select from
382 * \param mask Boolean selector
383 * \return a is selected for true, 0 for false.
385 * \note This function might be superficially meaningless, but it helps us to
386 * write templated SIMD/non-SIMD code. For clarity it should not be used
389 static inline float selectByMask(float a
, bool mask
)
391 return mask
? a
: 0.0F
;
394 /*! \brief Select from single precision variable where boolean is false.
396 * \param a Floating-point variable to select from
397 * \param mask Boolean selector
398 * \return a is selected for false, 0 for true.
400 * \note This function might be superficially meaningless, but it helps us to
401 * write templated SIMD/non-SIMD code. For clarity it should not be used
404 static inline float selectByNotMask(float a
, bool mask
)
406 return mask
? 0.0F
: a
;
409 /*! \brief Blend float selection.
411 * \param a First source
412 * \param b Second source
413 * \param sel Boolean selector
414 * \return Select b if sel is true, a otherwise.
416 * \note This function might be superficially meaningless, but it helps us to
417 * write templated SIMD/non-SIMD code. For clarity it should not be used
420 static inline float blend(float a
, float b
, bool sel
)
425 /*! \brief Round single precision floating point to integer.
428 * \return Integer format, a rounded to nearest integer.
430 * \note This function might be superficially meaningless, but it helps us to
431 * write templated SIMD/non-SIMD code. For clarity it should not be used
434 static inline std::int32_t cvtR2I(float a
)
436 return static_cast<std::int32_t>(std::round(a
));
439 /*! \brief Truncate single precision floating point to integer.
442 * \return Integer format, a truncated to integer.
444 * \note This function might be superficially meaningless, but it helps us to
445 * write templated SIMD/non-SIMD code. For clarity it should not be used
448 static inline std::int32_t cvttR2I(float a
)
450 return static_cast<std::int32_t>(std::trunc(a
));
453 /*! \brief Return integer.
455 * This function mimicks the SIMD integer-to-real conversion routines. By
456 * simply returning an integer, we let the compiler sort out whether the
457 * conversion should be to float or double rather than using proxy objects.
460 * \return same value (a)
462 * \note This function might be superficially meaningless, but it helps us to
463 * write templated SIMD/non-SIMD code. For clarity it should not be used
466 static inline std::int32_t cvtI2R(std::int32_t a
)
471 /************************************************************************
472 * Double-precision floating point functions mimicking SIMD versions *
473 ************************************************************************/
475 /*! \brief Store contents of double variable to aligned memory m.
477 * \param[out] m Pointer to memory.
478 * \param a double variable to store
480 * \note This function might be superficially meaningless, but it helps us to
481 * write templated SIMD/non-SIMD code. For clarity it should not be used
484 static inline void store(double* m
, double a
)
489 /*! \brief Store contents of double variable to unaligned memory m.
491 * \param[out] m Pointer to memory, no alignment requirement.
492 * \param a double variable to store.
494 * \note This function might be superficially meaningless, but it helps us to
495 * write templated SIMD/non-SIMD code. For clarity it should not be used
498 static inline void storeU(double* m
, double a
)
503 // We cannot overload the logical operators and, or, andNot, xor for
506 /*! \brief double Fused-multiply-add. Result is a*b + c.
513 * \note This function might be superficially meaningless, but it helps us to
514 * write templated SIMD/non-SIMD code. For clarity it should not be used
517 static inline double fma(double a
, double b
, double c
)
519 // Note that we purposely do not use the single-rounding std::fma
520 // as that can be very slow without hardware support
524 /*! \brief double Fused-multiply-subtract. Result is a*b - c.
531 * \note This function might be superficially meaningless, but it helps us to
532 * write templated SIMD/non-SIMD code. For clarity it should not be used
535 static inline double fms(double a
, double b
, double c
)
540 /*! \brief double Fused-negated-multiply-add. Result is - a*b + c.
547 * \note This function might be superficially meaningless, but it helps us to
548 * write templated SIMD/non-SIMD code. For clarity it should not be used
551 static inline double fnma(double a
, double b
, double c
)
556 /*! \brief double Fused-negated-multiply-subtract. Result is -a*b - c.
563 * \note This function might be superficially meaningless, but it helps us to
564 * write templated SIMD/non-SIMD code. For clarity it should not be used
567 static inline double fnms(double a
, double b
, double c
)
572 /*! \brief Add two double variables, masked version.
577 * \return a+b where mask is true, a otherwise.
579 * \note This function might be superficially meaningless, but it helps us to
580 * write templated SIMD/non-SIMD code. For clarity it should not be used
583 static inline double maskAdd(double a
, double b
, double m
)
585 return a
+ (m
!= 0.0 ? b
: 0.0);
588 /*! \brief Multiply two double variables, masked version.
593 * \return a*b where mask is true, 0.0 otherwise.
595 * \note This function might be superficially meaningless, but it helps us to
596 * write templated SIMD/non-SIMD code. For clarity it should not be used
599 static inline double maskzMul(double a
, double b
, double m
)
601 return m
!= 0.0 ? (a
* b
) : 0.0;
604 /*! \brief double fused multiply-add, masked version.
610 * \return a*b+c where mask is true, 0.0 otherwise.
612 * \note This function might be superficially meaningless, but it helps us to
613 * write templated SIMD/non-SIMD code. For clarity it should not be used
616 static inline double maskzFma(double a
, double b
, double c
, double m
)
618 return m
!= 0.0 ? (a
* b
+ c
) : 0.0;
621 /*! \brief Double 1.0/x, masked version.
623 * \param x Argument, x>0 for entries where mask is true.
625 * \return Approximation of 1/x. The result for masked-out entries will be 0.0.
627 * \note This function might be superficially meaningless, but it helps us to
628 * write templated SIMD/non-SIMD code. For clarity it should not be used
631 static inline double gmx_simdcall
maskzRcp(double x
, double m
)
633 return m
!= 0.0 ? 1.0 / x
: 0.0;
636 /*! \brief double doubleing-point abs().
638 * \param a any doubleing point values
639 * \return abs(a) for each element.
641 * \note This function might be superficially meaningless, but it helps us to
642 * write templated SIMD/non-SIMD code. For clarity it should not be used
645 static inline double abs(double a
)
650 /*! \brief Set each double element to the largest from two variables.
652 * \param a Any doubleing-point value
653 * \param b Any doubleing-point value
654 * \return max(a,b) for each element.
656 * \note This function might be superficially meaningless, but it helps us to
657 * write templated SIMD/non-SIMD code. For clarity it should not be used
660 static inline double max(double a
, double b
)
662 return std::max(a
, b
);
665 /*! \brief Set each double element to the smallest from two variables.
667 * \param a Any doubleing-point value
668 * \param b Any doubleing-point value
669 * \return min(a,b) for each element.
671 * \note This function might be superficially meaningless, but it helps us to
672 * write templated SIMD/non-SIMD code. For clarity it should not be used
675 static inline double min(double a
, double b
)
677 return std::min(a
, b
);
680 /*! \brief double round to nearest integer value (in doubleing-point format).
682 * \param a Any doubleing-point value
683 * \return The nearest integer, represented in doubleing-point format.
685 * \note This function might be superficially meaningless, but it helps us to
686 * write templated SIMD/non-SIMD code. For clarity it should not be used
689 static inline double round(double a
)
691 return std::round(a
);
694 /*! \brief Truncate double, i.e. round towards zero - common hardware instruction.
696 * \param a Any doubleing-point value
697 * \return Integer rounded towards zero, represented in doubleing-point format.
699 * \note This function might be superficially meaningless, but it helps us to
700 * write templated SIMD/non-SIMD code. For clarity it should not be used
703 static inline double trunc(double a
)
705 return std::trunc(a
);
708 /*! \brief Return sum of all elements in double variable (i.e., the variable itself).
710 * \param a variable to reduce/sum.
711 * \return The argument variable itself.
713 * \note This function might be superficially meaningless, but it helps us to
714 * write templated SIMD/non-SIMD code. For clarity it should not be used
717 static inline double reduce(double a
)
722 /*! \brief Bitwise andnot for two scalar double variables.
726 * \return (~data1) & data2
728 * \note This function might be superficially meaningless, but it helps us to
729 * write templated SIMD/non-SIMD code. For clarity it should not be used
732 static inline double andNot(double a
, double b
)
742 conv1
.i
= (~conv1
.i
) & conv2
.i
;
747 /*! \brief Return true if any bits are set in the double variable.
749 * This function is used to handle bitmasks, mainly for exclusions in the
750 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
751 * so it is not identical to not-equal.
754 * \return True if any bit in a is nonzero.
756 * \note This function might be superficially meaningless, but it helps us to
757 * write templated SIMD/non-SIMD code. For clarity it should not be used
760 static inline bool testBits(double a
)
768 return (conv
.i
!= 0);
771 /*! \brief Select from double precision variable where boolean is true.
773 * \param a double variable to select from
774 * \param mask Boolean selector
775 * \return a is selected for true, 0 for false.
777 * \note This function might be superficially meaningless, but it helps us to
778 * write templated SIMD/non-SIMD code. For clarity it should not be used
781 static inline double selectByMask(double a
, bool mask
)
783 return mask
? a
: 0.0;
786 /*! \brief Select from double precision variable where boolean is false.
788 * \param a double variable to select from
789 * \param mask Boolean selector
790 * \return a is selected for false, 0 for true.
792 * \note This function might be superficially meaningless, but it helps us to
793 * write templated SIMD/non-SIMD code. For clarity it should not be used
796 static inline double selectByNotMask(double a
, bool mask
)
798 return mask
? 0.0 : a
;
801 /*! \brief Blend double selection.
803 * \param a First source
804 * \param b Second source
805 * \param sel Boolean selector
806 * \return Select b if sel is true, a otherwise.
808 * \note This function might be superficially meaningless, but it helps us to
809 * write templated SIMD/non-SIMD code. For clarity it should not be used
812 static inline double blend(double a
, double b
, bool sel
)
817 /*! \brief Round single precision doubleing point to integer.
820 * \return Integer format, a rounded to nearest integer.
822 * \note This function might be superficially meaningless, but it helps us to
823 * write templated SIMD/non-SIMD code. For clarity it should not be used
826 static inline std::int32_t cvtR2I(double a
)
828 return static_cast<std::int32_t>(std::round(a
));
831 /*! \brief Truncate single precision doubleing point to integer.
834 * \return Integer format, a truncated to integer.
836 * \note This function might be superficially meaningless, but it helps us to
837 * write templated SIMD/non-SIMD code. For clarity it should not be used
840 static inline std::int32_t cvttR2I(double a
)
842 return static_cast<std::int32_t>(std::trunc(a
));
845 // We do not have a separate cvtI2R for double, since that would require
846 // proxy objects. Instead, the float version returns an integer and lets the
847 // compiler sort out the conversion type.
850 /*! \brief Convert float to double (mimicks SIMD conversion)
853 * \return a, as double double
855 * \note This function might be superficially meaningless, but it helps us to
856 * write templated SIMD/non-SIMD code. For clarity it should not be used
859 static inline double cvtF2D(float a
)
864 /*! \brief Convert double to float (mimicks SIMD conversion)
867 * \return a, as float
869 * \note This function might be superficially meaningless, but it helps us to
870 * write templated SIMD/non-SIMD code. For clarity it should not be used
873 static inline float cvtD2F(double a
)
878 /************************************************
879 * Integer functions mimicking SIMD versions *
880 ************************************************/
882 /*! \brief Store contents of integer variable to aligned memory m.
884 * \param[out] m Pointer to memory.
885 * \param a integer variable to store
887 * \note This function might be superficially meaningless, but it helps us to
888 * write templated SIMD/non-SIMD code. For clarity it should not be used
891 static inline void store(std::int32_t* m
, std::int32_t a
)
896 /*! \brief Store contents of integer variable to unaligned memory m.
898 * \param[out] m Pointer to memory, no alignment requirement.
899 * \param a integer variable to store.
901 * \note This function might be superficially meaningless, but it helps us to
902 * write templated SIMD/non-SIMD code. For clarity it should not be used
905 static inline void storeU(std::int32_t* m
, std::int32_t a
)
910 /*! \brief Bitwise andnot for two scalar integer variables.
914 * \return (~data1) & data2
916 * \note This function might be superficially meaningless, but it helps us to
917 * write templated SIMD/non-SIMD code. For clarity it should not be used
920 static inline std::int32_t andNot(std::int32_t a
, std::int32_t b
)
925 /*! \brief Return true if any bits are set in the integer variable.
927 * This function is used to handle bitmasks, mainly for exclusions in the
931 * \return True if any bit in a is nonzero.
933 * \note This function might be superficially meaningless, but it helps us to
934 * write templated SIMD/non-SIMD code. For clarity it should not be used
937 static inline bool testBits(std::int32_t a
)
942 /*! \brief Select from integer variable where boolean is true.
944 * \param a Integer variable to select from
945 * \param mask Boolean selector
946 * \return a is selected for true, 0 for false.
948 * \note This function might be superficially meaningless, but it helps us to
949 * write templated SIMD/non-SIMD code. For clarity it should not be used
952 static inline std::int32_t selectByMask(std::int32_t a
, bool mask
)
957 /*! \brief Select from integer variable where boolean is false.
959 * \param a Integer variable to select from
960 * \param mask Boolean selector
961 * \return a is selected for false, 0 for true.
963 * \note This function might be superficially meaningless, but it helps us to
964 * write templated SIMD/non-SIMD code. For clarity it should not be used
967 static inline std::int32_t selectByNotMask(std::int32_t a
, bool mask
)
972 /*! \brief Blend integer selection.
974 * \param a First source
975 * \param b Second source
976 * \param sel Boolean selector
977 * \return Select b if sel is true, a otherwise.
979 * \note This function might be superficially meaningless, but it helps us to
980 * write templated SIMD/non-SIMD code. For clarity it should not be used
983 static inline std::int32_t blend(std::int32_t a
, std::int32_t b
, bool sel
)
988 /*! \brief Just return a boolean (mimicks SIMD real-to-int bool conversions)
991 * \return same boolean
993 * \note This function might be superficially meaningless, but it helps us to
994 * write templated SIMD/non-SIMD code. For clarity it should not be used
997 static inline bool cvtB2IB(bool a
)
1002 /*! \brief Just return a boolean (mimicks SIMD int-to-real bool conversions)
1005 * \return same boolean
1007 * \note This function might be superficially meaningless, but it helps us to
1008 * write templated SIMD/non-SIMD code. For clarity it should not be used
1009 * outside such code.
1011 static inline bool cvtIB2B(bool a
)
1019 #endif // GMX_SIMD_SCALAR_FLOAT_H