2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_H
36 #define GMX_SIMD_SCALAR_H
44 /*! \libinternal \file
46 * \brief Scalar float functions corresponding to GROMACS SIMD functions
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic function is in templated combined SIMD/non-SIMD code.
53 * There are a handful of limitations, in particular that it is impossible
54 * to overload the bitwise logical operators on built-in types.
56 * \author Erik Lindahl <erik.lindahl@gmail.com>
59 * \ingroup module_simd
65 /************************************************************************
66 * Single-precision floating point functions mimicking SIMD versions *
67 ************************************************************************/
69 /*! \brief Store contents of float variable to aligned memory m.
71 * \param[out] m Pointer to memory.
72 * \param a float variable to store
74 * \note This function might be superficially meaningless, but it helps us to
75 * write templated SIMD/non-SIMD code. For clarity it should not be used
79 store(float *m
, float a
)
84 /*! \brief Store contents of float variable to unaligned memory m.
86 * \param[out] m Pointer to memory, no alignment requirement.
87 * \param a float variable to store.
89 * \note This function might be superficially meaningless, but it helps us to
90 * write templated SIMD/non-SIMD code. For clarity it should not be used
94 storeU(float *m
, float a
)
99 // We cannot overload the logical operators and, or, andNot, xor for
102 /*! \brief Float Fused-multiply-add. Result is a*b + c.
109 * \note This function might be superficially meaningless, but it helps us to
110 * write templated SIMD/non-SIMD code. For clarity it should not be used
114 fma(float a
, float b
, float c
)
116 // Note that we purposely do not use the single-rounding std::fma
117 // as that can be very slow without hardware support
121 /*! \brief Float Fused-multiply-subtract. Result is a*b - c.
128 * \note This function might be superficially meaningless, but it helps us to
129 * write templated SIMD/non-SIMD code. For clarity it should not be used
133 fms(float a
, float b
, float c
)
138 /*! \brief Float Fused-negated-multiply-add. Result is -a*b + c.
145 * \note This function might be superficially meaningless, but it helps us to
146 * write templated SIMD/non-SIMD code. For clarity it should not be used
150 fnma(float a
, float b
, float c
)
155 /*! \brief Float Fused-negated-multiply-subtract. Result is -a*b - c.
162 * \note This function might be superficially meaningless, but it helps us to
163 * write templated SIMD/non-SIMD code. For clarity it should not be used
167 fnms(float a
, float b
, float c
)
172 /*! \brief Add two float variables, masked version.
177 * \return a+b where mask is true, a otherwise.
179 * \note This function might be superficially meaningless, but it helps us to
180 * write templated SIMD/non-SIMD code. For clarity it should not be used
184 maskAdd(float a
, float b
, float m
)
186 return a
+ (m
!= 0.0f
? b
: 0.0f
);
189 /*! \brief Multiply two float variables, masked version.
194 * \return a*b where mask is true, 0.0 otherwise.
196 * \note This function might be superficially meaningless, but it helps us to
197 * write templated SIMD/non-SIMD code. For clarity it should not be used
201 maskzMul(float a
, float b
, float m
)
203 return m
!= 0.0f
? (a
* b
) : 0.0f
;
206 /*! \brief Float fused multiply-add, masked version.
212 * \return a*b+c where mask is true, 0.0 otherwise.
214 * \note This function might be superficially meaningless, but it helps us to
215 * write templated SIMD/non-SIMD code. For clarity it should not be used
219 maskzFma(float a
, float b
, float c
, float m
)
221 return m
!= 0.0f
? (a
* b
+ c
) : 0.0f
;
224 /*! \brief Float Floating-point abs().
226 * \param a any floating point values
227 * \return abs(a) for each element.
229 * \note This function might be superficially meaningless, but it helps us to
230 * write templated SIMD/non-SIMD code. For clarity it should not be used
239 /*! \brief Set each float element to the largest from two variables.
241 * \param a Any floating-point value
242 * \param b Any floating-point value
243 * \return max(a,b) for each element.
245 * \note This function might be superficially meaningless, but it helps us to
246 * write templated SIMD/non-SIMD code. For clarity it should not be used
250 max(float a
, float b
)
252 return std::max(a
, b
);
255 /*! \brief Set each float element to the smallest from two variables.
257 * \param a Any floating-point value
258 * \param b Any floating-point value
259 * \return min(a,b) for each element.
261 * \note This function might be superficially meaningless, but it helps us to
262 * write templated SIMD/non-SIMD code. For clarity it should not be used
266 min(float a
, float b
)
268 return std::min(a
, b
);
271 /*! \brief Float round to nearest integer value (in floating-point format).
273 * \param a Any floating-point value
274 * \return The nearest integer, represented in floating-point format.
276 * \note This function might be superficially meaningless, but it helps us to
277 * write templated SIMD/non-SIMD code. For clarity it should not be used
283 return std::round(a
);
286 /*! \brief Truncate float, i.e. round towards zero - common hardware instruction.
288 * \param a Any floating-point value
289 * \return Integer rounded towards zero, represented in floating-point format.
291 * \note This function might be superficially meaningless, but it helps us to
292 * write templated SIMD/non-SIMD code. For clarity it should not be used
298 return std::trunc(a
);
301 /*! \brief Return sum of all elements in float variable (i.e., the variable itself).
303 * \param a variable to reduce/sum.
304 * \return The argument variable itself.
306 * \note This function might be superficially meaningless, but it helps us to
307 * write templated SIMD/non-SIMD code. For clarity it should not be used
316 /*! \brief Bitwise andnot for two scalar float variables.
320 * \return (~data1) & data2
322 * \note This function might be superficially meaningless, but it helps us to
323 * write templated SIMD/non-SIMD code. For clarity it should not be used
327 andNot(float a
, float b
)
338 conv1
.i
= (~conv1
.i
) & conv2
.i
;
343 /*! \brief Return true if any bits are set in the float variable.
345 * This function is used to handle bitmasks, mainly for exclusions in the
346 * inner kernels. Note that it will return true even for -0.0f (sign bit set),
347 * so it is not identical to not-equal.
350 * \return True if any bit in a is nonzero.
352 * \note This function might be superficially meaningless, but it helps us to
353 * write templated SIMD/non-SIMD code. For clarity it should not be used
366 return (conv
.i
!= 0);
369 /*! \brief Returns if the boolean is true.
371 * \param a Logical variable.
372 * \return true if a is true, otherwise false.
374 * \note This function might be superficially meaningless, but it helps us to
375 * write templated SIMD/non-SIMD code. For clarity it should not be used
384 /*! \brief Select from single precision variable where boolean is true.
386 * \param a Floating-point variable to select from
387 * \param mask Boolean selector
388 * \return a is selected for true, 0 for false.
390 * \note This function might be superficially meaningless, but it helps us to
391 * write templated SIMD/non-SIMD code. For clarity it should not be used
395 selectByMask(float a
, bool mask
)
397 return mask
? a
: 0.0f
;
400 /*! \brief Select from single precision variable where boolean is false.
402 * \param a Floating-point variable to select from
403 * \param mask Boolean selector
404 * \return a is selected for false, 0 for true.
406 * \note This function might be superficially meaningless, but it helps us to
407 * write templated SIMD/non-SIMD code. For clarity it should not be used
411 selectByNotMask(float a
, bool mask
)
413 return mask
? 0.0f
: a
;
416 /*! \brief Blend float selection.
418 * \param a First source
419 * \param b Second source
420 * \param sel Boolean selector
421 * \return Select b if sel is true, a otherwise.
423 * \note This function might be superficially meaningless, but it helps us to
424 * write templated SIMD/non-SIMD code. For clarity it should not be used
428 blend(float a
, float b
, bool sel
)
433 /*! \brief Round single precision floating point to integer.
436 * \return Integer format, a rounded to nearest integer.
438 * \note This function might be superficially meaningless, but it helps us to
439 * write templated SIMD/non-SIMD code. For clarity it should not be used
442 static inline std::int32_t
445 return static_cast<std::int32_t>(std::round(a
));
448 /*! \brief Truncate single precision floating point to integer.
451 * \return Integer format, a truncated to integer.
453 * \note This function might be superficially meaningless, but it helps us to
454 * write templated SIMD/non-SIMD code. For clarity it should not be used
457 static inline std::int32_t
460 return static_cast<std::int32_t>(std::trunc(a
));
463 /*! \brief Return integer.
465 * This function mimicks the SIMD integer-to-real conversion routines. By
466 * simply returning an integer, we let the compiler sort out whether the
467 * conversion should be to float or double rather than using proxy objects.
470 * \return same value (a)
472 * \note This function might be superficially meaningless, but it helps us to
473 * write templated SIMD/non-SIMD code. For clarity it should not be used
476 static inline std::int32_t
477 cvtI2R(std::int32_t a
)
482 /************************************************************************
483 * Double-precision floating point functions mimicking SIMD versions *
484 ************************************************************************/
486 /*! \brief Store contents of double variable to aligned memory m.
488 * \param[out] m Pointer to memory.
489 * \param a double variable to store
491 * \note This function might be superficially meaningless, but it helps us to
492 * write templated SIMD/non-SIMD code. For clarity it should not be used
496 store(double *m
, double a
)
501 /*! \brief Store contents of double variable to unaligned memory m.
503 * \param[out] m Pointer to memory, no alignment requirement.
504 * \param a double variable to store.
506 * \note This function might be superficially meaningless, but it helps us to
507 * write templated SIMD/non-SIMD code. For clarity it should not be used
511 storeU(double *m
, double a
)
516 // We cannot overload the logical operators and, or, andNot, xor for
519 /*! \brief double Fused-multiply-add. Result is a*b + c.
526 * \note This function might be superficially meaningless, but it helps us to
527 * write templated SIMD/non-SIMD code. For clarity it should not be used
531 fma(double a
, double b
, double c
)
533 // Note that we purposely do not use the single-rounding std::fma
534 // as that can be very slow without hardware support
538 /*! \brief double Fused-multiply-subtract. Result is a*b - c.
545 * \note This function might be superficially meaningless, but it helps us to
546 * write templated SIMD/non-SIMD code. For clarity it should not be used
550 fms(double a
, double b
, double c
)
555 /*! \brief double Fused-negated-multiply-add. Result is - a*b + c.
562 * \note This function might be superficially meaningless, but it helps us to
563 * write templated SIMD/non-SIMD code. For clarity it should not be used
567 fnma(double a
, double b
, double c
)
572 /*! \brief double Fused-negated-multiply-subtract. Result is -a*b - c.
579 * \note This function might be superficially meaningless, but it helps us to
580 * write templated SIMD/non-SIMD code. For clarity it should not be used
584 fnms(double a
, double b
, double c
)
589 /*! \brief Add two double variables, masked version.
594 * \return a+b where mask is true, a otherwise.
596 * \note This function might be superficially meaningless, but it helps us to
597 * write templated SIMD/non-SIMD code. For clarity it should not be used
601 maskAdd(double a
, double b
, double m
)
603 return a
+ (m
!= 0.0 ? b
: 0.0);
606 /*! \brief Multiply two double variables, masked version.
611 * \return a*b where mask is true, 0.0 otherwise.
613 * \note This function might be superficially meaningless, but it helps us to
614 * write templated SIMD/non-SIMD code. For clarity it should not be used
618 maskzMul(double a
, double b
, double m
)
620 return m
!= 0.0 ? (a
* b
) : 0.0;
623 /*! \brief double fused multiply-add, masked version.
629 * \return a*b+c where mask is true, 0.0 otherwise.
631 * \note This function might be superficially meaningless, but it helps us to
632 * write templated SIMD/non-SIMD code. For clarity it should not be used
636 maskzFma(double a
, double b
, double c
, double m
)
638 return m
!= 0.0 ? (a
* b
+ c
) : 0.0;
641 /*! \brief double doubleing-point abs().
643 * \param a any doubleing point values
644 * \return abs(a) for each element.
646 * \note This function might be superficially meaningless, but it helps us to
647 * write templated SIMD/non-SIMD code. For clarity it should not be used
656 /*! \brief Set each double element to the largest from two variables.
658 * \param a Any doubleing-point value
659 * \param b Any doubleing-point value
660 * \return max(a,b) for each element.
662 * \note This function might be superficially meaningless, but it helps us to
663 * write templated SIMD/non-SIMD code. For clarity it should not be used
667 max(double a
, double b
)
669 return std::max(a
, b
);
672 /*! \brief Set each double element to the smallest from two variables.
674 * \param a Any doubleing-point value
675 * \param b Any doubleing-point value
676 * \return min(a,b) for each element.
678 * \note This function might be superficially meaningless, but it helps us to
679 * write templated SIMD/non-SIMD code. For clarity it should not be used
683 min(double a
, double b
)
685 return std::min(a
, b
);
688 /*! \brief double round to nearest integer value (in doubleing-point format).
690 * \param a Any doubleing-point value
691 * \return The nearest integer, represented in doubleing-point format.
693 * \note This function might be superficially meaningless, but it helps us to
694 * write templated SIMD/non-SIMD code. For clarity it should not be used
700 return std::round(a
);
703 /*! \brief Truncate double, i.e. round towards zero - common hardware instruction.
705 * \param a Any doubleing-point value
706 * \return Integer rounded towards zero, represented in doubleing-point format.
708 * \note This function might be superficially meaningless, but it helps us to
709 * write templated SIMD/non-SIMD code. For clarity it should not be used
715 return std::trunc(a
);
718 /*! \brief Return sum of all elements in double variable (i.e., the variable itself).
720 * \param a variable to reduce/sum.
721 * \return The argument variable itself.
723 * \note This function might be superficially meaningless, but it helps us to
724 * write templated SIMD/non-SIMD code. For clarity it should not be used
733 /*! \brief Bitwise andnot for two scalar double variables.
737 * \return (~data1) & data2
739 * \note This function might be superficially meaningless, but it helps us to
740 * write templated SIMD/non-SIMD code. For clarity it should not be used
744 andNot(double a
, double b
)
755 conv1
.i
= (~conv1
.i
) & conv2
.i
;
760 /*! \brief Return true if any bits are set in the double variable.
762 * This function is used to handle bitmasks, mainly for exclusions in the
763 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
764 * so it is not identical to not-equal.
767 * \return True if any bit in a is nonzero.
769 * \note This function might be superficially meaningless, but it helps us to
770 * write templated SIMD/non-SIMD code. For clarity it should not be used
783 return (conv
.i
!= 0);
786 /*! \brief Select from double precision variable where boolean is true.
788 * \param a double variable to select from
789 * \param mask Boolean selector
790 * \return a is selected for true, 0 for false.
792 * \note This function might be superficially meaningless, but it helps us to
793 * write templated SIMD/non-SIMD code. For clarity it should not be used
797 selectByMask(double a
, bool mask
)
799 return mask
? a
: 0.0;
802 /*! \brief Select from double precision variable where boolean is false.
804 * \param a double variable to select from
805 * \param mask Boolean selector
806 * \return a is selected for false, 0 for true.
808 * \note This function might be superficially meaningless, but it helps us to
809 * write templated SIMD/non-SIMD code. For clarity it should not be used
813 selectByNotMask(double a
, bool mask
)
815 return mask
? 0.0 : a
;
818 /*! \brief Blend double selection.
820 * \param a First source
821 * \param b Second source
822 * \param sel Boolean selector
823 * \return Select b if sel is true, a otherwise.
825 * \note This function might be superficially meaningless, but it helps us to
826 * write templated SIMD/non-SIMD code. For clarity it should not be used
830 blend(double a
, double b
, bool sel
)
835 /*! \brief Round single precision doubleing point to integer.
838 * \return Integer format, a rounded to nearest integer.
840 * \note This function might be superficially meaningless, but it helps us to
841 * write templated SIMD/non-SIMD code. For clarity it should not be used
844 static inline std::int32_t
847 return static_cast<std::int32_t>(std::round(a
));
850 /*! \brief Truncate single precision doubleing point to integer.
853 * \return Integer format, a truncated to integer.
855 * \note This function might be superficially meaningless, but it helps us to
856 * write templated SIMD/non-SIMD code. For clarity it should not be used
859 static inline std::int32_t
862 return static_cast<std::int32_t>(std::trunc(a
));
865 // We do not have a separate cvtI2R for double, since that would require
866 // proxy objects. Instead, the float version returns an integer and lets the
867 // compiler sort out the conversion type.
870 /*! \brief Convert float to double (mimicks SIMD conversion)
873 * \return a, as double double
875 * \note This function might be superficially meaningless, but it helps us to
876 * write templated SIMD/non-SIMD code. For clarity it should not be used
885 /*! \brief Convert double to float (mimicks SIMD conversion)
888 * \return a, as float
890 * \note This function might be superficially meaningless, but it helps us to
891 * write templated SIMD/non-SIMD code. For clarity it should not be used
900 /************************************************
901 * Integer functions mimicking SIMD versions *
902 ************************************************/
904 /*! \brief Store contents of integer variable to aligned memory m.
906 * \param[out] m Pointer to memory.
907 * \param a integer variable to store
909 * \note This function might be superficially meaningless, but it helps us to
910 * write templated SIMD/non-SIMD code. For clarity it should not be used
914 store(std::int32_t *m
, std::int32_t a
)
919 /*! \brief Store contents of integer variable to unaligned memory m.
921 * \param[out] m Pointer to memory, no alignment requirement.
922 * \param a integer variable to store.
924 * \note This function might be superficially meaningless, but it helps us to
925 * write templated SIMD/non-SIMD code. For clarity it should not be used
929 storeU(std::int32_t *m
, std::int32_t a
)
934 /*! \brief Bitwise andnot for two scalar integer variables.
938 * \return (~data1) & data2
940 * \note This function might be superficially meaningless, but it helps us to
941 * write templated SIMD/non-SIMD code. For clarity it should not be used
944 static inline std::int32_t
945 andNot(std::int32_t a
, std::int32_t b
)
950 /*! \brief Return true if any bits are set in the integer variable.
952 * This function is used to handle bitmasks, mainly for exclusions in the
956 * \return True if any bit in a is nonzero.
958 * \note This function might be superficially meaningless, but it helps us to
959 * write templated SIMD/non-SIMD code. For clarity it should not be used
963 testBits(std::int32_t a
)
968 /*! \brief Select from integer variable where boolean is true.
970 * \param a Integer variable to select from
971 * \param mask Boolean selector
972 * \return a is selected for true, 0 for false.
974 * \note This function might be superficially meaningless, but it helps us to
975 * write templated SIMD/non-SIMD code. For clarity it should not be used
978 static inline std::int32_t
979 selectByMask(std::int32_t a
, bool mask
)
984 /*! \brief Select from integer variable where boolean is false.
986 * \param a Integer variable to select from
987 * \param mask Boolean selector
988 * \return a is selected for false, 0 for true.
990 * \note This function might be superficially meaningless, but it helps us to
991 * write templated SIMD/non-SIMD code. For clarity it should not be used
994 static inline std::int32_t
995 selectByNotMask(std::int32_t a
, bool mask
)
1000 /*! \brief Blend integer selection.
1002 * \param a First source
1003 * \param b Second source
1004 * \param sel Boolean selector
1005 * \return Select b if sel is true, a otherwise.
1007 * \note This function might be superficially meaningless, but it helps us to
1008 * write templated SIMD/non-SIMD code. For clarity it should not be used
1009 * outside such code.
1011 static inline std::int32_t
1012 blend(std::int32_t a
, std::int32_t b
, bool sel
)
1017 /*! \brief Just return a boolean (mimicks SIMD real-to-int bool conversions)
1020 * \return same boolean
1022 * \note This function might be superficially meaningless, but it helps us to
1023 * write templated SIMD/non-SIMD code. For clarity it should not be used
1024 * outside such code.
1032 /*! \brief Just return a boolean (mimicks SIMD int-to-real bool conversions)
1035 * \return same boolean
1037 * \note This function might be superficially meaningless, but it helps us to
1038 * write templated SIMD/non-SIMD code. For clarity it should not be used
1039 * outside such code.
1050 #endif // GMX_SIMD_SCALAR_FLOAT_H