2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \defgroup module_simd SIMD intrinsics interface (simd)
38 * \ingroup group_utilitymodules
40 * \brief Provides an architecture-independent way of doing SIMD coding.
42 * Overview of the SIMD implementation is provided in \ref page_simd.
43 * The details are documented in gromacs/simd/simd.h and the reference
44 * implementation impl_reference.h.
46 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
49 #ifndef GMX_SIMD_SIMD_H
50 #define GMX_SIMD_SIMD_H
52 /*! \libinternal \file
54 * \brief Definitions, capabilities, and wrappers for SIMD module.
56 * The macros in this file are intended to be used for writing
57 * architecture-independent SIMD intrinsics code.
58 * To support a new architecture, adding a new sub-include with macros here
59 * should be (nearly) all that is needed.
61 * The defines in this top-level file will set default Gromacs real precision
62 * operations to either single or double precision based on whether
63 * GMX_DOUBLE is 1. The actual implementation - including e.g.
64 * conversion operations specifically between single and double - is documented
65 * in impl_reference.h.
67 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
70 * \ingroup module_simd
#include <cstdint>

#include "gromacs/utility/classhelpers.h"
#include "gromacs/utility/real.h"
86 /*! \addtogroup module_simd
/*! \libinternal \brief Empty dispatch tag that selects the SimdFloat variant of simdLoad(U) */
struct SimdFloatTag
{
};
/*! \libinternal \brief Empty dispatch tag that selects the SimdDouble variant of simdLoad(U) */
struct SimdDoubleTag
{
};
/*! \libinternal \brief Empty dispatch tag that selects the SimdFInt32 variant of simdLoad(U) */
struct SimdFInt32Tag
{
};
/*! \libinternal \brief Empty dispatch tag that selects the SimdDInt32 variant of simdLoad(U) */
struct SimdDInt32Tag
{
};
102 /*! \name SIMD predefined macros to describe high-level capabilities
104 * These macros are used to describe the features available in default
105 * Gromacs real precision. They are set from the lower-level implementation
106 * files that have macros describing single and double precision individually,
107 * as well as the implementation details.
112 #pragma clang diagnostic push
113 /* reinterpret_cast is used for SIMD->scalar conversion
115 * In general using reinterpret_cast for bit_cast is UB but
116 * for intrinsics types it works for all known compilers
117 * and not all compilers produce as good code for memcpy.
119 #pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
122 #if GMX_SIMD_X86_SSE2
123 # include "impl_x86_sse2/impl_x86_sse2.h"
124 #elif GMX_SIMD_X86_SSE4_1
125 # include "impl_x86_sse4_1/impl_x86_sse4_1.h"
126 #elif GMX_SIMD_X86_AVX_128_FMA
127 # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
128 #elif GMX_SIMD_X86_AVX_256
129 # include "impl_x86_avx_256/impl_x86_avx_256.h"
130 #elif GMX_SIMD_X86_AVX2_256
131 # include "impl_x86_avx2_256/impl_x86_avx2_256.h"
132 #elif GMX_SIMD_X86_AVX2_128
133 # include "impl_x86_avx2_128/impl_x86_avx2_128.h"
134 #elif GMX_SIMD_X86_MIC
135 # include "impl_x86_mic/impl_x86_mic.h"
136 #elif GMX_SIMD_X86_AVX_512
137 # include "impl_x86_avx_512/impl_x86_avx_512.h"
138 #elif GMX_SIMD_X86_AVX_512_KNL
139 # include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
140 #elif GMX_SIMD_ARM_NEON
141 # include "impl_arm_neon/impl_arm_neon.h"
142 #elif GMX_SIMD_ARM_NEON_ASIMD
143 # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
144 #elif GMX_SIMD_IBM_VMX
145 # include "impl_ibm_vmx/impl_ibm_vmx.h"
146 #elif GMX_SIMD_IBM_VSX
147 # include "impl_ibm_vsx/impl_ibm_vsx.h"
148 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
149 # include "impl_reference/impl_reference.h" // Includes doxygen documentation
151 # include "impl_none/impl_none.h"
155 #pragma clang diagnostic pop
158 // The scalar SIMD-mimicking functions are always included so we can use
159 // templated functions even without SIMD support.
160 #include "gromacs/simd/scalar/scalar.h"
161 #include "gromacs/simd/scalar/scalar_math.h"
162 #include "gromacs/simd/scalar/scalar_util.h"
// Map the precision-neutral "REAL"/"INT32" capability macros onto the double
// or float variants, depending on whether GMX_DOUBLE is 1. The doxygen
// documentation is attached to the single-precision branch.
#if GMX_DOUBLE
#    define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
#    define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
#    define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
#    define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
#    define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
#    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
#    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
#    define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
#else

/*! \brief 1 if SimdReal is available, otherwise 0.
 *
 *  \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
 */
#    define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT

/*! \brief Width of SimdReal.
 *
 *  \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
 */
#    define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH

/*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
 *
 *  \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
 */
#    define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT

/*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
 *
 *  \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
 */
#    define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL

/*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
 *
 *  \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
 */
#    define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS

/*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
 *
 *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
 */
#    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT

/*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
 *
 *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
 *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
 */
#    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT

/*! \brief 1 if Simd4Real is available, otherwise 0.
 *
 *  \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
 */
#    define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT

#endif
231 //! \} end of name-group describing high-level capabilities
236 template<class T
, size_t N
>
239 #if GMX_SIMD_HAVE_FLOAT
240 /*! \libinternal \brief Identical to std::array with GMX_SIMD_FLOAT_WIDTH alignment.
241 * Should not be deleted through base pointer (destructor is non-virtual).
244 struct alignas(GMX_SIMD_FLOAT_WIDTH
*sizeof(float))AlignedArray
<float, N
> : public std::array
<float, N
>
249 #if GMX_SIMD_HAVE_DOUBLE
250 /*! \libinternal \brief Identical to std::array with GMX_SIMD_DOUBLE_WIDTH alignment.
251 * Should not be deleted through base pointer (destructor is non-virtual).
254 struct alignas(GMX_SIMD_DOUBLE_WIDTH
*sizeof(double))AlignedArray
<double, N
> : public std::array
<double, N
>
259 #if GMX_SIMD_HAVE_REAL
261 /*! \name SIMD data types
263 * The actual storage of these types is implementation dependent. The
264 * documentation is generated from the reference implementation, but for
265 * normal usage this will likely not be what you are using.
269 /*! \brief Real precision floating-point SIMD datatype.
271 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
273 * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
275 * \note This variable cannot be placed inside other structures or classes, since
276 * some compilers (including at least clang-3.7) appear to lose the
277 * alignment. This is likely particularly severe when allocating such
278 * memory on the heap, but it occurs for stack structures too.
281 typedef SimdDouble SimdReal
;
283 typedef SimdFloat SimdReal
;
287 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
289 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
291 * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
292 * internally, otherwise \ref SimdFBool. This is necessary since some
293 * SIMD implementations use bitpatterns for marking truth, so single-
294 * vs. double precision booleans are not necessarily exchangable.
295 * As long as you just use this type you will not have to worry about precision.
297 * See \ref SimdIBool for an explanation of real vs. integer booleans.
299 * \note This variable cannot be placed inside other structures or classes, since
300 * some compilers (including at least clang-3.7) appear to lose the
301 * alignment. This is likely particularly severe when allocating such
302 * memory on the heap, but it occurs for stack structures too.
305 typedef SimdDBool SimdBool
;
307 typedef SimdFBool SimdBool
;
311 /*! \brief 32-bit integer SIMD type.
313 * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
314 * internally, otherwise \ref SimdFInt32. This might seem a strange
315 * implementation detail, but it is because some SIMD implementations use
316 * different types/widths of integers registers when converting from
317 * double vs. single precision floating point. As long as you just use
318 * this type you will not have to worry about precision.
320 * \note This variable cannot be placed inside other structures or classes, since
321 * some compilers (including at least clang-3.7) appear to lose the
322 * alignment. This is likely particularly severe when allocating such
323 * memory on the heap, but it occurs for stack structures too.
326 typedef SimdDInt32 SimdInt32
;
328 typedef SimdFInt32 SimdInt32
;
#if GMX_SIMD_HAVE_INT32_ARITHMETICS
/*! \brief Boolean SIMD type for usage with \ref SimdInt32.
 *
 * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
 *
 * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
 * internally, otherwise \ref SimdFIBool. This is necessary since some
 * SIMD implementations use bitpatterns for marking truth, so single-
 * vs. double precision booleans are not necessarily exchangeable, and while
 * a double-precision boolean might be represented with a 64-bit mask, the
 * corresponding integer might only use a 32-bit mask.
 *
 * We provide conversion routines for these cases, so the only thing you need to
 * keep in mind is to use \ref SimdBool when working with
 * \ref SimdReal while you pick \ref SimdIBool when working with
 * \ref SimdInt32.
 *
 * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
 *
 * \note This variable cannot be placed inside other structures or classes, since
 *       some compilers (including at least clang-3.7) appear to lose the
 *       alignment. This is likely particularly severe when allocating such
 *       memory on the heap, but it occurs for stack structures too.
 */
#    if GMX_DOUBLE
using SimdIBool = SimdDIBool;
#    else
using SimdIBool = SimdFIBool;
#    endif
#endif // GMX_SIMD_HAVE_INT32_ARITHMETICS
364 const int c_simdBestPairAlignment
= c_simdBestPairAlignmentDouble
;
366 const int c_simdBestPairAlignment
= c_simdBestPairAlignmentFloat
;
369 #endif // GMX_SIMD_HAVE_REAL
371 #if GMX_SIMD4_HAVE_REAL
372 /*! \brief Real precision floating-point SIMD4 datatype.
374 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
376 * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
378 * \note This variable cannot be placed inside other structures or classes, since
379 * some compilers (including at least clang-3.7) appear to lose the
380 * alignment. This is likely particularly severe when allocating such
381 * memory on the heap, but it occurs for stack structures too.
384 typedef Simd4Double Simd4Real
;
386 typedef Simd4Float Simd4Real
;
390 /*! \brief Boolean SIMD4 type for usage with \ref SimdReal.
392 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
394 * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
395 * internally, otherwise \ref Simd4FBool. This is necessary since some
396 * SIMD implementations use bitpatterns for marking truth, so single-
397 * vs. double precision booleans are not necessarily exchangable.
398 * As long as you just use this type you will not have to worry about precision.
400 * \note This variable cannot be placed inside other structures or classes, since
401 * some compilers (including at least clang-3.7) appear to lose the
402 * alignment. This is likely particularly severe when allocating such
403 * memory on the heap, but it occurs for stack structures too.
406 typedef Simd4DBool Simd4Bool
;
408 typedef Simd4FBool Simd4Bool
;
410 #endif // GMX_SIMD4_HAVE_REAL
412 //! \} end of name-group describing SIMD data types
414 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
420 /*! \libinternal \brief Simd traits
422 * These traits are used to query data about SIMD types. Currently provided
423 * data is useful for SIMD loads (load function and helper classes for
424 * ArrayRef<> in simd_memory.h). Provided data:
425 * - type: scalar type corresponding to the SIMD type
426 * - width: SIMD width
427 * - tag: tag used for type dispatch of load function
430 struct SimdTraits
{};
432 #if GMX_SIMD_HAVE_FLOAT
434 struct SimdTraits
<SimdFloat
>
437 static constexpr int width
= GMX_SIMD_FLOAT_WIDTH
;
438 using tag
= SimdFloatTag
;
441 #if GMX_SIMD_HAVE_DOUBLE
443 struct SimdTraits
<SimdDouble
>
446 static constexpr int width
= GMX_SIMD_DOUBLE_WIDTH
;
447 using tag
= SimdDoubleTag
;
450 #if GMX_SIMD_HAVE_FLOAT
452 struct SimdTraits
<SimdFInt32
>
455 static constexpr int width
= GMX_SIMD_FINT32_WIDTH
;
456 using tag
= SimdFInt32Tag
;
459 #if GMX_SIMD_HAVE_DOUBLE
461 struct SimdTraits
<SimdDInt32
>
464 static constexpr int width
= GMX_SIMD_DINT32_WIDTH
;
465 using tag
= SimdDInt32Tag
;
470 struct SimdTraits
<const T
>
472 using type
= const typename SimdTraits
<T
>::type
;
473 static constexpr int width
= SimdTraits
<T
>::width
;
474 using tag
= typename SimdTraits
<T
>::tag
;
476 } //namespace internal
478 /*! \brief Load function that returns SIMD or scalar
480 * \tparam T Type to load (type is always mandatory)
481 * \param m Pointer to aligned memory
482 * \return Loaded value
486 load(const typename
internal::SimdTraits
<T
>::type
*m
) //disabled by SFINAE for non-SIMD types
488 return simdLoad(m
, typename
internal::SimdTraits
<T
>::tag());
493 /* the enable_if serves to prevent two different type of misuse:
494 * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
495 * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
496 * created. The dependent type disables type deduction.
498 load(const std::enable_if_t
<std::is_arithmetic
<T
>::value
, T
> *m
)
503 template <typename T
, size_t N
>
504 static inline T gmx_simdcall
505 load(const AlignedArray
<typename
internal::SimdTraits
<T
>::type
, N
> &m
)
507 return simdLoad(m
.data(), typename
internal::SimdTraits
<T
>::tag());
510 /*! \brief Load function that returns SIMD or scalar based on template argument
512 * \tparam T Type to load (type is always mandatory)
513 * \param m Pointer to unaligned memory
514 * \return Loaded SimdFloat/Double/Int or basic scalar type
518 loadU(const typename
internal::SimdTraits
<T
>::type
*m
)
520 return simdLoadU(m
, typename
internal::SimdTraits
<T
>::tag());
525 loadU(const std::enable_if_t
<std::is_arithmetic
<T
>::value
, T
> *m
)
530 template <typename T
, size_t N
>
531 static inline T gmx_simdcall
532 loadU(const AlignedArray
<typename
internal::SimdTraits
<T
>::type
, N
> &m
)
534 return simdLoadU(m
.data(), typename
internal::SimdTraits
<T
>::tag());
/*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
 *
 * This object is returned by setZero(), and depending on what type you assign
 * the result to the conversion method will call the right low-level function.
 * The conversions are intentionally implicit: the target type of the
 * assignment is what selects the conversion operator.
 */
class SimdSetZeroProxy
{
public:
    //! \brief Conversion method that returns 0.0 as float
    operator float() const { return 0.0F; }
    //! \brief Conversion method that returns 0.0 as double
    operator double() const { return 0.0; }
    //! \brief Conversion method that returns 0 as int32
    operator std::int32_t() const { return 0; }
#if GMX_SIMD_HAVE_FLOAT
    //! \brief Conversion method that will execute setZero() for SimdFloat
    operator SimdFloat() const { return setZeroF(); }
    //! \brief Conversion method that will execute setZero() for SimdFInt32
    operator SimdFInt32() const { return setZeroFI(); }
#endif
#if GMX_SIMD4_HAVE_FLOAT
    //! \brief Conversion method that will execute setZero() for Simd4Float
    operator Simd4Float() const { return simd4SetZeroF(); }
#endif
#if GMX_SIMD_HAVE_DOUBLE
    //! \brief Conversion method that will execute setZero() for SimdDouble
    operator SimdDouble() const { return setZeroD(); }
    //! \brief Conversion method that will execute setZero() for SimdDInt32
    operator SimdDInt32() const { return setZeroDI(); }
#endif
#if GMX_SIMD4_HAVE_DOUBLE
    //! \brief Conversion method that will execute setZero() for Simd4Double
    operator Simd4Double() const { return simd4SetZeroD(); }
#endif
};
573 /*! \brief Helper function to set any SIMD or scalar variable to zero
575 * \return Proxy object that will call the actual function to set a SIMD/scalar
576 * variable to zero based on the conversion function called when you
579 static inline const SimdSetZeroProxy gmx_simdcall
587 //TODO: Don't forward functions but properly rename them and use proper traits
589 struct Simd4Traits
{};
591 #if GMX_SIMD4_HAVE_FLOAT
593 struct Simd4Traits
<Simd4Float
>
599 #if GMX_SIMD4_HAVE_DOUBLE
601 struct Simd4Traits
<Simd4Double
>
606 } //namespace internal
608 #if GMX_SIMD4_HAVE_REAL
610 T
load(const typename
internal::Simd4Traits
<T
>::type
* m
)
615 T
loadU(const typename
internal::Simd4Traits
<T
>::type
* m
)
/* Implement most of 4xn functions by forwarding them to other functions when possible.
 * The functions forwarded here don't need to be implemented by each implementation.
 * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
 */
#if GMX_SIMD_HAVE_FLOAT
#    if GMX_SIMD_FLOAT_WIDTH < 4
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_FLOAT)
#    elif GMX_SIMD_FLOAT_WIDTH == 4
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT GMX_SIMD_HAVE_LOADU
// For GMX_SIMD_FLOAT_WIDTH>4 it is the responsibility of the implementation to set
// GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#    endif

#    if GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
// Below width 4 the SIMD4 type stands in for the 4xN type; otherwise the
// regular SIMD float type is used directly.
#        if GMX_SIMD_FLOAT_WIDTH < 4
using Simd4NFloat = Simd4Float;
#            define GMX_SIMD4N_FLOAT_WIDTH 4
#        else
using Simd4NFloat = SimdFloat;
#            define GMX_SIMD4N_FLOAT_WIDTH GMX_SIMD_FLOAT_WIDTH
#        endif

#        if GMX_SIMD_FLOAT_WIDTH <= 4
//! \brief Builds a Simd4NFloat from the single scalar that \p f points to
static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
{
    return Simd4NFloat(*f);
}
//! \brief Forwards to load<Simd4NFloat>() on \p f
static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
{
    return load<Simd4NFloat>(f);
}
//! \brief Forwards to loadU<Simd4NFloat>() on \p f; the offset argument is not used here
static inline Simd4NFloat gmx_simdcall loadU4NOffset(const float* f, int /*unused*/)
{
    return loadU<Simd4NFloat>(f);
}
#        elif GMX_SIMD_FLOAT_WIDTH == 8
//! \brief Forwards to loadU1DualHsimd() on \p f
static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
{
    return loadU1DualHsimd(f);
}
//! \brief Forwards to loadDuplicateHsimd() on \p f
static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
{
    return loadDuplicateHsimd(f);
}
#        endif
#    endif // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#else      // GMX_SIMD_HAVE_FLOAT
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
#endif
// Double-precision counterpart of the 4xN forwarding helpers.
#if GMX_SIMD_HAVE_DOUBLE
#    if GMX_SIMD_DOUBLE_WIDTH < 4
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_DOUBLE)
#    elif GMX_SIMD_DOUBLE_WIDTH == 4
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE GMX_SIMD_HAVE_LOADU
// For GMX_SIMD_DOUBLE_WIDTH>4 it is the responsibility of the implementation to set
// GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#    endif

#    if GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
// Below width 4 the SIMD4 type stands in for the 4xN type; otherwise the
// regular SIMD double type is used directly.
#        if GMX_SIMD_DOUBLE_WIDTH < 4
using Simd4NDouble = Simd4Double;
#            define GMX_SIMD4N_DOUBLE_WIDTH 4
#        else
using Simd4NDouble = SimdDouble;
#            define GMX_SIMD4N_DOUBLE_WIDTH GMX_SIMD_DOUBLE_WIDTH
#        endif

#        if GMX_SIMD_DOUBLE_WIDTH <= 4
//! \brief Builds a Simd4NDouble from the single scalar that \p f points to
static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
{
    return Simd4NDouble(*f);
}
//! \brief Forwards to load<Simd4NDouble>() on \p f
static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
{
    return load<Simd4NDouble>(f);
}
//! \brief Forwards to loadU<Simd4NDouble>() on \p f; the offset argument is not used here
static inline Simd4NDouble gmx_simdcall loadU4NOffset(const double* f, int /*unused*/)
{
    return loadU<Simd4NDouble>(f);
}
#        elif GMX_SIMD_DOUBLE_WIDTH == 8
//! \brief Forwards to loadU1DualHsimd() on \p f
static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
{
    return loadU1DualHsimd(f);
}
//! \brief Forwards to loadDuplicateHsimd() on \p f
static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
{
    return loadDuplicateHsimd(f);
}
#        endif
#    endif // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#else      // GMX_SIMD_HAVE_DOUBLE
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
#endif
// Precision-neutral names for the 4xN utilities: they resolve to the double
// variants when GMX_DOUBLE is 1 and to the float variants otherwise.
#if GMX_DOUBLE
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#else
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#endif

#if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
#    if GMX_DOUBLE
using Simd4NReal = Simd4NDouble;
#        define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_DOUBLE_WIDTH
#    else
using Simd4NReal = Simd4NFloat;
#        define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_FLOAT_WIDTH
#    endif
#endif
743 //! \} end of name-group proxy objects
747 // \} end of module_simd
749 //! \endcond end of condition libapi
752 #if GMX_SIMD_HAVE_FLOAT
754 /*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
756 * \param[in] ptr A pointer to a float
758 static inline bool isSimdAligned(const float *ptr
)
760 return reinterpret_cast<std::size_t>(ptr
) % (GMX_SIMD_FLOAT_WIDTH
*sizeof(float)) == 0;
763 #endif // GMX_SIMD_HAVE_FLOAT
765 #if GMX_SIMD_HAVE_DOUBLE
767 /*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
769 * \param[in] ptr A pointer to a double
771 static inline bool isSimdAligned(const double *ptr
)
773 return reinterpret_cast<std::size_t>(ptr
) % (GMX_SIMD_DOUBLE_WIDTH
*sizeof(double)) == 0;
776 #endif // GMX_SIMD_HAVE_DOUBLE
// Build-time guard: stop compilation when the SIMD real width exceeds
// GMX_REAL_MAX_SIMD_WIDTH.
#if GMX_SIMD_HAVE_REAL
#    if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
#        error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
#    endif
#endif
#if 0
/* This is a hack to cover the corner case of code using an
   explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE.

   Such code is expected to include simd.h to get those symbols
   defined, but the actual definitions are in the implementation headers
   included by simd.h. check-source.py is not a full preprocessor, so
   it does not see the definitions in the implementation headers as
   belonging to simd.h, thus it cannot check that simd.h is being used
   correctly in the above hypothetical corner case. However, the
   checker also does not parse #if 0, so we can fool the checker into
   thinking that definition occurs here. The compiler never sees the
   two defines below.

   If there's ever other kinds of SIMD code that might have the same
   problem, we might want to add other variables here.
 */
#    define GMX_SIMD_HAVE_FLOAT 1
#    define GMX_SIMD_HAVE_DOUBLE 1

#endif // end of hack
809 // The ArrayRef<SimdReal> specialization is always included, because compiler
810 // errors are confusing when template specialization aren't available.
811 #include "gromacs/simd/simd_memory.h"
813 #endif // GMX_SIMD_SIMD_H