128-bit AVX2 SIMD for AMD Ryzen
[gromacs.git] / src / gromacs / simd / simd.h
blobf22022dd4db77ff9f0627cd6c1b2de2297ffe844
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal
37 * \defgroup module_simd SIMD intrinsics interface (simd)
38 * \ingroup group_utilitymodules
40 * \brief Provides an architecture-independent way of doing SIMD coding.
42 * Overview of the SIMD implementation is provided in \ref page_simd.
43 * The details are documented in gromacs/simd/simd.h and the reference
44 * implementation impl_reference.h.
46 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
49 #ifndef GMX_SIMD_SIMD_H
50 #define GMX_SIMD_SIMD_H
52 /*! \libinternal \file
54 * \brief Definitions, capabilities, and wrappers for SIMD module.
56 * The macros in this file are intended to be used for writing
57 * architecture-independent SIMD intrinsics code.
58 * To support a new architecture, adding a new sub-include with macros here
59 * should be (nearly) all that is needed.
61 * The defines in this top-level file will set default Gromacs real precision
62 * operations to either single or double precision based on whether
63 * GMX_DOUBLE is 1. The actual implementation - including e.g.
64 * conversion operations specifically between single and double - is documented
65 * in impl_reference.h.
67 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
69 * \inlibraryapi
70 * \ingroup module_simd
73 #include "config.h"
75 #include <cstddef>
76 #include <cstdint>
78 #include <array>
80 #include "gromacs/utility/classhelpers.h"
81 #include "gromacs/utility/real.h"
83 //! \cond libapi
86 /*! \addtogroup module_simd
87 * \{
91 /*! \name SIMD predefined macros to describe high-level capabilities
93 * These macros are used to describe the features available in default
94 * Gromacs real precision. They are set from the lower-level implementation
95 * files that have macros describing single and double precision individually,
96 * as well as the implementation details.
97 * \{
100 #if GMX_SIMD_X86_SSE2
101 # include "impl_x86_sse2/impl_x86_sse2.h"
102 #elif GMX_SIMD_X86_SSE4_1
103 # include "impl_x86_sse4_1/impl_x86_sse4_1.h"
104 #elif GMX_SIMD_X86_AVX_128_FMA
105 # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
106 #elif GMX_SIMD_X86_AVX_256
107 # include "impl_x86_avx_256/impl_x86_avx_256.h"
108 #elif GMX_SIMD_X86_AVX2_256
109 # include "impl_x86_avx2_256/impl_x86_avx2_256.h"
110 #elif GMX_SIMD_X86_AVX2_128
111 # include "impl_x86_avx2_128/impl_x86_avx2_128.h"
112 #elif GMX_SIMD_X86_MIC
113 # include "impl_x86_mic/impl_x86_mic.h"
114 #elif GMX_SIMD_X86_AVX_512
115 # include "impl_x86_avx_512/impl_x86_avx_512.h"
116 #elif GMX_SIMD_X86_AVX_512_KNL
117 # include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
118 #elif GMX_SIMD_ARM_NEON
119 # include "impl_arm_neon/impl_arm_neon.h"
120 #elif GMX_SIMD_ARM_NEON_ASIMD
121 # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
122 #elif GMX_SIMD_IBM_QPX
123 # include "impl_ibm_qpx/impl_ibm_qpx.h"
124 #elif GMX_SIMD_IBM_VMX
125 # include "impl_ibm_vmx/impl_ibm_vmx.h"
126 #elif GMX_SIMD_IBM_VSX
127 # include "impl_ibm_vsx/impl_ibm_vsx.h"
128 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
129 # include "impl_reference/impl_reference.h" // Includes doxygen documentation
130 #else
131 # include "impl_none/impl_none.h"
132 #endif
134 // The scalar SIMD-mimicking functions are always included so we can use
135 // templated functions even without SIMD support.
136 #include "gromacs/simd/scalar/scalar.h"
137 #include "gromacs/simd/scalar/scalar_math.h"
138 #include "gromacs/simd/scalar/scalar_util.h"
141 #if GMX_DOUBLE
142 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
143 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
144 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
145 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
146 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
147 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
148 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
149 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
150 #else // GMX_DOUBLE
152 /*! \brief 1 if SimdReal is available, otherwise 0.
154 * \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
156 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT
158 /*! \brief Width of SimdReal.
160 * \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
162 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
164 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
166 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
167 * \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
169 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT
171 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
173 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
174 * \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
176 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL
178 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
180 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
181 * \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
183 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS
185 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
187 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
188 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
190 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
192 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
194 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
195 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
197 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
199 /*! \brief 1 if Simd4Real is available, otherwise 0.
201 * \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
203 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT
205 #endif // GMX_DOUBLE
207 //! \} end of name-group describing high-level capabilities
209 namespace gmx
212 template<class T, size_t N>
213 struct AlignedArray;
215 #if GMX_SIMD_HAVE_FLOAT
216 /*! \libinternal \brief Identical to std::array with GMX_SIMD_FLOAT_WIDTH alignment.
217 * Should not be deleted through base pointer (destructor is non-virtual).
219 template<size_t N>
220 struct alignas(GMX_SIMD_FLOAT_WIDTH*sizeof(float))AlignedArray<float, N> : public std::array<float, N>
223 #endif
225 #if GMX_SIMD_HAVE_DOUBLE
226 /*! \libinternal \brief Identical to std::array with GMX_SIMD_DOUBLE_WIDTH alignment.
227 * Should not be deleted through base pointer (destructor is non-virtual).
229 template<size_t N>
230 struct alignas(GMX_SIMD_DOUBLE_WIDTH*sizeof(double))AlignedArray<double, N> : public std::array<double, N>
233 #endif
235 #if GMX_SIMD_HAVE_REAL
237 /*! \name SIMD data types
239 * The actual storage of these types is implementation dependent. The
240 * documentation is generated from the reference implementation, but for
241 * normal usage this will likely not be what you are using.
242 * \{
245 /*! \brief Real precision floating-point SIMD datatype.
247 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
249 * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
251 * \note This variable cannot be placed inside other structures or classes, since
252 * some compilers (including at least clang-3.7) appear to lose the
253 * alignment. This is likely particularly severe when allocating such
254 * memory on the heap, but it occurs for stack structures too.
256 # if GMX_DOUBLE
257 typedef SimdDouble SimdReal;
258 # else
259 typedef SimdFloat SimdReal;
260 # endif
263 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
265 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
267 * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
268 * internally, otherwise \ref SimdFBool. This is necessary since some
269 * SIMD implementations use bitpatterns for marking truth, so single-
270 * vs. double precision booleans are not necessarily exchangeable.
271 * As long as you just use this type you will not have to worry about precision.
273 * See \ref SimdIBool for an explanation of real vs. integer booleans.
275 * \note This variable cannot be placed inside other structures or classes, since
276 * some compilers (including at least clang-3.7) appear to lose the
277 * alignment. This is likely particularly severe when allocating such
278 * memory on the heap, but it occurs for stack structures too.
280 # if GMX_DOUBLE
281 typedef SimdDBool SimdBool;
282 # else
283 typedef SimdFBool SimdBool;
284 # endif
287 /*! \brief 32-bit integer SIMD type.
289 * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
290 * internally, otherwise \ref SimdFInt32. This might seem a strange
291 * implementation detail, but it is because some SIMD implementations use
292 * different types/widths of integers registers when converting from
293 * double vs. single precision floating point. As long as you just use
294 * this type you will not have to worry about precision.
296 * \note This variable cannot be placed inside other structures or classes, since
297 * some compilers (including at least clang-3.7) appear to lose the
298 * alignment. This is likely particularly severe when allocating such
299 * memory on the heap, but it occurs for stack structures too.
301 # if GMX_DOUBLE
302 typedef SimdDInt32 SimdInt32;
303 # else
304 typedef SimdFInt32 SimdInt32;
305 # endif
307 #if GMX_SIMD_HAVE_INT32_ARITHMETICS
308 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
310 * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
312 * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
313 * internally, otherwise \ref SimdFIBool. This is necessary since some
314 * SIMD implementations use bitpatterns for marking truth, so single-
315 * vs. double precision booleans are not necessarily exchangeable, and while
316 * a double-precision boolean might be represented with a 64-bit mask, the
317 * corresponding integer might only use a 32-bit mask.
319 * We provide conversion routines for these cases, so the only thing you need to
320 * keep in mind is to use \ref SimdBool when working with
321 * \ref SimdReal while you pick \ref SimdIBool when working with
322 * \ref SimdInt32 .
324 * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
326 * \note This variable cannot be placed inside other structures or classes, since
327 * some compilers (including at least clang-3.7) appear to lose the
328 * alignment. This is likely particularly severe when allocating such
329 * memory on the heap, but it occurs for stack structures too.
331 # if GMX_DOUBLE
332 typedef SimdDIBool SimdIBool;
333 # else
334 typedef SimdFIBool SimdIBool;
335 # endif
336 #endif // GMX_SIMD_HAVE_INT32_ARITHMETICS
339 #if GMX_DOUBLE
340 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
341 #else
342 const int c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
343 #endif
345 #endif // GMX_SIMD_HAVE_REAL
347 #if GMX_SIMD4_HAVE_REAL
348 /*! \brief Real precision floating-point SIMD4 datatype.
350 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
352 * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
354 * \note This variable cannot be placed inside other structures or classes, since
355 * some compilers (including at least clang-3.7) appear to lose the
356 * alignment. This is likely particularly severe when allocating such
357 * memory on the heap, but it occurs for stack structures too.
359 # if GMX_DOUBLE
360 typedef Simd4Double Simd4Real;
361 # else
362 typedef Simd4Float Simd4Real;
363 # endif
366 /*! \brief Boolean SIMD4 type for usage with \ref Simd4Real.
368 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
370 * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
371 * internally, otherwise \ref Simd4FBool. This is necessary since some
372 * SIMD implementations use bitpatterns for marking truth, so single-
373 * vs. double precision booleans are not necessarily exchangeable.
374 * As long as you just use this type you will not have to worry about precision.
376 * \note This variable cannot be placed inside other structures or classes, since
377 * some compilers (including at least clang-3.7) appear to lose the
378 * alignment. This is likely particularly severe when allocating such
379 * memory on the heap, but it occurs for stack structures too.
381 # if GMX_DOUBLE
382 typedef Simd4DBool Simd4Bool;
383 # else
384 typedef Simd4FBool Simd4Bool;
385 # endif
386 #endif // GMX_SIMD4_HAVE_REAL
388 //! \} end of name-group describing SIMD data types
390 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
391 * \{
394 class SimdLoadFProxyInternal;
396 static inline const SimdLoadFProxyInternal gmx_simdcall
397 load(const float *m);
399 template <size_t N>
400 static inline const SimdLoadFProxyInternal gmx_simdcall
401 load(const AlignedArray<float, N> &m);
403 /*! \libinternal \brief Proxy object to enable load() for SIMD and float types
405 * This object is returned by the load() function that takes a single pointer
406 * to a float. When the result is assigned to either SimdFloat or float,
407 * the appropriate conversion method will be executed, which in turn calls
408 * the correct low-level load function.
409 * In practice this simply means you can use load() for both SIMD
410 * and non-SIMD floating point data in templated functions.
412 * This is an internal class you should never touch or create objects of. The
413 * constructor is private and only the friend load() functions can call it;
414 * load() itself is static to enable aggressive inlining.
416 class SimdLoadFProxyInternal
418 public:
419 //! \brief Conversion method that will execute load of scalar float
420 operator float() const { return *m_; }
421 #if GMX_SIMD_HAVE_FLOAT
422 //! \brief Conversion method that will execute load of SimdFloat
423 operator SimdFloat() const { return simdLoad(m_); }
424 #endif
425 private:
426 //! \brief Private constructor can only be called from load()
427 SimdLoadFProxyInternal(const float *m) : m_(m) {}
429 friend const SimdLoadFProxyInternal gmx_simdcall
430 load(const float *m);
431 template <size_t N>
432 friend const SimdLoadFProxyInternal gmx_simdcall
433 load(const AlignedArray<float, N> &m);
435 const float * const m_; //!< The pointer used to load memory
437 GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadFProxyInternal);
440 /*! \brief Load function that returns proxy object for SimdFloat and float
442 * \param m Pointer to load memory
443 * \return Proxy object that will call the actual load for either SimdFloat
444 * or float when you assign it and the conversion method is called.
446 static inline const SimdLoadFProxyInternal gmx_simdcall
447 load(const float *m)
449 return {
454 template <size_t N>
455 static inline const SimdLoadFProxyInternal gmx_simdcall
456 load(const AlignedArray<float, N> &m)
458 return {
459 m.data()
463 class SimdLoadUFProxyInternal;
465 static inline const SimdLoadUFProxyInternal gmx_simdcall
466 loadU(const float *m);
468 /*! \libinternal \brief Proxy object to enable loadU() for SIMD and float types
470 * This object is returned by the loadU() function that takes a single pointer
471 * to a float. When the result is assigned to either SimdFloat or float,
472 * the appropriate conversion method will be executed, which in turn calls
473 * the correct low-level unaligned load function.
474 * In practice this simply means you can use loadU() for both SIMD
475 * and non-SIMD floating point data in templated functions.
477 * This is an internal class you should never touch or create objects of. The
478 * constructor is private and only the friend loadU() function can call it;
479 * loadU() itself is static to enable aggressive inlining.
481 class SimdLoadUFProxyInternal
483 public:
484 //! \brief Conversion method that will execute load of scalar float
485 operator float() const { return *m_; }
486 #if GMX_SIMD_HAVE_FLOAT && GMX_SIMD_HAVE_LOADU
487 //! \brief Conversion method that will execute load of SimdFloat
488 operator SimdFloat() const { return simdLoadU(m_); }
489 #endif
490 private:
491 //! \brief Private constructor can only be called from loadU()
492 SimdLoadUFProxyInternal(const float *m) : m_(m) {}
494 friend const SimdLoadUFProxyInternal gmx_simdcall
495 loadU(const float *m);
497 const float * const m_; //!< The pointer used to load memory
499 GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadUFProxyInternal);
503 /*! \brief LoadU function that returns proxy object for SimdFloat and float
505 * \param m Pointer to loadU memory
506 * \return Proxy object that will call the actual loadU for either SimdFloat
507 * or float when you assign it and the conversion method is called.
509 static inline const SimdLoadUFProxyInternal gmx_simdcall
510 loadU(const float *m)
512 return {
517 class SimdLoadDProxyInternal;
519 static inline const SimdLoadDProxyInternal gmx_simdcall
520 load(const double *m);
522 template <size_t N>
523 static inline const SimdLoadDProxyInternal gmx_simdcall
524 load(const AlignedArray<double, N> &m);
526 /*! \libinternal \brief Proxy object to enable load() for SIMD and double types
528 * This object is returned by the load() function that takes a single pointer
529 * to a double. When the result is assigned to either SimdDouble or double,
530 * the appropriate conversion method will be executed, which in turn calls
531 * the correct low-level load function.
532 * In practice this simply means you can use load() for both SIMD
533 * and non-SIMD floating point data in templated functions.
535 * This is an internal class you should never touch or create objects of. The
536 * constructor is private and only the friend load() functions can call it;
537 * load() itself is static to enable aggressive inlining.
539 class SimdLoadDProxyInternal
541 public:
542 //! \brief Conversion method that will execute load of scalar double
543 operator double() const { return *m_; }
544 #if GMX_SIMD_HAVE_DOUBLE
545 //! \brief Conversion method that will execute load of SimdDouble
546 operator SimdDouble() const { return simdLoad(m_); }
547 #endif
548 private:
549 //! \brief Private constructor can only be called from load()
550 SimdLoadDProxyInternal(const double *m) : m_(m) {}
552 friend const SimdLoadDProxyInternal gmx_simdcall
553 load(const double *m);
554 template <size_t N>
555 friend const SimdLoadDProxyInternal gmx_simdcall
556 load(const AlignedArray<double, N> &m);
557 const double * const m_; //!< The pointer used to load memory
559 GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadDProxyInternal);
562 /*! \brief Load function that returns proxy object for SimdDouble and double
564 * \param m Pointer to load memory
565 * \return Proxy object that will call the actual load for either SimdDouble
566 * or double when you assign it and the conversion method is called.
568 static inline const SimdLoadDProxyInternal gmx_simdcall
569 load(const double *m)
571 return {
576 template <size_t N>
577 static inline const SimdLoadDProxyInternal gmx_simdcall
578 load(const AlignedArray<double, N> &m)
580 return {
581 m.data()
585 class SimdLoadUDProxyInternal;
587 static inline const SimdLoadUDProxyInternal gmx_simdcall
588 loadU(const double *m);
590 /*! \libinternal \brief Proxy object to enable loadU() for SIMD and double types
592 * This object is returned by the loadU() function that takes a single pointer
593 * to a double. When the result is assigned to either SimdDouble or double,
594 * the appropriate conversion method will be executed, which in turn calls
595 * the correct low-level unaligned load function.
596 * In practice this simply means you can use loadU() for both SIMD
597 * and non-SIMD floating point data in templated functions.
599 * This is an internal class you should never touch or create objects of. The
600 * constructor is private and only the friend loadU() function can call it;
601 * loadU() itself is static to enable aggressive inlining.
603 class SimdLoadUDProxyInternal
605 public:
606 //! \brief Conversion method that will execute load of scalar double
607 operator double() const { return *m_; }
608 #if GMX_SIMD_HAVE_DOUBLE && GMX_SIMD_HAVE_LOADU
609 //! \brief Conversion method that will execute load of SimdDouble
610 operator SimdDouble() const { return simdLoadU(m_); }
611 #endif
612 private:
613 //! \brief Private constructor can only be called from loadU()
614 SimdLoadUDProxyInternal(const double *m) : m_(m) {}
616 friend const SimdLoadUDProxyInternal gmx_simdcall
617 loadU(const double *m);
619 const double * const m_; //!< The pointer used to load memory
621 GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadUDProxyInternal);
624 /*! \brief LoadU function that returns proxy object for SimdDouble and double
626 * \param m Pointer to loadU memory
627 * \return Proxy object that will call the actual loadU for either SimdDouble
628 * or double when you assign it and the conversion method is called.
630 static inline const SimdLoadUDProxyInternal gmx_simdcall
631 loadU(const double *m)
633 return {
639 class SimdLoadIProxyInternal;
641 static inline const SimdLoadIProxyInternal gmx_simdcall
642 load(const std::int32_t *m);
644 /*! \libinternal \brief Proxy object load() for SimdFInt32, SimdDInt32, and int32
646 * This object is returned by the load() function that takes a single pointer
647 * to an integer. When the result is assigned to either SimdFInt32, SimdDInt32,
648 * or std::int32_t, the appropriate conversion method will be executed, which in
649 * turn calls the correct low-level load function.
650 * In practice this simply means you can use load() regardless of the type.
652 * This is an internal class you should never touch or create objects of. The
653 * constructor is private and only the friend load() function can call it;
654 * load() itself is static to enable aggressive inlining.
656 class SimdLoadIProxyInternal
658 public:
659 //! \brief Conversion method that will execute load of scalar int32
660 operator std::int32_t() const { return *m_; }
661 #if GMX_SIMD_HAVE_FLOAT
662 //! \brief Conversion method that will execute load of SimdFInt32
663 operator SimdFInt32() const { return simdLoadFI(m_); }
664 #endif
665 #if GMX_SIMD_HAVE_DOUBLE
666 //! \brief Conversion method that will execute load of SimdDInt32
667 operator SimdDInt32() const { return simdLoadDI(m_); }
668 #endif
669 private:
670 //! \brief Private constructor can only be called from load()
671 SimdLoadIProxyInternal(const std::int32_t *m) : m_(m) {}
673 friend const SimdLoadIProxyInternal gmx_simdcall
674 load(const std::int32_t *m);
676 const std::int32_t * const m_; //!< The pointer used to load memory
678 GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadIProxyInternal);
681 /*! \brief Integer load function (proxy object) for SimdFInt32, SimdDInt32, and int32.
683 * \param m Pointer to load memory
684 * \return Proxy object that will call the actual load for either SimdFInt32
685 * or SimdDInt32 when you assign it and the conversion method is called.
687 static inline const SimdLoadIProxyInternal gmx_simdcall
688 load(const std::int32_t *m)
690 return {
696 class SimdLoadUIProxyInternal;
698 static inline const SimdLoadUIProxyInternal gmx_simdcall
699 loadU(const std::int32_t *m);
701 /*! \libinternal \brief Proxy object - loadU() for SimdFInt32, SimdDInt32, and int32
703 * \copydetails SimdLoadIProxyInternal
705 class SimdLoadUIProxyInternal
707 public:
708 //! \brief Conversion method that will execute unaligned load of scalar int32
709 operator std::int32_t() const { return *m_; }
710 #if GMX_SIMD_HAVE_FLOAT && GMX_SIMD_HAVE_LOADU
711 //!\brief Conversion method that will execute unaligned load of SimdFInt32
712 operator SimdFInt32() const { return simdLoadUFI(m_); }
713 #endif
714 #if GMX_SIMD_HAVE_DOUBLE && GMX_SIMD_HAVE_LOADU
715 //!\brief Conversion method that will execute unaligned load of SimdDInt32
716 operator SimdDInt32() const { return simdLoadUDI(m_); }
717 #endif
718 private:
719 //! \brief Private constructor can only be called from loadU()
720 SimdLoadUIProxyInternal(const std::int32_t *m) : m_(m) {}
722 friend const SimdLoadUIProxyInternal gmx_simdcall
723 loadU(const std::int32_t *m);
725 const std::int32_t * const m_; //!< The pointer used to load memory
727 GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadUIProxyInternal);
730 /*! \brief Integer loadU function (proxy object) for SimdFInt32, SimdDInt32, and int32.
732 * \param m Pointer to load memory
733 * \return Proxy object that will call the actual load for either SimdFInt32
734 * or SimdDInt32 when you assign it and the conversion method is called.
736 static inline const SimdLoadUIProxyInternal gmx_simdcall
737 loadU(const std::int32_t *m)
739 return {
745 class SimdSetZeroProxyInternal;
747 static inline const SimdSetZeroProxyInternal gmx_simdcall
748 setZero();
750 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
752 * This object is returned by setZero(), and depending on what type you assign
753 * the result to the conversion method will call the right low-level function.
755 class SimdSetZeroProxyInternal
757 public:
758 //!\brief Conversion method that returns 0.0 as float
759 operator float() const { return 0.0f; }
760 //!\brief Conversion method that returns 0.0 as double
761 operator double() const { return 0.0; }
762 //!\brief Conversion method that returns 0.0 as int32
763 operator std::int32_t() const { return 0; }
764 #if GMX_SIMD_HAVE_FLOAT
765 //!\brief Conversion method that will execute setZero() for SimdFloat
766 operator SimdFloat() const { return setZeroF(); }
767 //!\brief Conversion method that will execute setZero() for SimdFInt32
768 operator SimdFInt32() const { return setZeroFI(); }
769 #endif
770 #if GMX_SIMD4_HAVE_FLOAT
771 //!\brief Conversion method that will execute setZero() for Simd4Float
772 operator Simd4Float() const { return simd4SetZeroF(); }
773 #endif
774 #if GMX_SIMD_HAVE_DOUBLE
775 //!\brief Conversion method that will execute setZero() for SimdDouble
776 operator SimdDouble() const { return setZeroD(); }
777 //!\brief Conversion method that will execute setZero() for SimdDInt32
778 operator SimdDInt32() const { return setZeroDI(); }
779 #endif
780 #if GMX_SIMD4_HAVE_DOUBLE
781 //!\brief Conversion method that will execute setZero() for Simd4Double
782 operator Simd4Double() const { return simd4SetZeroD(); }
783 #endif
785 private:
786 //! \brief Private constructor can only be called from setZero()
787 SimdSetZeroProxyInternal() {}
789 friend const SimdSetZeroProxyInternal gmx_simdcall
790 setZero();
792 GMX_DISALLOW_COPY_AND_ASSIGN(SimdSetZeroProxyInternal);
795 /*! \brief Proxy object to set any SIMD or scalar variable to zero
797 * \return Proxy object that will call the actual function to set a SIMD/scalar
798 * variable to zero based on the conversion function called when you
799 * assign the result.
801 static inline const SimdSetZeroProxyInternal gmx_simdcall
802 setZero()
804 return {};
806 //! \} end of name-group proxy objects
808 } // namespace gmx
810 // \} end of module_simd
812 //! \endcond end of condition libapi
815 #if 0
816 /* This is a hack to cover the corner case of using an
817 explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
818 GMX_SIMD_HAVE_REAL.
820 Such code is expected to include simd.h to get those symbols
821 defined, but the actual definitions are in the implementation headers
822 included by simd.h. check-source.py is not a full preprocessor, so
823 it does not see the definitions in the implementation headers as
824 belonging to simd.h, thus it cannot check that simd.h is being used
825 correctly in the above hypothetical corner case. However, the
826 checker also does not parse #if 0, so we can fool the checker into
827 thinking that definition occurs here, and that will work well
828 enough.
830 If there are ever other kinds of SIMD code that might have the same
831 problem, we might want to add other variables here.
833 # define GMX_SIMD_HAVE_FLOAT 1
834 # define GMX_SIMD_HAVE_DOUBLE 1
836 #endif // end of hack
838 #endif // GMX_SIMD_SIMD_H