src/gromacs/simd/simd.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \libinternal
  37  * \defgroup module_simd SIMD intrinsics interface (simd)
  38  * \ingroup group_utilitymodules
  39  *
  40  * \brief Provides an architecture-independent way of doing SIMD coding.
  41  *
  42  * Overview of the SIMD implementation is provided in \ref page_simd.
  43  * The details are documented in gromacs/simd/simd.h and the reference
  44  * implementation impl_reference.h.
  45  *
  46  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  47  */
  48
  49 #ifndef GMX_SIMD_SIMD_H
  50 #define GMX_SIMD_SIMD_H
  51
  52 /*! \libinternal \file
  53  *
  54  * \brief Definitions, capabilities, and wrappers for SIMD module.
  55  *
  56  * The macros in this file are intended to be used for writing
  57  * architecture-independent SIMD intrinsics code.
  58  * To support a new architecture, adding a new sub-include with macros here
  59  * should be (nearly) all that is needed.
  60  *
  61  * The defines in this top-level file will set default Gromacs real precision
  62  * operations to either single or double precision based on whether
  63  * GMX_DOUBLE is 1. The actual implementation - including e.g.
  64  * conversion operations specifically between single and double - is documented
  65  * in impl_reference.h.
  66  *
  67  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  68  *
  69  * \inlibraryapi
  70  * \ingroup module_simd
  71  */
  72
  73 #include "config.h"
  74
  75 #include <cstddef>
  76 #include <cstdint>
  77
  78 #include <array>
  79
  80 #include "gromacs/utility/classhelpers.h"
  81 #include "gromacs/utility/real.h"
  82
  83 //! \cond libapi
  84
  85
  86 /*! \addtogroup module_simd
  87  * \{
  88  */
  89
  90
  91 /*! \name SIMD predefined macros to describe high-level capabilities
  92  *
  93  *  These macros are used to describe the features available in default
  94  *  Gromacs real precision. They are set from the lower-level implementation
  95  *  files that have macros describing single and double precision individually,
  96  *  as well as the implementation details.
  97  *  \{
  98  */
  99
 100 #if GMX_SIMD_X86_SSE2
 101 #    include "impl_x86_sse2/impl_x86_sse2.h"
 102 #elif GMX_SIMD_X86_SSE4_1
 103 #    include "impl_x86_sse4_1/impl_x86_sse4_1.h"
 104 #elif GMX_SIMD_X86_AVX_128_FMA
 105 #    include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
 106 #elif GMX_SIMD_X86_AVX_256
 107 #    include "impl_x86_avx_256/impl_x86_avx_256.h"
 108 #elif GMX_SIMD_X86_AVX2_256
 109 #    include "impl_x86_avx2_256/impl_x86_avx2_256.h"
 110 #elif GMX_SIMD_X86_AVX2_128
 111 #    include "impl_x86_avx2_128/impl_x86_avx2_128.h"
 112 #elif GMX_SIMD_X86_MIC
 113 #    include "impl_x86_mic/impl_x86_mic.h"
 114 #elif GMX_SIMD_X86_AVX_512
 115 #    include "impl_x86_avx_512/impl_x86_avx_512.h"
 116 #elif GMX_SIMD_X86_AVX_512_KNL
 117 #    include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
 118 #elif GMX_SIMD_ARM_NEON
 119 #    include "impl_arm_neon/impl_arm_neon.h"
 120 #elif GMX_SIMD_ARM_NEON_ASIMD
 121 #    include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
 122 #elif GMX_SIMD_IBM_QPX
 123 #    include "impl_ibm_qpx/impl_ibm_qpx.h"
 124 #elif GMX_SIMD_IBM_VMX
 125 #    include "impl_ibm_vmx/impl_ibm_vmx.h"
 126 #elif GMX_SIMD_IBM_VSX
 127 #    include "impl_ibm_vsx/impl_ibm_vsx.h"
 128 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
 129 #    include "impl_reference/impl_reference.h" // Includes doxygen documentation
 130 #else
 131 #    include "impl_none/impl_none.h"
 132 #endif
 133
 134 // The scalar SIMD-mimicking functions are always included so we can use
 135 // templated functions even without SIMD support.
 136 #include "gromacs/simd/scalar/scalar.h"
 137 #include "gromacs/simd/scalar/scalar_math.h"
 138 #include "gromacs/simd/scalar/scalar_util.h"
 139
 140
  141 #if GMX_DOUBLE // Double precision: each real-precision macro aliases its DOUBLE / DINT32 counterpart
  142 #    define GMX_SIMD_HAVE_REAL                                     GMX_SIMD_HAVE_DOUBLE
  143 #    define GMX_SIMD_REAL_WIDTH                                    GMX_SIMD_DOUBLE_WIDTH
  144 #    define GMX_SIMD_HAVE_INT32_EXTRACT                            GMX_SIMD_HAVE_DINT32_EXTRACT
  145 #    define GMX_SIMD_HAVE_INT32_LOGICAL                            GMX_SIMD_HAVE_DINT32_LOGICAL
  146 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS                        GMX_SIMD_HAVE_DINT32_ARITHMETICS
  147 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL    GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
  148 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL                          GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
  149 #    define GMX_SIMD4_HAVE_REAL                                    GMX_SIMD4_HAVE_DOUBLE
  150 #else // GMX_DOUBLE is 0: single-precision aliases; the Doxygen text below describes both branches
  151
  152 /*! \brief 1 if SimdReal is available, otherwise 0.
  153  *
  154  *  \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
  155  */
  156 #    define GMX_SIMD_HAVE_REAL               GMX_SIMD_HAVE_FLOAT
  157
  158 /*! \brief Width of SimdReal.
  159  *
  160  *  \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
  161  */
  162 #    define GMX_SIMD_REAL_WIDTH              GMX_SIMD_FLOAT_WIDTH
  163
  164 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
  165  *
  166  *  \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
  167  *  \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
  168  */
  169 #    define GMX_SIMD_HAVE_INT32_EXTRACT      GMX_SIMD_HAVE_FINT32_EXTRACT
  170
  171 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
  172  *
  173  *  \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
  174  *  \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
  175  */
  176 #    define GMX_SIMD_HAVE_INT32_LOGICAL      GMX_SIMD_HAVE_FINT32_LOGICAL
  177
  178 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
  179  *
  180  *  \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
  181  *  \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
  182  */
  183 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS  GMX_SIMD_HAVE_FINT32_ARITHMETICS
  184
  185 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
  186  *
  187  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
  188  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
  189  */
  190 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL    GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
  191
  192 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
  193  *
  194  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
  195  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
  196  */
  197 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL    GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
  198
  199 /*! \brief 1 if Simd4Real is available, otherwise 0.
  200  *
  201  *  \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
  202  */
  203 #    define GMX_SIMD4_HAVE_REAL              GMX_SIMD4_HAVE_FLOAT
  204
  205 #endif // GMX_DOUBLE
 206
 207 //! \}  end of name-group describing high-level capabilities
 208
 209 namespace gmx
 210 {
 211
 212 template<class T, size_t N>
 213 struct AlignedArray;
 214
 215 #if GMX_SIMD_HAVE_FLOAT
 216 /*! \libinternal \brief Identical to std::array with GMX_SIMD_FLOAT_WIDTH alignment.
 217  *  Should not be deleted through base pointer (destructor is non-virtual).
 218  */
 219 template<size_t N>
 220 struct alignas(GMX_SIMD_FLOAT_WIDTH*sizeof(float))AlignedArray<float, N> : public std::array<float, N>
 221 {
 222 };
 223 #endif
 224
 225 #if GMX_SIMD_HAVE_DOUBLE
 226 /*! \libinternal \brief  Identical to std::array with GMX_SIMD_DOUBLE_WIDTH alignment.
 227  *  Should not be deleted through base pointer (destructor is non-virtual).
 228  */
 229 template<size_t N>
 230 struct alignas(GMX_SIMD_DOUBLE_WIDTH*sizeof(double))AlignedArray<double, N> : public std::array<double, N>
 231 {
 232 };
 233 #endif
 234
 235 #if GMX_SIMD_HAVE_REAL
 236
 237 /*! \name SIMD data types
 238  *
 239  *  The actual storage of these types is implementation dependent. The
 240  *  documentation is generated from the reference implementation, but for
 241  *  normal usage this will likely not be what you are using.
 242  * \{
 243  */
 244
 245 /*! \brief Real precision floating-point SIMD datatype.
 246  *
 247  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 248  *
 249  * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
 250  *
 251  * \note This variable cannot be placed inside other structures or classes, since
 252  *       some compilers (including at least clang-3.7) appear to lose the
 253  *       alignment. This is likely particularly severe when allocating such
 254  *       memory on the heap, but it occurs for stack structures too.
 255  */
 256 #    if GMX_DOUBLE
 257 typedef SimdDouble               SimdReal;
 258 #    else
 259 typedef SimdFloat                SimdReal;
 260 #    endif
 261
 262
 263 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
 264  *
 265  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
 266  *
 267  * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
 268  * internally, otherwise \ref SimdFBool. This is necessary since some
 269  * SIMD implementations use bitpatterns for marking truth, so single-
 270  * vs. double precision booleans are not necessarily exchangable.
 271  * As long as you just use this type you will not have to worry about precision.
 272  *
 273  * See \ref SimdIBool for an explanation of real vs. integer booleans.
 274  *
 275  * \note This variable cannot be placed inside other structures or classes, since
 276  *       some compilers (including at least clang-3.7) appear to lose the
 277  *       alignment. This is likely particularly severe when allocating such
 278  *       memory on the heap, but it occurs for stack structures too.
 279  */
 280 #    if GMX_DOUBLE
 281 typedef SimdDBool                SimdBool;
 282 #    else
 283 typedef SimdFBool                SimdBool;
 284 #    endif
 285
 286
 287 /*! \brief 32-bit integer SIMD type.
 288  *
 289  * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
 290  * internally, otherwise \ref SimdFInt32. This might seem a strange
 291  * implementation detail, but it is because some SIMD implementations use
 292  * different types/widths of integers registers when converting from
 293  * double vs. single precision floating point. As long as you just use
 294  * this type you will not have to worry about precision.
 295  *
 296  * \note This variable cannot be placed inside other structures or classes, since
 297  *       some compilers (including at least clang-3.7) appear to lose the
 298  *       alignment. This is likely particularly severe when allocating such
 299  *       memory on the heap, but it occurs for stack structures too.
 300  */
 301 #    if GMX_DOUBLE
 302 typedef SimdDInt32               SimdInt32;
 303 #    else
 304 typedef SimdFInt32               SimdInt32;
 305 #    endif
 306
 307 #if GMX_SIMD_HAVE_INT32_ARITHMETICS
 308 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
 309  *
 310  * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
 311  *
 312  * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
 313  * internally, otherwise \ref SimdFIBool. This is necessary since some
 314  * SIMD implementations use bitpatterns for marking truth, so single-
 315  * vs. double precision booleans are not necessarily exchangable, and while
 316  * a double-precision boolean might be represented with a 64-bit mask, the
 317  * corresponding integer might only use a 32-bit mask.
 318  *
 319  * We provide conversion routines for these cases, so the only thing you need to
 320  * keep in mind is to use \ref SimdBool when working with
 321  * \ref SimdReal while you pick \ref SimdIBool when working with
 322  * \ref SimdInt32 .
 323  *
 324  * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
 325  *
 326  * \note This variable cannot be placed inside other structures or classes, since
 327  *       some compilers (including at least clang-3.7) appear to lose the
 328  *       alignment. This is likely particularly severe when allocating such
 329  *       memory on the heap, but it occurs for stack structures too.
 330  */
 331 #    if GMX_DOUBLE
 332 typedef SimdDIBool               SimdIBool;
 333 #    else
 334 typedef SimdFIBool               SimdIBool;
 335 #    endif
 336 #endif  // GMX_SIMD_HAVE_INT32_ARITHMETICS
 337
 338
 339 #if GMX_DOUBLE
 340 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
 341 #else
 342 const int c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
 343 #endif
 344
 345 #endif  // GMX_SIMD_HAVE_REAL
 346
 347 #if GMX_SIMD4_HAVE_REAL
 348 /*! \brief Real precision floating-point SIMD4 datatype.
 349  *
 350  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 351  *
 352  * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
 353  *
 354  * \note This variable cannot be placed inside other structures or classes, since
 355  *       some compilers (including at least clang-3.7) appear to lose the
 356  *       alignment. This is likely particularly severe when allocating such
 357  *       memory on the heap, but it occurs for stack structures too.
 358  */
 359 #    if GMX_DOUBLE
 360 typedef Simd4Double               Simd4Real;
 361 #    else
 362 typedef Simd4Float                Simd4Real;
 363 #    endif
 364
 365
 366 /*! \brief Boolean SIMD4 type for usage with \ref SimdReal.
 367  *
 368  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
 369  *
 370  * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
 371  * internally, otherwise \ref Simd4FBool. This is necessary since some
 372  * SIMD implementations use bitpatterns for marking truth, so single-
 373  * vs. double precision booleans are not necessarily exchangable.
 374  * As long as you just use this type you will not have to worry about precision.
 375  *
 376  * \note This variable cannot be placed inside other structures or classes, since
 377  *       some compilers (including at least clang-3.7) appear to lose the
 378  *       alignment. This is likely particularly severe when allocating such
 379  *       memory on the heap, but it occurs for stack structures too.
 380  */
 381 #    if GMX_DOUBLE
 382 typedef Simd4DBool                Simd4Bool;
 383 #    else
 384 typedef Simd4FBool                Simd4Bool;
 385 #    endif
 386 #endif // GMX_SIMD4_HAVE_REAL
 387
 388 //! \}  end of name-group describing SIMD data types
 389
 390 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
 391  * \{
 392  */
 393
  394 class SimdLoadFProxyInternal;
  395
  396 static inline const SimdLoadFProxyInternal gmx_simdcall
  397 load(const float *m);
  398
  399 template <size_t N>
  400 static inline const SimdLoadFProxyInternal gmx_simdcall
  401 load(const AlignedArray<float, N> &m);
  402
  403 /*! \libinternal \brief Proxy object to enable load() for SIMD and float types
  404  *
  405  * This object is returned by the load() overloads that take a pointer to
  406  * float or an AlignedArray of float. When the result is assigned to either
  407  * SimdFloat or float, the matching conversion operator is executed, which
  408  * either dereferences the pointer (float) or calls simdLoad() (SimdFloat).
  409  * In practice this simply means you can use load() for both SIMD
  410  * and non-SIMD floating point data in templated functions.
  411  *
  412  * This is an internal class you should never touch or create objects of.
  413  * The constructor is private; only the load() overloads declared as friends
  414  * below can create instances.
  415  */
  416 class SimdLoadFProxyInternal
  417 {
  418     public:
  419         //! \brief Conversion method that will execute load of scalar float
  420         operator float() const { return *m_; }
  421 #if GMX_SIMD_HAVE_FLOAT
  422         //! \brief Conversion method that will execute load of SimdFloat
  423         operator SimdFloat() const { return simdLoad(m_); }
  424 #endif
  425     private:
  426         //! \brief Private constructor can only be called from load()
  427         SimdLoadFProxyInternal(const float *m) : m_(m) {}
  428
  429         friend const SimdLoadFProxyInternal gmx_simdcall
  430         load(const float *m);
  431         template <size_t N>
  432         friend const SimdLoadFProxyInternal gmx_simdcall
  433         load(const AlignedArray<float, N> &m);
  434
  435         const float * const m_; //!< The pointer used to load memory
  436
  437         GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadFProxyInternal);
  438 };
 439
  440 /*! \brief Load function that returns proxy object for SimdFloat and float
  441  *
  442  * \param m Pointer to the memory to load from
  443  * \return Proxy object that will call the actual load for either SimdFloat
  444  *         or float when you assign it and the conversion method is called.
  445  */
  446 static inline const SimdLoadFProxyInternal gmx_simdcall
  447 load(const float *m)
  448 {
  449     return { // braced return initializes the returned proxy directly from m
  450                m
  451     };
  452 }
  453 //! \brief Overload of load() taking an AlignedArray of float; wraps m.data() in the same proxy type
  454 template <size_t N>
  455 static inline const SimdLoadFProxyInternal gmx_simdcall
  456 load(const AlignedArray<float, N> &m)
  457 {
  458     return { // braced return initializes the returned proxy directly from the array's data pointer
  459                m.data()
  460     };
  461 }
 462
  463 class SimdLoadUFProxyInternal;
  464
  465 static inline const SimdLoadUFProxyInternal gmx_simdcall
  466 loadU(const float *m);
  467
  468 /*! \libinternal \brief Proxy object to enable loadU() for SIMD and float types
  469  *
  470  * This object is returned by the loadU() function that takes a single pointer
  471  * to a float. When the result is assigned to either SimdFloat or float,
  472  * the matching conversion operator is executed, which either dereferences
  473  * the pointer (float) or calls simdLoadU() (SimdFloat).
  474  * In practice this simply means you can use loadU() for both SIMD
  475  * and non-SIMD floating point data in templated functions.
  476  *
  477  * This is an internal class you should never touch or create objects of.
  478  * The constructor is private; only the loadU() function declared as a friend
  479  * below can create instances.
  480  */
  481 class SimdLoadUFProxyInternal
  482 {
  483     public:
  484         //! \brief Conversion method that will execute load of scalar float
  485         operator float() const { return *m_; }
  486 #if GMX_SIMD_HAVE_FLOAT && GMX_SIMD_HAVE_LOADU
  487         //! \brief Conversion method that will execute loadU of SimdFloat
  488         operator SimdFloat() const { return simdLoadU(m_); }
  489 #endif
  490     private:
  491         //! \brief Private constructor can only be called from loadU()
  492         SimdLoadUFProxyInternal(const float *m) : m_(m) {}
  493
  494         friend const SimdLoadUFProxyInternal gmx_simdcall
  495         loadU(const float *m);
  496
  497         const float * const m_; //!< The pointer used to load memory
  498
  499         GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadUFProxyInternal);
  500 };
 501
 502
  503 /*! \brief LoadU function that returns proxy object for SimdFloat and float
  504  *
  505  * \param m Pointer to the memory to load from
  506  * \return Proxy object that will call the actual loadU for either SimdFloat
  507  *         or float when you assign it and the conversion method is called.
  508  */
  509 static inline const SimdLoadUFProxyInternal gmx_simdcall
  510 loadU(const float *m)
  511 {
  512     return { // braced return initializes the returned proxy directly from m
  513                m
  514     };
  515 }
 516
  517 class SimdLoadDProxyInternal;
  518
  519 static inline const SimdLoadDProxyInternal gmx_simdcall
  520 load(const double *m);
  521
  522 template <size_t N>
  523 static inline const SimdLoadDProxyInternal gmx_simdcall
  524 load(const AlignedArray<double, N> &m);
  525
  526 /*! \libinternal \brief Proxy object to enable load() for SIMD and double types
  527  *
  528  * This object is returned by the load() overloads that take a pointer to
  529  * double or an AlignedArray of double. When the result is assigned to either
  530  * SimdDouble or double, the matching conversion operator is executed, which
  531  * either dereferences the pointer (double) or calls simdLoad() (SimdDouble).
  532  * In practice this simply means you can use load() for both SIMD
  533  * and non-SIMD floating point data in templated functions.
  534  *
  535  * This is an internal class you should never touch or create objects of.
  536  * The constructor is private; only the load() overloads declared as friends
  537  * below can create instances.
  538  */
  539 class SimdLoadDProxyInternal
  540 {
  541     public:
  542         //! \brief Conversion method that will execute load of scalar double
  543         operator double() const { return *m_; }
  544 #if GMX_SIMD_HAVE_DOUBLE
  545         //! \brief Conversion method that will execute load of SimdDouble
  546         operator SimdDouble() const { return simdLoad(m_); }
  547 #endif
  548     private:
  549         //! \brief Private constructor can only be called from load()
  550         SimdLoadDProxyInternal(const double *m) : m_(m) {}
  551
  552         friend const SimdLoadDProxyInternal gmx_simdcall
  553         load(const double *m);
  554         template <size_t N>
  555         friend const SimdLoadDProxyInternal gmx_simdcall
  556         load(const AlignedArray<double, N> &m);
  557         const double * const m_; //!< The pointer used to load memory
  558
  559         GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadDProxyInternal);
  560 };
 561
  562 /*! \brief Load function that returns proxy object for SimdDouble and double
  563  *
  564  * \param m Pointer to the memory to load from
  565  * \return Proxy object that will call the actual load for either SimdDouble
  566  *         or double when you assign it and the conversion method is called.
  567  */
  568 static inline const SimdLoadDProxyInternal gmx_simdcall
  569 load(const double *m)
  570 {
  571     return { // braced return initializes the returned proxy directly from m
  572                m
  573     };
  574 }
  575 //! \brief Overload of load() taking an AlignedArray of double; wraps m.data() in the same proxy type
  576 template <size_t N>
  577 static inline const SimdLoadDProxyInternal gmx_simdcall
  578 load(const AlignedArray<double, N> &m)
  579 {
  580     return { // braced return initializes the returned proxy directly from the array's data pointer
  581                m.data()
  582     };
  583 }
 584
  585 class SimdLoadUDProxyInternal;
  586
  587 static inline const SimdLoadUDProxyInternal gmx_simdcall
  588 loadU(const double *m);
  589
  590 /*! \libinternal \brief Proxy object to enable loadU() for SIMD and double types
  591  *
  592  * This object is returned by the loadU() function that takes a single pointer
  593  * to a double. When the result is assigned to either SimdDouble or double,
  594  * the matching conversion operator is executed, which either dereferences
  595  * the pointer (double) or calls simdLoadU() (SimdDouble).
  596  * In practice this simply means you can use loadU() for both SIMD
  597  * and non-SIMD floating point data in templated functions.
  598  *
  599  * This is an internal class you should never touch or create objects of.
  600  * The constructor is private; only the loadU() function declared as a friend
  601  * below can create instances.
  602  */
  603 class SimdLoadUDProxyInternal
  604 {
  605     public:
  606         //! \brief Conversion method that will execute load of scalar double
  607         operator double() const { return *m_; }
  608 #if GMX_SIMD_HAVE_DOUBLE && GMX_SIMD_HAVE_LOADU
  609         //! \brief Conversion method that will execute loadU of SimdDouble
  610         operator SimdDouble() const { return simdLoadU(m_); }
  611 #endif
  612     private:
  613         //! \brief Private constructor can only be called from loadU()
  614         SimdLoadUDProxyInternal(const double *m) : m_(m) {}
  615
  616         friend const SimdLoadUDProxyInternal gmx_simdcall
  617         loadU(const double *m);
  618
  619         const double * const m_; //!< The pointer used to load memory
  620
  621         GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadUDProxyInternal);
  622 };
 623
  624 /*! \brief LoadU function that returns proxy object for SimdDouble and double
  625  *
  626  * \param m Pointer to the memory to load from
  627  * \return Proxy object that will call the actual loadU for either SimdDouble
  628  *         or double when you assign it and the conversion method is called.
  629  */
  630 static inline const SimdLoadUDProxyInternal gmx_simdcall
  631 loadU(const double *m)
  632 {
  633     return { // braced return initializes the returned proxy directly from m
  634                m
  635     };
  636 }
 637
 638
  639 class SimdLoadIProxyInternal;
  640
  641 static inline const SimdLoadIProxyInternal gmx_simdcall
  642 load(const std::int32_t *m);
  643
  644 /*! \libinternal \brief Proxy object load() for SimdFInt32, SimdDInt32, and int32
  645  *
  646  * This object is returned by the load() function that takes a single pointer
  647  * to an integer. When the result is assigned to either SimdFInt32, SimdDInt32,
  648  * or std::int32_t, the matching conversion operator is executed, which calls
  649  * simdLoadFI() or simdLoadDI(), or dereferences the pointer, respectively.
  650  * In practice this simply means you can use load() regardless of the type.
  651  *
  652  * This is an internal class you should never touch or create objects of.
  653  * The constructor is private; only the load() function declared as a friend
  654  * below can create instances.
  655  */
  656 class SimdLoadIProxyInternal
  657 {
  658     public:
  659         //! \brief Conversion method that will execute load of scalar int32
  660         operator std::int32_t() const { return *m_; }
  661 #if GMX_SIMD_HAVE_FLOAT
  662         //! \brief Conversion method that will execute load of SimdFInt32
  663         operator SimdFInt32() const { return simdLoadFI(m_); }
  664 #endif
  665 #if GMX_SIMD_HAVE_DOUBLE
  666         //! \brief Conversion method that will execute load of SimdDInt32
  667         operator SimdDInt32() const { return simdLoadDI(m_); }
  668 #endif
  669     private:
  670         //! \brief Private constructor can only be called from load()
  671         SimdLoadIProxyInternal(const std::int32_t *m) : m_(m) {}
  672
  673         friend const SimdLoadIProxyInternal gmx_simdcall
  674         load(const std::int32_t *m);
  675
  676         const std::int32_t * const m_; //!< The pointer used to load memory
  677
  678         GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadIProxyInternal);
  679 };
 680
  681 /*! \brief Integer load function (proxy object) for SimdFInt32, SimdDInt32, and int32.
  682  *
  683  * \param m Pointer to the memory to load from
  684  * \return Proxy object that will call the actual load for SimdFInt32,
  685  *         SimdDInt32 or int32 when you assign it and the conversion method is called.
  686  */
  687 static inline const SimdLoadIProxyInternal gmx_simdcall
  688 load(const std::int32_t *m)
  689 {
  690     return { // braced return initializes the returned proxy directly from m
  691                m
  692     };
  693 }
 694
 695
  696 class SimdLoadUIProxyInternal;
  697
  698 static inline const SimdLoadUIProxyInternal gmx_simdcall
  699 loadU(const std::int32_t *m);
  700
  701 /*! \libinternal \brief Proxy object - loadU() for SimdFInt32, SimdDInt32, and int32
  702  *
  703  * Counterpart of SimdLoadIProxyInternal that is returned by loadU() and converts via simdLoadUFI()/simdLoadUDI().
  704  */
  705 class SimdLoadUIProxyInternal
  706 {
  707     public:
  708         //! \brief Conversion method that will execute unaligned load of scalar int32
  709         operator std::int32_t() const { return *m_; }
  710 #if GMX_SIMD_HAVE_FLOAT && GMX_SIMD_HAVE_LOADU
  711         //!\brief Conversion method that will execute unaligned load of SimdFInt32
  712         operator SimdFInt32() const { return simdLoadUFI(m_); }
  713 #endif
  714 #if GMX_SIMD_HAVE_DOUBLE && GMX_SIMD_HAVE_LOADU
  715         //!\brief Conversion method that will execute unaligned load of SimdDInt32
  716         operator SimdDInt32() const { return simdLoadUDI(m_); }
  717 #endif
  718     private:
  719         //! \brief Private constructor can only be called from loadU()
  720         SimdLoadUIProxyInternal(const std::int32_t *m) : m_(m) {}
  721
  722         friend const SimdLoadUIProxyInternal gmx_simdcall
  723         loadU(const std::int32_t *m);
  724
  725         const std::int32_t * const m_; //!< The pointer used to load memory
  726
  727         GMX_DISALLOW_COPY_AND_ASSIGN(SimdLoadUIProxyInternal);
  728 };
 729
  730 /*! \brief Integer loadU function (proxy object) for SimdFInt32, SimdDInt32, and int32.
  731  *
  732  * \param m Pointer to the memory to load from
  733  * \return Proxy object that will call the actual loadU for SimdFInt32,
  734  *         SimdDInt32 or int32 when you assign it and the conversion method is called.
  735  */
  736 static inline const SimdLoadUIProxyInternal gmx_simdcall
  737 loadU(const std::int32_t *m)
  738 {
  739     return { // braced return initializes the returned proxy directly from m
  740                m
  741     };
  742 }
 743
 744
  745 class SimdSetZeroProxyInternal;
  746
  747 static inline const SimdSetZeroProxyInternal gmx_simdcall
  748 setZero();
  749
  750 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and scalar types.
  751  *
  752  * This object is returned by setZero(). The conversion operator selected by the
  753  * target type yields a scalar zero or calls the low-level SIMD zeroing function.
  754  */
  755 class SimdSetZeroProxyInternal
  756 {
  757     public:
  758         //!\brief Conversion method that returns 0.0 as float
  759         operator float() const { return 0.0f; }
  760         //!\brief Conversion method that returns 0.0 as double
  761         operator double() const { return 0.0; }
  762         //!\brief Conversion method that returns 0 as int32
  763         operator std::int32_t() const { return 0; }
  764 #if GMX_SIMD_HAVE_FLOAT
  765         //!\brief Conversion method that will execute setZero() for SimdFloat
  766         operator SimdFloat() const { return setZeroF(); }
  767         //!\brief Conversion method that will execute setZero() for SimdFInt32
  768         operator SimdFInt32() const { return setZeroFI(); }
  769 #endif
  770 #if GMX_SIMD4_HAVE_FLOAT
  771         //!\brief Conversion method that will execute setZero() for Simd4Float
  772         operator Simd4Float() const { return simd4SetZeroF(); }
  773 #endif
  774 #if GMX_SIMD_HAVE_DOUBLE
  775         //!\brief Conversion method that will execute setZero() for SimdDouble
  776         operator SimdDouble() const { return setZeroD(); }
  777         //!\brief Conversion method that will execute setZero() for SimdDInt32
  778         operator SimdDInt32() const { return setZeroDI(); }
  779 #endif
  780 #if GMX_SIMD4_HAVE_DOUBLE
  781         //!\brief Conversion method that will execute setZero() for Simd4Double
  782         operator Simd4Double() const { return simd4SetZeroD(); }
  783 #endif
  784
  785     private:
  786         //! \brief Private constructor can only be called from setZero()
  787         SimdSetZeroProxyInternal() {}
  788
  789         friend const SimdSetZeroProxyInternal gmx_simdcall
  790         setZero();
  791
  792         GMX_DISALLOW_COPY_AND_ASSIGN(SimdSetZeroProxyInternal);
  793 };
 794
  795 /*! \brief Returns a proxy object that sets any SIMD or scalar variable to zero
  796  *
  797  * \return Proxy object that will call the actual function to set a SIMD/scalar
  798  *         variable to zero based on the conversion function called when you
  799  *         assign the result.
  800  */
  801 static inline const SimdSetZeroProxyInternal gmx_simdcall
  802 setZero()
  803 {
  804     return {}; // empty braces initialize the returned proxy via its private default constructor
  805 }
 806 //! \}  end of name-group proxy objects
 807
 808 }      // namespace gmx
 809
 810 // \}          end of module_simd
 811
 812 //! \endcond   end of condition libapi
 813
 814
 815 #if 0
 816 /* This is a hack to cover the corner case of using an
 817    explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
 818    GMX_SIMD_HAVE_REAL.
 819
 820    Such code is expected to include simd.h to get those symbols
  821    defined, but the actual definitions are in the implementation headers
 822    included by simd.h. check-source.py is not a full preprocessor, so
 823    it does not see the definitions in the implementation headers as
 824    belonging to simd.h, thus it cannot check that simd.h is being used
 825    correctly in the above hypothetical corner case. However, the
 826    checker also does not parse #if 0, so we can fool the checker into
 827    thinking that definition occurs here, and that will work well
 828    enough.
 829
 830    If there's ever other kinds of SIMD code that might have the same
 831    problem, we might want to add other variables here.
 832  */
 833 #    define GMX_SIMD_HAVE_FLOAT         1
 834 #    define GMX_SIMD_HAVE_DOUBLE        1
 835
 836 #endif // end of hack
 837
 838 #endif // GMX_SIMD_SIMD_H