cmake/gmxManageSimd.cmake

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34
  35 # include avx test source, used if the AVX flags are set below
  36 include(gmxTestAVXMaskload)
  37 include(gmxFindFlagsForSource)
  38
  39
  40 macro(gmx_use_clang_as_with_gnu_compilers_on_osx)
  41     # On OS X, we often want to use gcc instead of clang, since gcc supports
  42     # OpenMP. However, by default gcc uses the external system assembler, which
  43     # does not support AVX, so we need to tell the linker to use the clang
  44     # compilers assembler instead - and this has to happen before we detect AVX
  45     # flags.
  46     if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "GNU")
  47         gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" SIMD_C_FLAGS)
  48     endif()
  49     if(APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  50         gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" SIMD_CXX_FLAGS)
  51     endif()
  52 endmacro()
  53
  54 # Issue a fatal error with an appropriate message, when the toolchain
  55 # was not able to compile code for SIMD support.
  56 #
  57 # Inputs:
  58 #  SIMD_STRING              A string describing the kind of SIMD support that didn't work.
  59 #  ALTERNATIVE_SUGGESTION   A string describing anything the user could try other than getting a new compiler.
  60 #  SUGGEST_BINUTILS_UPDATE  True when there's information that the compiler was OK, but something else was not.
  61 function(gmx_give_fatal_error_when_simd_support_not_found SIMD_STRING ALTERNATIVE_SUGGESTION SUGGEST_BINUTILS_UPDATE)
  62     if(SUGGEST_BINUTILS_UPDATE)
  63         set(_msg "Found a compiler flag for ${SIMD_STRING} support, but some other problem exists. Update your assembler and/or linker, e.g. in the binutils package of your distribution.")
  64     else()
  65         set(_msg "Cannot find ${SIMD_STRING} compiler flag. Use a newer compiler, or ${ALTERNATIVE_SUGGESTION}.")
  66     endif()
  67     message(FATAL_ERROR ${_msg})
  68 endfunction()
  69
  70 macro(gmx_manage_simd)
  71
  72 set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math")
  73 #
  74 # Note that we typically restrict double precision target accuracy to be twice that
  75 # of single. This means we only need one more N-R iteration for 1/sqrt(x) and 1(x),
  76 # and the first iteration can sometimes be done as a pair in single precision. This should
  77 # be plenty enough for Molecular Dynamics applications. Many of our double precision math
  78 # functions still achieve very close to full double precision, but we do not guarantee that
  79 # they will be able to achieve higher accuracy if you set this beyond 44 bits. GROMACS will
  80 # work - but some unit tests might fail.
  81 #
  82 set(GMX_SIMD_ACCURACY_BITS_DOUBLE 44 CACHE STRING "Target mantissa bits for SIMD double math")
  83 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_SINGLE)
  84 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_DOUBLE)
  85
  86 if(${GMX_SIMD_ACCURACY_BITS_SINGLE} GREATER 22)
  87     message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD single math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 22 bits.")
  88     set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math" FORCE)
  89 endif()
  90
  91 if(${GMX_SIMD_ACCURACY_BITS_DOUBLE} GREATER 51)
  92     message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD double math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 51 bits.")
  93     set(GMX_SIMD_ACCURACY_BITS_DOUBLE 51 CACHE STRING "Target mantissa bits for SIMD double math" FORCE)
  94 endif()
  95
  96 #
  97 # Section to set (and test) compiler flags for SIMD.
  98 #
  99 # The flags will be set based on the GMX_SIMD choice provided by the user.
 100 # Automatic detection of the architecture on the build host is done prior to
 101 # calling this macro.
 102 #
 103
 104 if(GMX_SIMD STREQUAL "NONE")
 105     # nothing to do configuration-wise
 106     set(SIMD_STATUS_MESSAGE "SIMD instructions disabled")
 107 elseif(GMX_SIMD STREQUAL "SSE2")
 108
 109     gmx_find_cflag_for_source(CFLAGS_SSE2
 110                               "#include<xmmintrin.h>
 111                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
 112                               SIMD_C_FLAGS
 113                               "-msse2" "/arch:SSE2" "-hgnu")
 114     gmx_find_cxxflag_for_source(CXXFLAGS_SSE2
 115                                 "#include<xmmintrin.h>
 116                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
 117                                 SIMD_CXX_FLAGS
 118                                 "-msse2" "/arch:SSE2" "-hgnu")
 119
 120     if(NOT CFLAGS_SSE2 OR NOT CXXFLAGS_SSE2)
 121         gmx_give_fatal_error_when_simd_support_not_found("SSE2" "disable SIMD support (slow)" "${SUGGEST_BINUTILS_UPDATE}")
 122     endif()
 123
 124     set(GMX_SIMD_X86_SSE2 1)
 125     set(SIMD_STATUS_MESSAGE "Enabling SSE2 SIMD instructions")
 126
 127 elseif(GMX_SIMD STREQUAL "SSE4.1")
 128
 129     # Note: MSVC enables SSE4.1 with the SSE2 flag, so we include that in testing.
 130     gmx_find_cflag_for_source(CFLAGS_SSE4_1
 131                               "#include<smmintrin.h>
 132                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
 133                               SIMD_C_FLAGS
 134                               "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
 135     gmx_find_cxxflag_for_source(CXXFLAGS_SSE4_1
 136                                 "#include<smmintrin.h>
 137                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
 138                                 SIMD_CXX_FLAGS
 139                                 "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
 140
 141     if(NOT CFLAGS_SSE4_1 OR NOT CXXFLAGS_SSE4_1)
 142         gmx_give_fatal_error_when_simd_support_not_found("SSE4.1" "choose SSE2 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 143     endif()
 144
 145     if(CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_EQUAL "11.1")
 146         message(FATAL_ERROR "You are using Intel compiler version 11.1, which produces incorrect results with SSE4.1 SIMD. You need to use a newer compiler (e.g. icc >= 12.0) or in worst case try a lower level of SIMD if performance is not critical.")
 147     endif()
 148
 149     set(GMX_SIMD_X86_SSE4_1 1)
 150     set(SIMD_STATUS_MESSAGE "Enabling SSE4.1 SIMD instructions")
 151
 152 elseif(GMX_SIMD STREQUAL "AVX_128_FMA")
 153
 154     gmx_use_clang_as_with_gnu_compilers_on_osx()
 155
 156     # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
 157     # 1) Find the flags required for generic AVX support
 158     # 2) Find the flags necessary to enable fused-multiply add support
 159     # 3) Optional: Find a flag to enable the AMD XOP instructions
 160
 161     ### STAGE 1: Find the generic AVX flag
 162     gmx_find_cflag_for_source(CFLAGS_AVX_128
 163                               "#include<immintrin.h>
 164                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 165                               SIMD_C_FLAGS
 166                               "-mavx" "/arch:AVX" "-hgnu")
 167     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128
 168                                 "#include<immintrin.h>
 169                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 170                                 SIMD_CXX_FLAGS
 171                                 "-mavx" "/arch:AVX" "-hgnu")
 172
 173     ### STAGE 2: Find the fused-multiply add flag.
 174     # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
 175     check_include_file(x86intrin.h HAVE_X86INTRIN_H ${SIMD_C_FLAGS})
 176     check_include_file(intrin.h HAVE_INTRIN_H ${SIMD_C_FLAGS})
 177     if(HAVE_X86INTRIN_H)
 178         set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
 179     endif()
 180     if(HAVE_INTRIN_H)
 181         set(INCLUDE_INTRIN_H "#include <xintrin.h>")
 182     endif()
 183
 184     gmx_find_cflag_for_source(CFLAGS_AVX_128_FMA
 185 "#include<immintrin.h>
 186 ${INCLUDE_X86INTRIN_H}
 187 ${INCLUDE_INTRIN_H}
 188 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
 189                               SIMD_C_FLAGS
 190                               "-mfma4" "-hgnu")
 191     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_FMA
 192 "#include<immintrin.h>
 193 ${INCLUDE_X86INTRIN_H}
 194 ${INCLUDE_INTRIN_H}
 195 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
 196                                 SIMD_CXX_FLAGS
 197                                 "-mfma4" "-hgnu")
 198
 199     # We only need to check the last (FMA) test; that will always fail if the basic AVX128 test failed
 200     if(NOT CFLAGS_AVX_128_FMA OR NOT CXXFLAGS_AVX_128_FMA)
 201         gmx_give_fatal_error_when_simd_support_not_found("128-bit AVX with FMA support" "choose SSE4.1 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 202     endif()
 203
 204     ### STAGE 3: Optional: Find the XOP instruction flag (No point in yelling if this does not work)
 205     gmx_find_cflag_for_source(CFLAGS_AVX_128_XOP
 206 "#include<immintrin.h>
 207 ${INCLUDE_X86INTRIN_H}
 208 ${INCLUDE_INTRIN_H}
 209 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
 210                               SIMD_C_FLAGS
 211                               "-mxop")
 212     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_XOP
 213 "#include<immintrin.h>
 214 ${INCLUDE_X86INTRIN_H}
 215 ${INCLUDE_INTRIN_H}
 216 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
 217                                 SIMD_CXX_FLAGS
 218                                 "-mxop")
 219
 220     # We don't have the full compiler version string yet (BUILD_C_COMPILER),
 221     # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
 222     # hackintoshes is not worth the effort.
 223     if (APPLE AND (CMAKE_C_COMPILER_ID STREQUAL "Clang" OR
 224                 CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
 225         message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA SIMD. As we cannot work around this bug on OS X, you will have to select a different compiler or SIMD instruction set.")
 226     endif()
 227
 228
 229     if (GMX_USE_CLANG_C_FMA_BUG_WORKAROUND)
 230         # we assume that we have an external assembler that supports AVX
 231         message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
 232         set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -no-integrated-as")
 233     endif()
 234     if (GMX_USE_CLANG_CXX_FMA_BUG_WORKAROUND)
 235         # we assume that we have an external assembler that supports AVX
 236         message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
 237         set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -no-integrated-as")
 238     endif()
 239
 240     gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
 241
 242     set(GMX_SIMD_X86_AVX_128_FMA 1)
 243     set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX SIMD GROMACS SIMD (with fused-multiply add)")
 244
 245 elseif(GMX_SIMD STREQUAL "AVX_256")
 246
 247     gmx_use_clang_as_with_gnu_compilers_on_osx()
 248
 249     gmx_find_cflag_for_source(CFLAGS_AVX
 250                               "#include<immintrin.h>
 251                               int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
 252                               SIMD_C_FLAGS
 253                               "-mavx" "/arch:AVX" "-hgnu")
 254     gmx_find_cxxflag_for_source(CXXFLAGS_AVX
 255                                 "#include<immintrin.h>
 256                                 int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
 257                                 SIMD_CXX_FLAGS
 258                                 "-mavx" "/arch:AVX" "-hgnu")
 259
 260     if(NOT CFLAGS_AVX OR NOT CXXFLAGS_AVX)
 261         gmx_give_fatal_error_when_simd_support_not_found("AVX" "choose SSE4.1 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 262     endif()
 263
 264     gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
 265
 266     set(GMX_SIMD_X86_AVX_256 1)
 267     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions")
 268
 269 elseif(GMX_SIMD STREQUAL "AVX2_256")
 270
 271     gmx_use_clang_as_with_gnu_compilers_on_osx()
 272
 273     gmx_find_cflag_for_source(CFLAGS_AVX2
 274                               "#include<immintrin.h>
 275                               int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
 276                               SIMD_C_FLAGS
 277                               "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
 278     gmx_find_cxxflag_for_source(CXXFLAGS_AVX2
 279                                 "#include<immintrin.h>
 280                                 int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
 281                                 SIMD_CXX_FLAGS
 282                                 "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
 283
 284     if(NOT CFLAGS_AVX2 OR NOT CXXFLAGS_AVX2)
 285         gmx_give_fatal_error_when_simd_support_not_found("AVX2" "choose AVX SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 286     endif()
 287
 288     # No need to test for Maskload bug - it was fixed before gcc added AVX2 support
 289
 290     set(GMX_SIMD_X86_AVX2_256 1)
 291     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
 292
 293 elseif(GMX_SIMD STREQUAL "MIC")
 294
 295     # No flags needed. Not testing.
 296     set(GMX_SIMD_X86_MIC 1)
 297     set(SIMD_STATUS_MESSAGE "Enabling MIC (Xeon Phi) SIMD instructions")
 298
 299 elseif(GMX_SIMD STREQUAL "AVX_512")
 300
 301     gmx_use_clang_as_with_gnu_compilers_on_osx()
 302
 303     gmx_find_cflag_for_source(CFLAGS_AVX_512F
 304                               "#include<immintrin.h>
 305                               int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_fmadd_ps(x,x,x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 306                               SIMD_C_FLAGS
 307                               "-xMIC-AVX512" "-mavx512f -mfma" "-mavx512f" "/arch:AVX" "-hgnu") # no AVX_512F flags known for MSVC yet
 308     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_512F
 309                                 "#include<immintrin.h>
 310                                 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_fmadd_ps(x,x,x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 311                                 SIMD_CXX_FLAGS
 312                                 "-xMIC-AVX512" "-mavx512f -mfma" "-mavx512f" "/arch:AVX" "-hgnu") # no AVX_512F flags known for MSVC yet
 313
 314     if(NOT CFLAGS_AVX_512F OR NOT CXXFLAGS_AVX_512F)
 315         gmx_give_fatal_error_when_simd_support_not_found("AVX 512F" "choose a lower level of SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 316     endif()
 317
 318     set(GMX_SIMD_X86_AVX_512 1)
 319     set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512 SIMD instructions")
 320
 321 elseif(GMX_SIMD STREQUAL "AVX_512_KNL")
 322
 323     gmx_use_clang_as_with_gnu_compilers_on_osx()
 324
 325     gmx_find_cflag_for_source(CFLAGS_AVX_512ER
 326                               "#include<immintrin.h>
 327                               int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 328                               SIMD_C_FLAGS
 329                               "-xMIC-AVX512" "-mavx512er -mfma" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
 330     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_512ER
 331                                 "#include<immintrin.h>
 332                                 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 333                                 SIMD_CXX_FLAGS
 334                                 "-xMIC-AVX512" "-mavx512er -mfma" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
 335
 336     if(NOT CFLAGS_AVX_512ER OR NOT CXXFLAGS_AVX_512ER)
 337         gmx_give_fatal_error_when_simd_support_not_found("AVX 512ER" "choose a lower level of SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 338     endif()
 339
 340     set(GMX_SIMD_X86_AVX_512_KNL 1)
 341     set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512-KNL SIMD instructions")
 342
 343 elseif(GMX_SIMD STREQUAL "ARM_NEON")
 344
 345     gmx_find_cflag_for_source(CFLAGS_ARM_NEON
 346                               "#include<arm_neon.h>
 347                               int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
 348                               SIMD_C_FLAGS
 349                               "-mfpu=neon-vfpv4" "-mfpu=neon" "")
 350     gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON
 351                                 "#include<arm_neon.h>
 352                                 int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
 353                                 SIMD_CXX_FLAGS
 354                                 "-mfpu=neon-vfpv4" "-mfpu=neon" "-D__STDC_CONSTANT_MACROS" "")
 355
 356     if(NOT CFLAGS_ARM_NEON OR NOT CXXFLAGS_ARM_NEON)
 357         gmx_give_fatal_error_when_simd_support_not_found("ARM NEON" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 358     endif()
 359
 360     set(GMX_SIMD_ARM_NEON 1)
 361     set(SIMD_STATUS_MESSAGE "Enabling 32-bit ARM NEON SIMD instructions")
 362
 363 elseif(GMX_SIMD STREQUAL "ARM_NEON_ASIMD")
 364
 365     gmx_find_cflag_for_source(CFLAGS_ARM_NEON_ASIMD
 366                               "#include<arm_neon.h>
 367                                int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);x=vrndnq_f64(x);return vgetq_lane_f64(x,0)>0;}"
 368                               SIMD_C_FLAGS
 369                               "")
 370     gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON_ASIMD
 371                                 "#include<arm_neon.h>
 372                                 int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);x=vrndnq_f64(x);return vgetq_lane_f64(x,0)>0;}"
 373                                 SIMD_CXX_FLAGS
 374                                 "")
 375
 376     if(NOT CFLAGS_ARM_NEON_ASIMD OR NOT CXXFLAGS_ARM_NEON_ASIMD)
 377         gmx_give_fatal_error_when_simd_support_not_found("ARM (AArch64) NEON Advanced SIMD" "particularly gcc version 4.9 or later, or disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 378     endif()
 379
 380     if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.4")
 381         message(FATAL_ERROR "Clang version 3.4 or later is required for ARM (AArch64) NEON Advanced SIMD.")
 382     endif()
 383
 384     set(GMX_SIMD_ARM_NEON_ASIMD 1)
 385     set(SIMD_STATUS_MESSAGE "Enabling ARM (AArch64) NEON Advanced SIMD instructions")
 386
 387 elseif(GMX_SIMD STREQUAL "IBM_QPX")
 388
 389     try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
 390         "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
 391
 392     if (TEST_QPX)
 393         message(WARNING "IBM QPX SIMD instructions selected. This will work, but SIMD kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
 394         set(GMX_SIMD_IBM_QPX 1)
 395         set(SIMD_STATUS_MESSAGE "Enabling IBM QPX SIMD instructions")
 396
 397     else()
 398         gmx_give_fatal_error_when_simd_support_not_found("IBM QPX" "or 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX' to set up the tool chain" "${SUGGEST_BINUTILS_UPDATE}")
 399     endif()
 400
 401 elseif(GMX_SIMD STREQUAL "IBM_VMX")
 402
 403     gmx_find_cflag_for_source(CFLAGS_IBM_VMX
 404                               "#include<altivec.h>
 405                               int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 406                               SIMD_C_FLAGS
 407                               "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 408     gmx_find_cxxflag_for_source(CXXFLAGS_IBM_VMX
 409                                 "#include<altivec.h>
 410                                 int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 411                                 SIMD_CXX_FLAGS
 412                                 "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 413
 414     if(NOT CFLAGS_IBM_VMX OR NOT CXXFLAGS_IBM_VMX)
 415         gmx_give_fatal_error_when_simd_support_not_found("IBM VMX" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 416     endif()
 417
 418     set(GMX_SIMD_IBM_VMX 1)
 419     set(SIMD_STATUS_MESSAGE "Enabling IBM VMX SIMD instructions")
 420
 421 elseif(GMX_SIMD STREQUAL "IBM_VSX")
 422
 423     if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU" OR ${CMAKE_C_COMPILER_ID} MATCHES "GNU")
 424         # VSX uses the same function API as Altivec/VMX, so make sure we tune for the current CPU and not VMX.
 425         # By putting these flags here rather than in the general compiler flags file we can safely assume
 426         # that we are at least on Power7 since that is when VSX appeared.
 427         if(BUILD_CPU_BRAND MATCHES "POWER7")
 428             gmx_test_cflag(GNU_C_VSX_POWER7   "-mcpu=power7 -mtune=power7" SIMD_C_FLAGS)
 429             gmx_test_cflag(GNU_CXX_VSX_POWER7 "-mcpu=power7 -mtune=power7" SIMD_CXX_FLAGS)
 430         else()
 431             # Enable power8 vector extensions on all platforms except old Power7.
 432             gmx_test_cflag(GNU_C_VSX_POWER8   "-mcpu=power8 -mpower8-vector -mpower8-fusion -mdirect-move" SIMD_C_FLAGS)
 433             gmx_test_cflag(GNU_CXX_VSX_POWER8 "-mcpu=power8 -mpower8-vector -mpower8-fusion -mdirect-move" SIMD_CXX_FLAGS)
 434         endif()
 435         # Altivec was originally single-only, and it took a while for compilers
 436         # to support the double-precision features in VSX.
 437         if(GMX_DOUBLE AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
 438             message(FATAL_ERROR "Using VSX SIMD in double precision with GCC requires GCC-4.9 or later.")
 439         endif()
 440     endif()
 441
 442     gmx_find_cflag_for_source(CFLAGS_IBM_VSX
 443                               "#include<altivec.h>
 444                               int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 445                               SIMD_C_FLAGS
 446                               "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 447     gmx_find_cxxflag_for_source(CXXFLAGS_IBM_VSX
 448                                 "#include<altivec.h>
 449                                 int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 450                                 SIMD_CXX_FLAGS
 451                                 "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 452
 453     if(NOT CFLAGS_IBM_VSX OR NOT CXXFLAGS_IBM_VSX)
 454         gmx_give_fatal_error_when_simd_support_not_found("IBM VSX" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
 455     endif()
 456
 457     set(GMX_SIMD_IBM_VSX 1)
 458     set(SIMD_STATUS_MESSAGE "Enabling IBM VSX SIMD instructions")
 459
 460 elseif(GMX_SIMD STREQUAL "SPARC64_HPC_ACE")
 461
 462     # Note that GMX_RELAXED_DOUBLE_PRECISION is enabled by default in the top-level CMakeLists.txt
 463
 464     set(GMX_SIMD_SPARC64_HPC_ACE 1)
 465     set(SIMD_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD instructions")
 466
 467 elseif(GMX_SIMD STREQUAL "REFERENCE")
 468
 469     # NB: This file handles settings for the SIMD module, so in the interest
 470     # of proper modularization, please do NOT put any verlet kernel settings in this file.
 471
 472     if(GMX_SIMD_REF_FLOAT_WIDTH)
 473         add_definitions(-DGMX_SIMD_REF_FLOAT_WIDTH=${GMX_SIMD_REF_FLOAT_WIDTH})
 474     endif()
 475     if(GMX_SIMD_REF_DOUBLE_WIDTH)
 476         add_definitions(-DGMX_SIMD_REF_DOUBLE_WIDTH=${GMX_SIMD_REF_DOUBLE_WIDTH})
 477     endif()
 478
 479     set(GMX_SIMD_REFERENCE 1)
 480     set(SIMD_STATUS_MESSAGE "Enabling reference (emulated) SIMD instructions.")
 481
 482 else()
 483     gmx_invalid_option_value(GMX_SIMD)
 484 endif()
 485
 486
 487 gmx_check_if_changed(SIMD_CHANGED GMX_SIMD)
 488 if (SIMD_CHANGED AND DEFINED SIMD_STATUS_MESSAGE)
 489     message(STATUS "${SIMD_STATUS_MESSAGE}")
 490 endif()
 491
 492 # By default, 32-bit windows cannot pass SIMD (SSE/AVX) arguments in registers,
 493 # and even on 64-bit (all platforms) it is only used for a handful of arguments.
 494 # The __vectorcall (MSVC, from MSVC2013) or __regcall (ICC) calling conventions
 495 # enable this, which is critical to enable 32-bit SIMD and improves performance
 496 # for 64-bit SIMD.
 497 # Check if the compiler supports one of these, and in that case set gmx_simdcall
 498 # to that string. If we do not have any such calling convention modifier, set it
 499 # to an empty string.
 500 #
 501 # Update 2015-11-04: As of version 3.6, clang has added support for __vectorcall
 502 # (also on Linux). This appears to be buggy for the reference SIMD
 503 # implementation when using the Debug build (when functions are not inlined)
 504 # while it seems works fine for the actual SIMD implementations. This is likely
 505 # because the reference build ends up passing lots of structures with arrays
 506 # rather than actual vector data. For now we disable __vectorcall with clang
 507 # when using the reference build.
 508 #
 509 if(NOT DEFINED GMX_SIMD_CALLING_CONVENTION)
 510     if(GMX_TARGET_BGQ)
 511         set(CALLCONV_LIST " ")
 512     elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND GMX_SIMD STREQUAL "REFERENCE")
 513         set(CALLCONV_LIST __regcall " ")
 514     else()
 515         set(CALLCONV_LIST __vectorcall __regcall " ")
 516     endif()
 517     foreach(callconv ${CALLCONV_LIST})
 518         set(callconv_compile_var "_callconv_${callconv}")
 519         check_c_source_compiles("int ${callconv} f(int i) {return i;} int main(void) {return f(0);}" ${callconv_compile_var})
 520         if(${callconv_compile_var})
 521             set(GMX_SIMD_CALLING_CONVENTION "${callconv}" CACHE INTERNAL "Calling convention for SIMD routines" FORCE)
 522             break()
 523         endif()
 524     endforeach()
 525 endif()
 526
 527 endmacro()
 528