cmake/gmxManageSimd.cmake

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34
  35 # include avx test source, used if the AVX flags are set below
  36 include(gmxTestAVXMaskload)
  37 include(gmxFindFlagsForSource)
  38
  39
  40 macro(gmx_use_clang_as_with_gnu_compilers_on_osx)
  41     # On OS X, we often want to use gcc instead of clang, since gcc supports
  42     # OpenMP. However, by default gcc uses the external system assembler, which
  43     # does not support AVX, so we need to tell the linker to use the clang
  44     # compilers assembler instead - and this has to happen before we detect AVX
  45     # flags.
  46     if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "GNU")
  47         gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" SIMD_C_FLAGS)
  48     endif()
  49     if(APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  50         gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" SIMD_CXX_FLAGS)
  51     endif()
  52 endmacro()
  53
  54
  55 macro(gmx_manage_simd)
  56
  57 set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math")
  58 #
  59 # Note that we typically restrict double precision target accuracy to be twice that
  60 # of single. This means we only need one more N-R iteration for 1/sqrt(x) and 1(x),
  61 # and the first iteration can sometimes be done as a pair in single precision. This should
  62 # be plenty enough for Molecular Dynamics applications. Many of our double precision math
  63 # functions still achieve very close to full double precision, but we do not guarantee that
  64 # they will be able to achieve higher accuracy if you set this beyond 44 bits. GROMACS will
  65 # work - but some unit tests might fail.
  66 #
  67 set(GMX_SIMD_ACCURACY_BITS_DOUBLE 44 CACHE STRING "Target mantissa bits for SIMD double math")
  68 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_SINGLE)
  69 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_DOUBLE)
  70
  71 if(${GMX_SIMD_ACCURACY_BITS_SINGLE} GREATER 22)
  72     message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD single math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 22 bits.")
  73     set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math" FORCE)
  74 endif()
  75
  76 if(${GMX_SIMD_ACCURACY_BITS_DOUBLE} GREATER 51)
  77     message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD double math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 51 bits.")
  78     set(GMX_SIMD_ACCURACY_BITS_DOUBLE 51 CACHE STRING "Target mantissa bits for SIMD double math" FORCE)
  79 endif()
  80
  81 #
  82 # Section to set (and test) compiler flags for SIMD.
  83 #
  84 # The flags will be set based on the GMX_SIMD choice provided by the user.
  85 # Automatic detection of the architecture on the build host is done prior to
  86 # calling this macro.
  87 #
  88
  89 if(GMX_SIMD STREQUAL "NONE")
  90     # nothing to do configuration-wise
  91     set(SIMD_STATUS_MESSAGE "SIMD instructions disabled")
  92 elseif(GMX_SIMD STREQUAL "SSE2")
  93
  94     gmx_find_cflag_for_source(CFLAGS_SSE2 "C compiler SSE2 flag"
  95                               "#include<xmmintrin.h>
  96                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
  97                               SIMD_C_FLAGS
  98                               "-msse2" "/arch:SSE2" "-hgnu")
  99     gmx_find_cxxflag_for_source(CXXFLAGS_SSE2 "C++ compiler SSE2 flag"
 100                                 "#include<xmmintrin.h>
 101                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
 102                                 SIMD_CXX_FLAGS
 103                                 "-msse2" "/arch:SSE2" "-hgnu")
 104
 105     if(NOT CFLAGS_SSE2 OR NOT CXXFLAGS_SSE2)
 106         message(FATAL_ERROR "Cannot find SSE2 compiler flag. Use a newer compiler, or disable SIMD (slower).")
 107     endif()
 108
 109     set(GMX_SIMD_X86_SSE2 1)
 110     set(SIMD_STATUS_MESSAGE "Enabling SSE2 SIMD instructions")
 111
 112 elseif(GMX_SIMD STREQUAL "SSE4.1")
 113
 114     # Note: MSVC enables SSE4.1 with the SSE2 flag, so we include that in testing.
 115     gmx_find_cflag_for_source(CFLAGS_SSE4_1 "C compiler SSE4.1 flag"
 116                               "#include<smmintrin.h>
 117                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
 118                               SIMD_C_FLAGS
 119                               "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
 120     gmx_find_cxxflag_for_source(CXXFLAGS_SSE4_1 "C++ compiler SSE4.1 flag"
 121                                 "#include<smmintrin.h>
 122                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
 123                                 SIMD_CXX_FLAGS
 124                                 "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
 125
 126     if(NOT CFLAGS_SSE4_1 OR NOT CXXFLAGS_SSE4_1)
 127         message(FATAL_ERROR "Cannot find SSE4.1 compiler flag. "
 128                             "Use a newer compiler, or choose SSE2 SIMD (slower).")
 129     endif()
 130
 131     if(CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_EQUAL "11.1")
 132         message(FATAL_ERROR "You are using Intel compiler version 11.1, which produces incorrect results with SSE4.1 SIMD. You need to use a newer compiler (e.g. icc >= 12.0) or in worst case try a lower level of SIMD if performance is not critical.")
 133     endif()
 134
 135     set(GMX_SIMD_X86_SSE4_1 1)
 136     set(SIMD_STATUS_MESSAGE "Enabling SSE4.1 SIMD instructions")
 137
 138 elseif(GMX_SIMD STREQUAL "AVX_128_FMA")
 139
 140     gmx_use_clang_as_with_gnu_compilers_on_osx()
 141
 142     # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
 143     # 1) Find the flags required for generic AVX support
 144     # 2) Find the flags necessary to enable fused-multiply add support
 145     # 3) Optional: Find a flag to enable the AMD XOP instructions
 146
 147     ### STAGE 1: Find the generic AVX flag
 148     gmx_find_cflag_for_source(CFLAGS_AVX_128 "C compiler AVX (128 bit) flag"
 149                               "#include<immintrin.h>
 150                               int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 151                               SIMD_C_FLAGS
 152                               "-mavx" "/arch:AVX" "-hgnu")
 153     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128 "C++ compiler AVX (128 bit) flag"
 154                                 "#include<immintrin.h>
 155                                 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
 156                                 SIMD_CXX_FLAGS
 157                                 "-mavx" "/arch:AVX" "-hgnu")
 158
 159     ### STAGE 2: Find the fused-multiply add flag.
 160     # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
 161     check_include_file(x86intrin.h HAVE_X86INTRIN_H ${SIMD_C_FLAGS})
 162     check_include_file(intrin.h HAVE_INTRIN_H ${SIMD_C_FLAGS})
 163     if(HAVE_X86INTRIN_H)
 164         set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
 165     endif()
 166     if(HAVE_INTRIN_H)
 167         set(INCLUDE_INTRIN_H "#include <xintrin.h>")
 168     endif()
 169
 170     gmx_find_cflag_for_source(CFLAGS_AVX_128_FMA "C compiler AVX (128 bit) FMA4 flag"
 171 "#include<immintrin.h>
 172 ${INCLUDE_X86INTRIN_H}
 173 ${INCLUDE_INTRIN_H}
 174 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
 175                               SIMD_C_FLAGS
 176                               "-mfma4" "-hgnu")
 177     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_FMA "C++ compiler AVX (128 bit) FMA4 flag"
 178 "#include<immintrin.h>
 179 ${INCLUDE_X86INTRIN_H}
 180 ${INCLUDE_INTRIN_H}
 181 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
 182                                 SIMD_CXX_FLAGS
 183                                 "-mfma4" "-hgnu")
 184
 185     # We only need to check the last (FMA) test; that will always fail if the basic AVX128 test failed
 186     if(NOT CFLAGS_AVX_128_FMA OR NOT CXXFLAGS_AVX_128_FMA)
 187         message(FATAL_ERROR "Cannot find compiler flags for 128 bit AVX with FMA support. Use a newer compiler, or choose SSE4.1 SIMD (slower).")
 188     endif()
 189
 190     ### STAGE 3: Optional: Find the XOP instruction flag (No point in yelling if this does not work)
 191     gmx_find_cflag_for_source(CFLAGS_AVX_128_XOP "C compiler AVX (128 bit) XOP flag"
 192 "#include<immintrin.h>
 193 ${INCLUDE_X86INTRIN_H}
 194 ${INCLUDE_INTRIN_H}
 195 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
 196                               SIMD_C_FLAGS
 197                               "-mxop")
 198     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_XOP "C++ compiler AVX (128 bit) XOP flag"
 199 "#include<immintrin.h>
 200 ${INCLUDE_X86INTRIN_H}
 201 ${INCLUDE_INTRIN_H}
 202 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
 203                                 SIMD_CXX_FLAGS
 204                                 "-mxop")
 205
 206     # We don't have the full compiler version string yet (BUILD_C_COMPILER),
 207     # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
 208     # hackintoshes is not worth the effort.
 209     if (APPLE AND (CMAKE_C_COMPILER_ID STREQUAL "Clang" OR
 210                 CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
 211         message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA SIMD. As we cannot work around this bug on OS X, you will have to select a different compiler or SIMD instruction set.")
 212     endif()
 213
 214
 215     if (GMX_USE_CLANG_C_FMA_BUG_WORKAROUND)
 216         # we assume that we have an external assembler that supports AVX
 217         message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
 218         set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -no-integrated-as")
 219     endif()
 220     if (GMX_USE_CLANG_CXX_FMA_BUG_WORKAROUND)
 221         # we assume that we have an external assembler that supports AVX
 222         message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
 223         set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -no-integrated-as")
 224     endif()
 225
 226     gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
 227
 228     set(GMX_SIMD_X86_AVX_128_FMA 1)
 229     set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX SIMD GROMACS SIMD (with fused-multiply add)")
 230
 231 elseif(GMX_SIMD STREQUAL "AVX_256")
 232
 233     gmx_use_clang_as_with_gnu_compilers_on_osx()
 234
 235     gmx_find_cflag_for_source(CFLAGS_AVX "C compiler AVX (256 bit) flag"
 236                               "#include<immintrin.h>
 237                               int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
 238                               SIMD_C_FLAGS
 239                               "-mavx" "/arch:AVX" "-hgnu")
 240     gmx_find_cxxflag_for_source(CXXFLAGS_AVX "C++ compiler AVX (256 bit) flag"
 241                                 "#include<immintrin.h>
 242                                 int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
 243                                 SIMD_CXX_FLAGS
 244                                 "-mavx" "/arch:AVX" "-hgnu")
 245
 246     if(NOT CFLAGS_AVX OR NOT CXXFLAGS_AVX)
 247         message(FATAL_ERROR "Cannot find AVX compiler flag. Use a newer compiler, or choose SSE4.1 SIMD (slower).")
 248     endif()
 249
 250     gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
 251
 252     set(GMX_SIMD_X86_AVX_256 1)
 253     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions")
 254
 255 elseif(GMX_SIMD STREQUAL "AVX2_256")
 256
 257     gmx_use_clang_as_with_gnu_compilers_on_osx()
 258
 259     gmx_find_cflag_for_source(CFLAGS_AVX2 "C compiler AVX2 flag"
 260                               "#include<immintrin.h>
 261                               int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
 262                               SIMD_C_FLAGS
 263                               "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
 264     gmx_find_cxxflag_for_source(CXXFLAGS_AVX2 "C++ compiler AVX2 flag"
 265                                 "#include<immintrin.h>
 266                                 int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
 267                                 SIMD_CXX_FLAGS
 268                                 "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
 269
 270     if(NOT CFLAGS_AVX2 OR NOT CXXFLAGS_AVX2)
 271         message(FATAL_ERROR "Cannot find AVX2 compiler flag. Use a newer compiler, or choose AVX SIMD (slower).")
 272     endif()
 273
 274     # No need to test for Maskload bug - it was fixed before gcc added AVX2 support
 275
 276     set(GMX_SIMD_X86_AVX2_256 1)
 277     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
 278
 279 elseif(GMX_SIMD STREQUAL "MIC")
 280
 281     # No flags needed. Not testing.
 282     set(GMX_SIMD_X86_MIC 1)
 283     set(SIMD_STATUS_MESSAGE "Enabling MIC (Xeon Phi) SIMD instructions")
 284
 285 elseif(GMX_SIMD STREQUAL "AVX_512F")
 286
 287     gmx_use_clang_as_with_gnu_compilers_on_osx()
 288
 289     gmx_find_cflag_for_source(CFLAGS_AVX_512F "C compiler AVX-512F flag"
 290                               "#include<immintrin.h>
 291                               int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_fmadd_ps(x,x,x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 292                               SIMD_C_FLAGS
 293                               "-xMIC-AVX512" "-mavx512f" "/arch:AVX" "-hgnu") # no AVX_512F flags known for MSVC yet
 294     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_512F "C++ compiler AVX-512F flag"
 295                                 "#include<immintrin.h>
 296                                 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_fmadd_ps(x,x,x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 297                                 SIMD_CXX_FLAGS
 298                                 "-xMIC-AVX512" "-mavx512f" "/arch:AVX" "-hgnu") # no AVX_512F flags known for MSVC yet
 299
 300     if(NOT CFLAGS_AVX_512F OR NOT CXXFLAGS_AVX_512F)
 301         message(FATAL_ERROR "Cannot find AVX 512F compiler flag. Use a newer compiler, or choose a lower level of SIMD")
 302     endif()
 303
 304     set(GMX_SIMD_X86_AVX_512F 1)
 305     set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512F SIMD instructions")
 306
 307 elseif(GMX_SIMD STREQUAL "AVX_512ER")
 308
 309     gmx_use_clang_as_with_gnu_compilers_on_osx()
 310
 311     gmx_find_cflag_for_source(CFLAGS_AVX_512ER "C compiler AVX-512ER flag"
 312                               "#include<immintrin.h>
 313                               int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 314                               SIMD_C_FLAGS
 315                               "-xMIC-AVX512" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
 316     gmx_find_cxxflag_for_source(CXXFLAGS_AVX_512ER "C++ compiler AVX-512ER flag"
 317                                 "#include<immintrin.h>
 318                                 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
 319                                 SIMD_CXX_FLAGS
 320                                 "-xMIC-AVX512" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
 321
 322     if(NOT CFLAGS_AVX_512ER OR NOT CXXFLAGS_AVX_512ER)
 323         message(FATAL_ERROR "Cannot find AVX 512ER compiler flag. Use a newer compiler, or choose a lower level of SIMD")
 324     endif()
 325
 326     set(GMX_SIMD_X86_AVX_512ER 1)
 327     set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512ER SIMD instructions")
 328
 329 elseif(GMX_SIMD STREQUAL "ARM_NEON")
 330
 331     gmx_find_cflag_for_source(CFLAGS_ARM_NEON "C compiler 32-bit ARM NEON flag"
 332                               "#include<arm_neon.h>
 333                               int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
 334                               SIMD_C_FLAGS
 335                               "-mfpu=neon" "")
 336     gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON "C++ compiler 32-bit ARM NEON flag"
 337                                 "#include<arm_neon.h>
 338                                 int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
 339                                 SIMD_CXX_FLAGS
 340                                 "-mfpu=neon" "-D__STDC_CONSTANT_MACROS" "")
 341
 342     if(NOT CFLAGS_ARM_NEON OR NOT CXXFLAGS_ARM_NEON)
 343         message(FATAL_ERROR "Cannot find ARM 32-bit NEON compiler flag. Use a newer compiler, or disable NEON SIMD.")
 344     endif()
 345
 346     set(GMX_SIMD_ARM_NEON 1)
 347     set(SIMD_STATUS_MESSAGE "Enabling 32-bit ARM NEON SIMD instructions")
 348
 349 elseif(GMX_SIMD STREQUAL "ARM_NEON_ASIMD")
 350     # Gcc-4.8.1 appears to have a bug where the c++ compiler requires
 351     # -D__STDC_CONSTANT_MACROS if we include arm_neon.h
 352
 353     gmx_find_cflag_for_source(CFLAGS_ARM_NEON_ASIMD "C compiler ARM NEON Advanced SIMD flag"
 354                               "#include<arm_neon.h>
 355                               int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
 356                               SIMD_C_FLAGS
 357                               "")
 358     gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON_ASIMD "C++ compiler ARM NEON Advanced SIMD flag"
 359                                 "#include<arm_neon.h>
 360                                 int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
 361                                 SIMD_CXX_FLAGS
 362                                 "-D__STDC_CONSTANT_MACROS" "")
 363
 364     if(NOT CFLAGS_ARM_NEON_ASIMD OR NOT CXXFLAGS_ARM_NEON_ASIMD)
 365         message(FATAL_ERROR "Cannot find ARM (AArch64) NEON Advanced SIMD compiler flag. Use a newer compiler, or disable SIMD.")
 366     endif()
 367
 368     if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "4.9")
 369         message(WARNING "At least gcc-4.8.1 has many bugs for ARM (AArch64) NEON Advanced SIMD compilation. You might need gcc version 4.9 or later.")
 370     endif()
 371
 372     if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.4")
 373         message(FATAL_ERROR "Clang version 3.4 or later is required for ARM (AArch64) NEON Advanced SIMD.")
 374     endif()
 375
 376     set(GMX_SIMD_ARM_NEON_ASIMD 1)
 377     set(SIMD_STATUS_MESSAGE "Enabling ARM (AArch64) NEON Advanced SIMD instructions")
 378
 379 elseif(GMX_SIMD STREQUAL "IBM_QPX")
 380
 381     try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
 382         "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
 383
 384     if (TEST_QPX)
 385         message(WARNING "IBM QPX SIMD instructions selected. This will work, but SIMD kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
 386         set(GMX_SIMD_IBM_QPX 1)
 387         set(SIMD_STATUS_MESSAGE "Enabling IBM QPX SIMD instructions")
 388
 389     else()
 390         message(FATAL_ERROR "Cannot compile the requested IBM QPX intrinsics. If you are compiling for BlueGene/Q with the XL compilers, use 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-C' to set up the tool chain.")
 391     endif()
 392
 393 elseif(GMX_SIMD STREQUAL "IBM_VMX")
 394
 395     gmx_find_cflag_for_source(CFLAGS_IBM_VMX "C compiler IBM VMX SIMD flag"
 396                               "#include<altivec.h>
 397                               int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 398                               SIMD_C_FLAGS
 399                               "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 400     gmx_find_cxxflag_for_source(CXXFLAGS_IBM_VMX "C++ compiler IBM VMX SIMD flag"
 401                                 "#include<altivec.h>
 402                                 int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 403                                 SIMD_CXX_FLAGS
 404                                 "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 405
 406     if(NOT CFLAGS_IBM_VMX OR NOT CXXFLAGS_IBM_VMX)
 407         message(FATAL_ERROR "Cannot find IBM VMX SIMD compiler flag. Use a newer compiler, or disable VMX SIMD.")
 408     endif()
 409
 410     set(GMX_SIMD_IBM_VMX 1)
 411     set(SIMD_STATUS_MESSAGE "Enabling IBM VMX SIMD instructions")
 412
 413 elseif(GMX_SIMD STREQUAL "IBM_VSX")
 414
 415     # Altivec was originally single-only, and it took a while for compilers
 416     # to support the double-precision features in VSX.
 417     if(GMX_DOUBLE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
 418         message(FATAL_ERROR "Using VSX SIMD in double precision with GCC requires GCC-4.9 or later.")
 419     endif()
 420
 421     gmx_find_cflag_for_source(CFLAGS_IBM_VSX "C compiler IBM VSX SIMD flag"
 422                               "#include<altivec.h>
 423                               int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 424                               SIMD_C_FLAGS
 425                               "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 426     gmx_find_cxxflag_for_source(CXXFLAGS_IBM_VSX "C++ compiler IBM VSX SIMD flag"
 427                                 "#include<altivec.h>
 428                                 int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
 429                                 SIMD_CXX_FLAGS
 430                                 "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
 431
 432     if(NOT CFLAGS_IBM_VSX OR NOT CXXFLAGS_IBM_VSX)
 433         message(FATAL_ERROR "Cannot find IBM VSX SIMD compiler flag. Use a newer compiler, or disable VSX SIMD.")
 434     endif()
 435
 436     set(GMX_SIMD_IBM_VSX 1)
 437     set(SIMD_STATUS_MESSAGE "Enabling IBM VSX SIMD instructions")
 438
 439 elseif(GMX_SIMD STREQUAL "SPARC64_HPC_ACE")
 440
 441     # Note that GMX_RELAXED_DOUBLE_PRECISION is enabled by default in the top-level CMakeLists.txt
 442
 443     set(GMX_SIMD_SPARC64_HPC_ACE 1)
 444     set(SIMD_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD instructions")
 445
 446 elseif(GMX_SIMD STREQUAL "REFERENCE")
 447
 448     # NB: This file handles settings for the SIMD module, so in the interest
 449     # of proper modularization, please do NOT put any verlet kernel settings in this file.
 450
 451     if(GMX_SIMD_REF_FLOAT_WIDTH)
 452         add_definitions(-DGMX_SIMD_REF_FLOAT_WIDTH=${GMX_SIMD_REF_FLOAT_WIDTH})
 453     endif()
 454     if(GMX_SIMD_REF_DOUBLE_WIDTH)
 455         add_definitions(-DGMX_SIMD_REF_DOUBLE_WIDTH=${GMX_SIMD_REF_DOUBLE_WIDTH})
 456     endif()
 457
 458     set(GMX_SIMD_REFERENCE 1)
 459     set(SIMD_STATUS_MESSAGE "Enabling reference (emulated) SIMD instructions.")
 460
 461 else()
 462     gmx_invalid_option_value(GMX_SIMD)
 463 endif()
 464
 465
 466 gmx_check_if_changed(SIMD_CHANGED GMX_SIMD)
 467 if (SIMD_CHANGED AND DEFINED SIMD_STATUS_MESSAGE)
 468     message(STATUS "${SIMD_STATUS_MESSAGE}")
 469 endif()
 470
 471 # By default, 32-bit windows cannot pass SIMD (SSE/AVX) arguments in registers,
 472 # and even on 64-bit (all platforms) it is only used for a handful of arguments.
 473 # The __vectorcall (MSVC, from MSVC2013) or __regcall (ICC) calling conventions
 474 # enable this, which is critical to enable 32-bit SIMD and improves performance
 475 # for 64-bit SIMD.
 476 # Check if the compiler supports one of these, and in that case set gmx_simdcall
 477 # to that string. If we do not have any such calling convention modifier, set it
 478 # to an empty string.
 479 #
 480 # Update 2015-11-04: As of version 3.6, clang has added support for __vectorcall
 481 # (also on Linux). This appears to be buggy for the reference SIMD
 482 # implementation when using the Debug build (when functions are not inlined)
 483 # while it seems works fine for the actual SIMD implementations. This is likely
 484 # because the reference build ends up passing lots of structures with arrays
 485 # rather than actual vector data. For now we disable __vectorcall with clang
 486 # when using the reference build.
 487 #
 488 if(NOT DEFINED GMX_SIMD_CALLING_CONVENTION)
 489     if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND GMX_SIMD STREQUAL "REFERENCE")
 490         set(CALLCONV_LIST __regcall " ")
 491     else()
 492         set(CALLCONV_LIST __vectorcall __regcall " ")
 493     endif()
 494     foreach(callconv ${CALLCONV_LIST})
 495         set(callconv_compile_var "_callconv_${callconv}")
 496         check_c_source_compiles("int ${callconv} f(int i) {return i;} int main(void) {return f(0);}" ${callconv_compile_var})
 497         if(${callconv_compile_var})
 498             set(GMX_SIMD_CALLING_CONVENTION "${callconv}" CACHE INTERNAL "Calling convention for SIMD routines" FORCE)
 499             break()
 500         endif()
 501     endforeach()
 502 endif()
 503
 504 endmacro()
 505