2 # This file is part of the GROMACS molecular simulation package.
4 # Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 # and including many others, as listed in the AUTHORS file in the
7 # top-level source directory and at http://www.gromacs.org.
9 # GROMACS is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public License
11 # as published by the Free Software Foundation; either version 2.1
12 # of the License, or (at your option) any later version.
14 # GROMACS is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with GROMACS; if not, see
21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 # If you want to redistribute modifications to GROMACS, please
25 # consider that scientific software is very special. Version
26 # control is crucial - bugs must be traceable. We will be happy to
27 # consider code for inclusion in the official distribution, but
28 # derived work must not be called official GROMACS. Details are found
29 # in the README & COPYING files - if they are missing, get the
30 # official version at http://www.gromacs.org.
32 # To help us fund GROMACS development, we humbly ask that you cite
33 # the research papers on the package. Check out http://www.gromacs.org.
35 # include avx test source, used if the AVX flags are set below
36 include(gmxTestAVXMaskload)
37 include(gmxFindFlagsForSource)
# Ask GNU compilers on OS X to assemble with clang's assembler.
#
# On OS X, we often want to use gcc instead of clang, since gcc supports
# OpenMP. However, by default gcc uses the external system assembler, which
# does not support AVX, so we need to tell the compiler driver to hand the
# "-q" option to the assembler ("-Wa,-q") so that the clang assembler is
# used instead - and this has to happen before we detect AVX flags.
#
# This is a macro (not a function) so that SIMD_C_FLAGS / SIMD_CXX_FLAGS,
# whose names are handed to gmx_test_cflag() / gmx_test_cxxflag(), live in
# the caller's scope. Those helpers are defined elsewhere; presumably they
# append the flag to the named variable when the compiler accepts it.
macro(gmx_use_clang_as_with_gnu_compilers_on_osx)
    # The C and C++ compilers are checked independently, since they may
    # come from different vendors.
    if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "GNU")
        gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" SIMD_C_FLAGS)
    endif()
    if(APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
        gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" SIMD_CXX_FLAGS)
    endif()
endmacro()
# Issue a fatal error with an appropriate message, when the toolchain
# was not able to compile code for SIMD support.
#
# Inputs:
#  SIMD_STRING              A string describing the kind of SIMD support that didn't work.
#  ALTERNATIVE_SUGGESTION   A string describing anything the user could try other than getting a new compiler.
#  SUGGEST_BINUTILS_UPDATE  True when there's information that the compiler was OK, but something else was not.
#
# This function does not return normally: message(FATAL_ERROR ...) stops
# CMake processing.
function(gmx_give_fatal_error_when_simd_support_not_found SIMD_STRING ALTERNATIVE_SUGGESTION SUGGEST_BINUTILS_UPDATE)
    if(SUGGEST_BINUTILS_UPDATE)
        # A usable flag exists, so point the user at the assembler/linker
        # rather than at the compiler.
        set(_msg "Found a compiler flag for ${SIMD_STRING} support, but some other problem exists. Update your assembler and/or linker, e.g. in the binutils package of your distribution.")
    else()
        # No flag worked at all: suggest a newer compiler or the
        # caller-provided alternative.
        set(_msg "Cannot find ${SIMD_STRING} compiler flag. Use a newer compiler, or ${ALTERNATIVE_SUGGESTION}.")
    endif()
    # Quote the expansion so the text reaches message() as one argument;
    # unquoted, any ';' in the caller-supplied strings would split it into
    # list elements and the semicolons would vanish from the output.
    message(FATAL_ERROR "${_msg}")
endfunction()
70 macro(gmx_manage_simd)
72 set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math")
74 # Note that we typically restrict double precision target accuracy to be twice that
75 # of single. This means we only need one more N-R iteration for 1/sqrt(x) and 1/x,
76 # and the first iteration can sometimes be done as a pair in single precision. This should
77 # be plenty enough for Molecular Dynamics applications. Many of our double precision math
78 # functions still achieve very close to full double precision, but we do not guarantee that
79 # they will be able to achieve higher accuracy if you set this beyond 44 bits. GROMACS will
80 # work - but some unit tests might fail.
82 set(GMX_SIMD_ACCURACY_BITS_DOUBLE 44 CACHE STRING "Target mantissa bits for SIMD double math")
83 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_SINGLE)
84 mark_as_advanced(GMX_SIMD_ACCURACY_BITS_DOUBLE)
86 if(${GMX_SIMD_ACCURACY_BITS_SINGLE} GREATER 22)
87 message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD single math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 22 bits.")
88 set(GMX_SIMD_ACCURACY_BITS_SINGLE 22 CACHE STRING "Target mantissa bits for SIMD single math" FORCE)
91 if(${GMX_SIMD_ACCURACY_BITS_DOUBLE} GREATER 51)
92 message(STATUS "Note: Full mantissa accuracy (including least significant bit) requested for SIMD double math. Presently we cannot get the least significant bit correct since that would require different algorithms - reducing to 51 bits.")
93 set(GMX_SIMD_ACCURACY_BITS_DOUBLE 51 CACHE STRING "Target mantissa bits for SIMD double math" FORCE)
97 # Section to set (and test) compiler flags for SIMD.
99 # The flags will be set based on the GMX_SIMD choice provided by the user.
100 # Automatic detection of the architecture on the build host is done prior to
101 # calling this macro.
104 if(GMX_SIMD STREQUAL "NONE")
105 # nothing to do configuration-wise
106 set(SIMD_STATUS_MESSAGE "SIMD instructions disabled")
107 elseif(GMX_SIMD STREQUAL "SSE2")
109 gmx_find_cflag_for_source(CFLAGS_SSE2 "C compiler SSE2 flag"
110 "#include<xmmintrin.h>
111 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
113 "-msse2" "/arch:SSE2" "-hgnu")
114 gmx_find_cxxflag_for_source(CXXFLAGS_SSE2 "C++ compiler SSE2 flag"
115 "#include<xmmintrin.h>
116 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
118 "-msse2" "/arch:SSE2" "-hgnu")
120 if(NOT CFLAGS_SSE2 OR NOT CXXFLAGS_SSE2)
121 gmx_give_fatal_error_when_simd_support_not_found("SSE2" "disable SIMD support (slow)" "${SUGGEST_BINUTILS_UPDATE}")
124 set(GMX_SIMD_X86_SSE2 1)
125 set(SIMD_STATUS_MESSAGE "Enabling SSE2 SIMD instructions")
127 elseif(GMX_SIMD STREQUAL "SSE4.1")
129 # Note: MSVC enables SSE4.1 with the SSE2 flag, so we include that in testing.
130 gmx_find_cflag_for_source(CFLAGS_SSE4_1 "C compiler SSE4.1 flag"
131 "#include<smmintrin.h>
132 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
134 "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
135 gmx_find_cxxflag_for_source(CXXFLAGS_SSE4_1 "C++ compiler SSE4.1 flag"
136 "#include<smmintrin.h>
137 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
139 "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
141 if(NOT CFLAGS_SSE4_1 OR NOT CXXFLAGS_SSE4_1)
142 gmx_give_fatal_error_when_simd_support_not_found("SSE4.1" "choose SSE2 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
145 if(CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_EQUAL "11.1")
146 message(FATAL_ERROR "You are using Intel compiler version 11.1, which produces incorrect results with SSE4.1 SIMD. You need to use a newer compiler (e.g. icc >= 12.0) or in worst case try a lower level of SIMD if performance is not critical.")
149 set(GMX_SIMD_X86_SSE4_1 1)
150 set(SIMD_STATUS_MESSAGE "Enabling SSE4.1 SIMD instructions")
152 elseif(GMX_SIMD STREQUAL "AVX_128_FMA")
154 gmx_use_clang_as_with_gnu_compilers_on_osx()
156 # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
157 # 1) Find the flags required for generic AVX support
158 # 2) Find the flags necessary to enable fused-multiply add support
159 # 3) Optional: Find a flag to enable the AMD XOP instructions
161 ### STAGE 1: Find the generic AVX flag
162 gmx_find_cflag_for_source(CFLAGS_AVX_128 "C compiler AVX (128 bit) flag"
163 "#include<immintrin.h>
164 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
166 "-mavx" "/arch:AVX" "-hgnu")
167 gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128 "C++ compiler AVX (128 bit) flag"
168 "#include<immintrin.h>
169 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
171 "-mavx" "/arch:AVX" "-hgnu")
### STAGE 2: Find the fused-multiply add flag.
# GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
# Probe for both headers, and define the matching "#include" line only for
# a header that was actually found, so that a compile-test source which
# splices these variables in never references a header the probe did not see.
check_include_file(x86intrin.h HAVE_X86INTRIN_H ${SIMD_C_FLAGS})
check_include_file(intrin.h HAVE_INTRIN_H ${SIMD_C_FLAGS})
if(HAVE_X86INTRIN_H)
    set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
endif()
if(HAVE_INTRIN_H)
    # Must name the header that was probed above (intrin.h).
    set(INCLUDE_INTRIN_H "#include <intrin.h>")
endif()
184 gmx_find_cflag_for_source(CFLAGS_AVX_128_FMA "C compiler AVX (128 bit) FMA4 flag"
185 "#include<immintrin.h>
186 ${INCLUDE_X86INTRIN_H}
188 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
191 gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_FMA "C++ compiler AVX (128 bit) FMA4 flag"
192 "#include<immintrin.h>
193 ${INCLUDE_X86INTRIN_H}
195 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
199 # We only need to check the last (FMA) test; that will always fail if the basic AVX128 test failed
200 if(NOT CFLAGS_AVX_128_FMA OR NOT CXXFLAGS_AVX_128_FMA)
201 gmx_give_fatal_error_when_simd_support_not_found("128-bit AVX with FMA support" "choose SSE4.1 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
204 ### STAGE 3: Optional: Find the XOP instruction flag (No point in yelling if this does not work)
205 gmx_find_cflag_for_source(CFLAGS_AVX_128_XOP "C compiler AVX (128 bit) XOP flag"
206 "#include<immintrin.h>
207 ${INCLUDE_X86INTRIN_H}
209 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
212 gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_XOP "C++ compiler AVX (128 bit) XOP flag"
213 "#include<immintrin.h>
214 ${INCLUDE_X86INTRIN_H}
216 int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
220 # We don't have the full compiler version string yet (BUILD_C_COMPILER),
221 # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
222 # hackintoshes is not worth the effort.
223 if (APPLE AND (CMAKE_C_COMPILER_ID STREQUAL "Clang" OR
224 CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
225 message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA SIMD. As we cannot work around this bug on OS X, you will have to select a different compiler or SIMD instruction set.")
229 if (GMX_USE_CLANG_C_FMA_BUG_WORKAROUND)
230 # we assume that we have an external assembler that supports AVX
231 message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
232 set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -no-integrated-as")
234 if (GMX_USE_CLANG_CXX_FMA_BUG_WORKAROUND)
235 # we assume that we have an external assembler that supports AVX
236 message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
237 set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -no-integrated-as")
240 gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
242 set(GMX_SIMD_X86_AVX_128_FMA 1)
243 set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX SIMD GROMACS SIMD (with fused-multiply add)")
245 elseif(GMX_SIMD STREQUAL "AVX_256")
247 gmx_use_clang_as_with_gnu_compilers_on_osx()
249 gmx_find_cflag_for_source(CFLAGS_AVX "C compiler AVX (256 bit) flag"
250 "#include<immintrin.h>
251 int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
253 "-mavx" "/arch:AVX" "-hgnu")
254 gmx_find_cxxflag_for_source(CXXFLAGS_AVX "C++ compiler AVX (256 bit) flag"
255 "#include<immintrin.h>
256 int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
258 "-mavx" "/arch:AVX" "-hgnu")
260 if(NOT CFLAGS_AVX OR NOT CXXFLAGS_AVX)
261 gmx_give_fatal_error_when_simd_support_not_found("AVX" "choose SSE4.1 SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
264 gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
266 set(GMX_SIMD_X86_AVX_256 1)
267 set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions")
269 elseif(GMX_SIMD STREQUAL "AVX2_256")
271 gmx_use_clang_as_with_gnu_compilers_on_osx()
273 gmx_find_cflag_for_source(CFLAGS_AVX2 "C compiler AVX2 flag"
274 "#include<immintrin.h>
275 int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
277 "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
278 gmx_find_cxxflag_for_source(CXXFLAGS_AVX2 "C++ compiler AVX2 flag"
279 "#include<immintrin.h>
280 int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
282 "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
284 if(NOT CFLAGS_AVX2 OR NOT CXXFLAGS_AVX2)
285 gmx_give_fatal_error_when_simd_support_not_found("AVX2" "choose AVX SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
288 # No need to test for Maskload bug - it was fixed before gcc added AVX2 support
290 set(GMX_SIMD_X86_AVX2_256 1)
291 set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
293 elseif(GMX_SIMD STREQUAL "MIC")
295 # No flags needed. Not testing.
296 set(GMX_SIMD_X86_MIC 1)
297 set(SIMD_STATUS_MESSAGE "Enabling MIC (Xeon Phi) SIMD instructions")
299 elseif(GMX_SIMD STREQUAL "AVX_512F")
301 gmx_use_clang_as_with_gnu_compilers_on_osx()
303 gmx_find_cflag_for_source(CFLAGS_AVX_512F "C compiler AVX-512F flag"
304 "#include<immintrin.h>
305 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_fmadd_ps(x,x,x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
307 "-xMIC-AVX512" "-mavx512f" "/arch:AVX" "-hgnu") # no AVX_512F flags known for MSVC yet
308 gmx_find_cxxflag_for_source(CXXFLAGS_AVX_512F "C++ compiler AVX-512F flag"
309 "#include<immintrin.h>
310 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_fmadd_ps(x,x,x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
312 "-xMIC-AVX512" "-mavx512f" "/arch:AVX" "-hgnu") # no AVX_512F flags known for MSVC yet
314 if(NOT CFLAGS_AVX_512F OR NOT CXXFLAGS_AVX_512F)
315 gmx_give_fatal_error_when_simd_support_not_found("AVX 512F" "choose a lower level of SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
318 set(GMX_SIMD_X86_AVX_512F 1)
319 set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512F SIMD instructions")
321 elseif(GMX_SIMD STREQUAL "AVX_512ER")
323 gmx_use_clang_as_with_gnu_compilers_on_osx()
325 gmx_find_cflag_for_source(CFLAGS_AVX_512ER "C compiler AVX-512ER flag"
326 "#include<immintrin.h>
327 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
329 "-xMIC-AVX512" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
330 gmx_find_cxxflag_for_source(CXXFLAGS_AVX_512ER "C++ compiler AVX-512ER flag"
331 "#include<immintrin.h>
332 int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
334 "-xMIC-AVX512" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
336 if(NOT CFLAGS_AVX_512ER OR NOT CXXFLAGS_AVX_512ER)
337 gmx_give_fatal_error_when_simd_support_not_found("AVX 512ER" "choose a lower level of SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
340 set(GMX_SIMD_X86_AVX_512ER 1)
341 set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512ER SIMD instructions")
343 elseif(GMX_SIMD STREQUAL "ARM_NEON")
345 gmx_find_cflag_for_source(CFLAGS_ARM_NEON "C compiler 32-bit ARM NEON flag"
346 "#include<arm_neon.h>
347 int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
350 gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON "C++ compiler 32-bit ARM NEON flag"
351 "#include<arm_neon.h>
352 int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
354 "-mfpu=neon" "-D__STDC_CONSTANT_MACROS" "")
356 if(NOT CFLAGS_ARM_NEON OR NOT CXXFLAGS_ARM_NEON)
357 gmx_give_fatal_error_when_simd_support_not_found("ARM 32-bit NEON" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
360 set(GMX_SIMD_ARM_NEON 1)
361 set(SIMD_STATUS_MESSAGE "Enabling 32-bit ARM NEON SIMD instructions")
363 elseif(GMX_SIMD STREQUAL "ARM_NEON_ASIMD")
364 # Gcc-4.8.1 appears to have a bug where the c++ compiler requires
365 # -D__STDC_CONSTANT_MACROS if we include arm_neon.h
367 gmx_find_cflag_for_source(CFLAGS_ARM_NEON_ASIMD "C compiler ARM NEON Advanced SIMD flag"
368 "#include<arm_neon.h>
369 int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
372 gmx_find_cxxflag_for_source(CXXFLAGS_ARM_NEON_ASIMD "C++ compiler ARM NEON Advanced SIMD flag"
373 "#include<arm_neon.h>
374 int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);return vgetq_lane_f64(x,0)>0;}"
376 "-D__STDC_CONSTANT_MACROS" "")
378 if(NOT CFLAGS_ARM_NEON_ASIMD OR NOT CXXFLAGS_ARM_NEON_ASIMD)
379 gmx_give_fatal_error_when_simd_support_not_found("ARM (AArch64) NEON Advanced SIMD" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
382 if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "4.9")
383 message(WARNING "At least gcc-4.8.1 has many bugs for ARM (AArch64) NEON Advanced SIMD compilation. You might need gcc version 4.9 or later.")
386 if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.4")
387 message(FATAL_ERROR "Clang version 3.4 or later is required for ARM (AArch64) NEON Advanced SIMD.")
390 set(GMX_SIMD_ARM_NEON_ASIMD 1)
391 set(SIMD_STATUS_MESSAGE "Enabling ARM (AArch64) NEON Advanced SIMD instructions")
393 elseif(GMX_SIMD STREQUAL "IBM_QPX")
395 try_compile(TEST_QPX ${CMAKE_BINARY_DIR}
396 "${CMAKE_SOURCE_DIR}/cmake/TestQPX.c")
399 message(WARNING "IBM QPX SIMD instructions selected. This will work, but SIMD kernels are only available for the Verlet cut-off scheme. The plain C kernels that are used for the group cut-off scheme kernels will be slow, so please consider using the Verlet cut-off scheme.")
400 set(GMX_SIMD_IBM_QPX 1)
401 set(SIMD_STATUS_MESSAGE "Enabling IBM QPX SIMD instructions")
404 gmx_give_fatal_error_when_simd_support_not_found("IBM QPX" "or 'cmake .. -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX' to set up the tool chain" "${SUGGEST_BINUTILS_UPDATE}")
407 elseif(GMX_SIMD STREQUAL "IBM_VMX")
409 gmx_find_cflag_for_source(CFLAGS_IBM_VMX "C compiler IBM VMX SIMD flag"
411 int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
413 "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
414 gmx_find_cxxflag_for_source(CXXFLAGS_IBM_VMX "C++ compiler IBM VMX SIMD flag"
416 int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
418 "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
420 if(NOT CFLAGS_IBM_VMX OR NOT CXXFLAGS_IBM_VMX)
421 gmx_give_fatal_error_when_simd_support_not_found("IBM VMX" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
424 set(GMX_SIMD_IBM_VMX 1)
425 set(SIMD_STATUS_MESSAGE "Enabling IBM VMX SIMD instructions")
427 elseif(GMX_SIMD STREQUAL "IBM_VSX")
429 # Altivec was originally single-only, and it took a while for compilers
430 # to support the double-precision features in VSX.
431 if(GMX_DOUBLE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
432 message(FATAL_ERROR "Using VSX SIMD in double precision with GCC requires GCC-4.9 or later.")
435 gmx_find_cflag_for_source(CFLAGS_IBM_VSX "C compiler IBM VSX SIMD flag"
437 int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
439 "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
440 gmx_find_cxxflag_for_source(CXXFLAGS_IBM_VSX "C++ compiler IBM VSX SIMD flag"
442 int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
444 "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
446 if(NOT CFLAGS_IBM_VSX OR NOT CXXFLAGS_IBM_VSX)
447 gmx_give_fatal_error_when_simd_support_not_found("IBM VSX" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
450 set(GMX_SIMD_IBM_VSX 1)
451 set(SIMD_STATUS_MESSAGE "Enabling IBM VSX SIMD instructions")
453 elseif(GMX_SIMD STREQUAL "SPARC64_HPC_ACE")
455 # Note that GMX_RELAXED_DOUBLE_PRECISION is enabled by default in the top-level CMakeLists.txt
457 set(GMX_SIMD_SPARC64_HPC_ACE 1)
458 set(SIMD_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD instructions")
460 elseif(GMX_SIMD STREQUAL "REFERENCE")
462 # NB: This file handles settings for the SIMD module, so in the interest
463 # of proper modularization, please do NOT put any verlet kernel settings in this file.
465 if(GMX_SIMD_REF_FLOAT_WIDTH)
466 add_definitions(-DGMX_SIMD_REF_FLOAT_WIDTH=${GMX_SIMD_REF_FLOAT_WIDTH})
468 if(GMX_SIMD_REF_DOUBLE_WIDTH)
469 add_definitions(-DGMX_SIMD_REF_DOUBLE_WIDTH=${GMX_SIMD_REF_DOUBLE_WIDTH})
472 set(GMX_SIMD_REFERENCE 1)
473 set(SIMD_STATUS_MESSAGE "Enabling reference (emulated) SIMD instructions.")
476 gmx_invalid_option_value(GMX_SIMD)
480 gmx_check_if_changed(SIMD_CHANGED GMX_SIMD)
481 if (SIMD_CHANGED AND DEFINED SIMD_STATUS_MESSAGE)
482 message(STATUS "${SIMD_STATUS_MESSAGE}")
485 # By default, 32-bit windows cannot pass SIMD (SSE/AVX) arguments in registers,
486 # and even on 64-bit (all platforms) it is only used for a handful of arguments.
487 # The __vectorcall (MSVC, from MSVC2013) or __regcall (ICC) calling conventions
488 # enable this, which is critical to enable 32-bit SIMD and improves performance
490 # Check if the compiler supports one of these, and in that case set gmx_simdcall
491 # to that string. If we do not have any such calling convention modifier, set it
492 # to an empty string.
493 if(NOT DEFINED GMX_SIMD_CALLING_CONVENTION)
494 foreach(callconv __vectorcall __regcall "")
495 set(callconv_compile_var "_callconv_${callconv}")
496 check_c_source_compiles("int ${callconv} f(int i) {return i;} int main(void) {return f(0);}" ${callconv_compile_var})
497 if(${callconv_compile_var})
498 set(GMX_SIMD_CALLING_CONVENTION "${callconv}" CACHE INTERNAL "Calling convention for SIMD routines" FORCE)