From 570db57b3ce38bd13c1ed3d13d1a34264f42e590 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Tue, 14 Mar 2017 14:35:35 +0100 Subject: [PATCH] 128-bit AVX2 SIMD for AMD Ryzen While Ryzen supports 256-bit AVX2, the internal units are organized to execute either a single 256-bit instruction or two 128-bit SIMD instruction per cycle. Since most of our kernels are slightly less efficient for wider SIMD, this improves performance by roughly 10%. Change-Id: Ie601b1dbe13d70334cdf9284e236ad9132951ec9 --- CMakeLists.txt | 2 +- cmake/gmxDetectSimd.cmake | 8 +- cmake/gmxManageSimd.cmake | 9 ++- src/config.h.cmakein | 5 +- src/gromacs/hardware/cpuinfo.cpp | 11 +++ src/gromacs/hardware/cpuinfo.h | 4 +- .../simd/impl_x86_avx2_128/impl_x86_avx2_128.h | 48 ++++++++++++ .../impl_x86_avx2_128_definitions.h | 82 +++++++++++++++++++ .../impl_x86_avx2_128/impl_x86_avx2_128_general.h | 41 ++++++++++ .../impl_x86_avx2_128_simd4_double.h | 41 ++++++++++ .../impl_x86_avx2_128_simd4_float.h | 41 ++++++++++ .../impl_x86_avx2_128_simd_double.h | 89 +++++++++++++++++++++ .../impl_x86_avx2_128_simd_float.h | 90 +++++++++++++++++++++ .../impl_x86_avx2_128_util_double.h | 91 ++++++++++++++++++++++ .../impl_x86_avx2_128_util_float.h | 83 ++++++++++++++++++++ src/gromacs/simd/simd.h | 2 + src/gromacs/simd/support.cpp | 11 ++- src/gromacs/simd/support.h | 3 +- 18 files changed, 650 insertions(+), 11 deletions(-) create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_definitions.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_general.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_double.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_float.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_double.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_float.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_double.h create mode 100644 src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_float.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a8cfd33cfe..e874efb8dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -243,7 +243,7 @@ gmx_option_multichoice( GMX_SIMD "SIMD instruction set for CPU kernels and compiler optimization" "${GMX_SUGGESTED_SIMD}" - None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference) + None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX2_128 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference) if(GMX_TARGET_MIC) set(GMX_FFT_LIBRARY_DEFAULT "mkl") diff --git a/cmake/gmxDetectSimd.cmake b/cmake/gmxDetectSimd.cmake index fdf64df5f8..44dc6884bf 100644 --- a/cmake/gmxDetectSimd.cmake +++ b/cmake/gmxDetectSimd.cmake @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -113,7 +113,11 @@ function(gmx_suggest_simd _suggested_simd) elseif(OUTPUT_TMP MATCHES " avx512f ") set(OUTPUT_SIMD "AVX_512") elseif(OUTPUT_TMP MATCHES " avx2 ") - set(OUTPUT_SIMD "AVX2_256") + if(OUTPUT_TMP MATCHES " amd ") + set(OUTPUT_SIMD "AVX2_128") + else() + set(OUTPUT_SIMD "AVX2_256") + endif() elseif(OUTPUT_TMP MATCHES " avx ") if(OUTPUT_TMP MATCHES " fma4 ") # AMD that works better with avx-128-fma diff --git a/cmake/gmxManageSimd.cmake b/cmake/gmxManageSimd.cmake index 85249369cf..5b34c289dd 100644 --- a/cmake/gmxManageSimd.cmake +++ b/cmake/gmxManageSimd.cmake @@ -270,7 +270,7 @@ elseif(GMX_SIMD STREQUAL "AVX_256") set(GMX_SIMD_X86_${GMX_SIMD} 1) set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions") -elseif(GMX_SIMD STREQUAL "AVX2_256") +elseif(GMX_SIMD MATCHES "AVX2_") prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS) @@ -288,7 +288,12 @@ elseif(GMX_SIMD STREQUAL "AVX2_256") set(SIMD_C_FLAGS "${TOOLCHAIN_C_FLAGS}") set(SIMD_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS}") set(GMX_SIMD_X86_${GMX_SIMD} 1) - set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions") + + if(GMX_SIMD STREQUAL "AVX2_128") + set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX2 SIMD instructions") + else() + set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions") + endif() elseif(GMX_SIMD STREQUAL "MIC") diff --git a/src/config.h.cmakein b/src/config.h.cmakein index 587490c59a..c43bd2005a 100644 --- a/src/config.h.cmakein +++ b/src/config.h.cmakein @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -89,6 +89,9 @@ /* AVX2 256-bit SIMD instruction set level was selected */ #cmakedefine01 GMX_SIMD_X86_AVX2_256 +/* AVX2 128-bit SIMD instruction set level was selected */ +#cmakedefine01 GMX_SIMD_X86_AVX2_128 + /* MIC (Xeon Phi) SIMD instruction set level was selected */ #cmakedefine01 GMX_SIMD_X86_MIC diff --git a/src/gromacs/hardware/cpuinfo.cpp b/src/gromacs/hardware/cpuinfo.cpp index cdb3ae7ffb..4975918caa 100644 --- a/src/gromacs/hardware/cpuinfo.cpp +++ b/src/gromacs/hardware/cpuinfo.cpp @@ -876,6 +876,15 @@ CpuInfo CpuInfo::detect() defined __x86_64__ || defined __amd64__ || defined _M_X64 || defined _M_AMD64 result.vendor_ = detectX86Vendor(); + + if (result.vendor_ == CpuInfo::Vendor::Intel) + { + result.features_.insert(CpuInfo::Feature::X86_Intel); + } + else if (result.vendor_ == CpuInfo::Vendor::Amd) + { + result.features_.insert(CpuInfo::Feature::X86_Amd); + } detectX86Features(&result.brandString_, &result.family_, &result.model_, &result.stepping_, &result.features_); result.logicalProcessors_ = detectX86LogicalProcessors(); @@ -944,6 +953,7 @@ const std::map CpuInfo::s_featureStrings_ = { { CpuInfo::Feature::X86_Aes, "aes" }, + { CpuInfo::Feature::X86_Amd, "amd" }, { CpuInfo::Feature::X86_Apic, "apic" }, { CpuInfo::Feature::X86_Avx, "avx" }, { CpuInfo::Feature::X86_Avx2, "avx2" }, @@ -962,6 +972,7 @@ CpuInfo::s_featureStrings_ = { CpuInfo::Feature::X86_Fma4, "fma4" }, { CpuInfo::Feature::X86_Hle, "hle" }, { CpuInfo::Feature::X86_Htt, "htt" }, + { CpuInfo::Feature::X86_Intel, "intel" }, { CpuInfo::Feature::X86_Lahf, "lahf" }, { CpuInfo::Feature::X86_MisalignSse, "misalignsse" }, { CpuInfo::Feature::X86_Mmx, "mmx" }, diff --git a/src/gromacs/hardware/cpuinfo.h b/src/gromacs/hardware/cpuinfo.h index 7ea85c1845..676321a4a7 100644 --- a/src/gromacs/hardware/cpuinfo.h +++ b/src/gromacs/hardware/cpuinfo.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2015,2016, by the GROMACS development team, led by + * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -97,6 +97,7 @@ class CpuInfo enum class Feature { X86_Aes, //!< x86 advanced encryption standard accel. + X86_Amd, //!< This is an AMD x86 processor X86_Apic, //!< APIC support X86_Avx, //!< Advanced vector extensions X86_Avx2, //!< AVX2 including gather support (not used yet) @@ -115,6 +116,7 @@ class CpuInfo X86_Fma4, //!< 4-operand FMA, only on AMD for now X86_Hle, //!< Hardware lock elision X86_Htt, //!< Hyper-Threading supported (but maybe not enabled) + X86_Intel, //!< This is an Intel x86 processor X86_Lahf, //!< LAHF/SAHF support in 64 bits X86_MisalignSse, //!< Support for misaligned SSE data instructions X86_Mmx, //!< MMX registers and instructions diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128.h new file mode 100644 index 0000000000..2ee27a3736 --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128.h @@ -0,0 +1,48 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_H +#define GMX_SIMD_IMPL_X86_AVX2_128_H + +#include "impl_x86_avx2_128_definitions.h" +#include "impl_x86_avx2_128_general.h" +#include "impl_x86_avx2_128_simd4_double.h" +#include "impl_x86_avx2_128_simd4_float.h" +#include "impl_x86_avx2_128_simd_double.h" +#include "impl_x86_avx2_128_simd_float.h" +#include "impl_x86_avx2_128_util_double.h" +#include "impl_x86_avx2_128_util_float.h" + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_definitions.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_definitions.h new file mode 100644 index 0000000000..49182e22f9 --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_definitions.h @@ -0,0 +1,82 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_DEFINITIONS_H +#define GMX_SIMD_IMPL_X86_AVX2_128_DEFINITIONS_H + +// Capability definitions for (mostly) 128-bit AVX2 +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 1 +#define GMX_SIMD_HAVE_LOADU 1 +#define GMX_SIMD_HAVE_STOREU 1 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 +#define GMX_SIMD_HAVE_NATIVE_COPYSIGN_FLOAT 0 +#define GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_FLOAT 0 +#define GMX_SIMD_HAVE_NATIVE_RCP_ITER_FLOAT 0 +#define GMX_SIMD_HAVE_NATIVE_LOG_FLOAT 0 +#define GMX_SIMD_HAVE_NATIVE_EXP2_FLOAT 0 +#define GMX_SIMD_HAVE_NATIVE_EXP_FLOAT 0 +#define GMX_SIMD_HAVE_NATIVE_COPYSIGN_DOUBLE 0 +#define GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE 0 +#define GMX_SIMD_HAVE_NATIVE_RCP_ITER_DOUBLE 0 +#define GMX_SIMD_HAVE_NATIVE_LOG_DOUBLE 0 +#define GMX_SIMD_HAVE_NATIVE_EXP2_DOUBLE 0 +#define GMX_SIMD_HAVE_NATIVE_EXP_DOUBLE 0 +#define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT 1 +#define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE 1 +#define GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT 0 // No need for half-simd, width is 4 +#define GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE 0 // No need for half-simd, width is 2 + +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 1 + +// Implementation details +#define GMX_SIMD_FLOAT_WIDTH 4 +#define GMX_SIMD_DOUBLE_WIDTH 2 +#define GMX_SIMD_FINT32_WIDTH 4 +#define GMX_SIMD_DINT32_WIDTH 2 +#define GMX_SIMD4_WIDTH 4 +#define GMX_SIMD_RSQRT_BITS 11 +#define GMX_SIMD_RCP_BITS 11 + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_DEFINITIONS_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_general.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_general.h new file mode 100644 index 0000000000..788fe872af --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_general.h @@ -0,0 +1,41 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_GENERAL_H +#define GMX_SIMD_IMPL_X86_AVX2_128_GENERAL_H + +#include "gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_general.h" + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_GENERAL_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_double.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_double.h new file mode 100644 index 0000000000..dbf743b037 --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_double.h @@ -0,0 +1,41 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_DOUBLE_H +#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_DOUBLE_H + +#include "gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd4_double.h" + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_DOUBLE_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_float.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_float.h new file mode 100644 index 0000000000..69a6f668a6 --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd4_float.h @@ -0,0 +1,41 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_FLOAT_H +#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_FLOAT_H + +#include "gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd4_float.h" + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD4_FLOAT_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_double.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_double.h new file mode 100644 index 0000000000..8fb554c411 --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_double.h @@ -0,0 +1,89 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD_DOUBLE_H +#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD_DOUBLE_H + +#include "config.h" + +#include + +#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_simd_double.h" + +namespace gmx +{ + +static inline double gmx_simdcall +reduce(SimdDouble a) +{ + __m128d b = _mm_add_sd(a.simdInternal_, _mm_permute_pd(a.simdInternal_, _MM_SHUFFLE2(1, 1))); + return *reinterpret_cast(&b); +} + +static inline SimdDouble gmx_simdcall +fma(SimdDouble a, SimdDouble b, SimdDouble c) +{ + return { + _mm_fmadd_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +static inline SimdDouble gmx_simdcall +fms(SimdDouble a, SimdDouble b, SimdDouble c) +{ + return { + _mm_fmsub_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +static inline SimdDouble gmx_simdcall +fnma(SimdDouble a, SimdDouble b, SimdDouble c) +{ + return { + _mm_fnmadd_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +static inline SimdDouble gmx_simdcall +fnms(SimdDouble a, SimdDouble b, SimdDouble c) +{ + return { + _mm_fnmsub_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +} // namespace gmx + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD_DOUBLE_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_float.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_float.h new file mode 100644 index 0000000000..eda4881e96 --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_simd_float.h @@ -0,0 +1,90 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_SIMD_FLOAT_H +#define GMX_SIMD_IMPL_X86_AVX2_128_SIMD_FLOAT_H + +#include "config.h" + +#include + +#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_simd_float.h" + +namespace gmx +{ + +static inline float gmx_simdcall +reduce(SimdFloat a) +{ + a.simdInternal_ = _mm_add_ps(a.simdInternal_, _mm_permute_ps(a.simdInternal_, _MM_SHUFFLE(1, 0, 3, 2))); + a.simdInternal_ = _mm_add_ss(a.simdInternal_, _mm_permute_ps(a.simdInternal_, _MM_SHUFFLE(0, 3, 2, 1))); + return *reinterpret_cast(&a); +} + +static inline SimdFloat gmx_simdcall +fma(SimdFloat a, SimdFloat b, SimdFloat c) +{ + return { + _mm_fmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +static inline SimdFloat gmx_simdcall +fms(SimdFloat a, SimdFloat b, SimdFloat c) +{ + return { + _mm_fmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +static inline SimdFloat gmx_simdcall +fnma(SimdFloat a, SimdFloat b, SimdFloat c) +{ + return { + _mm_fnmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +static inline SimdFloat gmx_simdcall +fnms(SimdFloat a, SimdFloat b, SimdFloat c) +{ + return { + _mm_fnmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) + }; +} + +} // namespace gmx + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_SIMD_FLOAT_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_double.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_double.h new file mode 100644 index 0000000000..01bfa6c59d --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_double.h @@ -0,0 +1,91 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_UTIL_DOUBLE_H +#define GMX_SIMD_IMPL_X86_AVX2_128_UTIL_DOUBLE_H + +#include "config.h" + +#include + +#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_util_double.h" + +namespace gmx +{ + +static inline void gmx_simdcall +expandScalarsToTriplets(SimdDouble scalar, + SimdDouble * triplets0, + SimdDouble * triplets1, + SimdDouble * triplets2) +{ + triplets0->simdInternal_ = _mm_permute_pd(scalar.simdInternal_, _MM_SHUFFLE2(0, 0)); + triplets1->simdInternal_ = _mm_permute_pd(scalar.simdInternal_, _MM_SHUFFLE2(1, 0)); + triplets2->simdInternal_ = _mm_permute_pd(scalar.simdInternal_, _MM_SHUFFLE2(1, 1)); +} + +static inline double +reduceIncr4ReturnSum(double * m, + SimdDouble v0, + SimdDouble v1, + SimdDouble v2, + SimdDouble v3) +{ + __m128d t1, t2, t3, t4; + + t1 = _mm_unpacklo_pd(v0.simdInternal_, v1.simdInternal_); + t2 = _mm_unpackhi_pd(v0.simdInternal_, v1.simdInternal_); + t3 = _mm_unpacklo_pd(v2.simdInternal_, v3.simdInternal_); + t4 = _mm_unpackhi_pd(v2.simdInternal_, v3.simdInternal_); + + t1 = _mm_add_pd(t1, t2); + t3 = _mm_add_pd(t3, t4); + + assert(std::size_t(m) % 16 == 0); + + t2 = _mm_add_pd(t1, _mm_load_pd(m)); + t4 = _mm_add_pd(t3, _mm_load_pd(m + 2)); + _mm_store_pd(m, t2); + _mm_store_pd(m + 2, t4); + + t1 = _mm_add_pd(t1, t3); + + t2 = _mm_add_sd(t1, _mm_permute_pd(t1, _MM_SHUFFLE2(1, 1))); + return *reinterpret_cast(&t2); +} + +} // namespace gmx + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_UTIL_DOUBLE_H diff --git a/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_float.h b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_float.h new file mode 100644 index 0000000000..f22be079d3 --- /dev/null +++ b/src/gromacs/simd/impl_x86_avx2_128/impl_x86_avx2_128_util_float.h @@ -0,0 +1,83 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef GMX_SIMD_IMPL_X86_AVX2_128_UTIL_FLOAT_H +#define GMX_SIMD_IMPL_X86_AVX2_128_UTIL_FLOAT_H + +#include "config.h" + +#include + +#include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1_util_float.h" + +namespace gmx +{ + +static inline void gmx_simdcall +expandScalarsToTriplets(SimdFloat scalar, + SimdFloat * triplets0, + SimdFloat * triplets1, + SimdFloat * triplets2) +{ + triplets0->simdInternal_ = _mm_permute_ps(scalar.simdInternal_, _MM_SHUFFLE(1, 0, 0, 0)); + triplets1->simdInternal_ = _mm_permute_ps(scalar.simdInternal_, _MM_SHUFFLE(2, 2, 1, 1)); + triplets2->simdInternal_ = _mm_permute_ps(scalar.simdInternal_, _MM_SHUFFLE(3, 3, 3, 2)); +} + +static inline float gmx_simdcall +reduceIncr4ReturnSum(float * m, + SimdFloat v0, + SimdFloat v1, + SimdFloat v2, + SimdFloat v3) +{ + _MM_TRANSPOSE4_PS(v0.simdInternal_, v1.simdInternal_, v2.simdInternal_, v3.simdInternal_); + v0.simdInternal_ = _mm_add_ps(v0.simdInternal_, v1.simdInternal_); + v2.simdInternal_ = _mm_add_ps(v2.simdInternal_, v3.simdInternal_); + v0.simdInternal_ = _mm_add_ps(v0.simdInternal_, v2.simdInternal_); + + assert(std::size_t(m) % 16 == 0); + + v2.simdInternal_ = _mm_add_ps(v0.simdInternal_, _mm_load_ps(m)); + _mm_store_ps(m, v2.simdInternal_); + + __m128 b = _mm_add_ps(v0.simdInternal_, _mm_permute_ps(v0.simdInternal_, _MM_SHUFFLE(1, 0, 3, 2))); + b = _mm_add_ss(b, _mm_permute_ps(b, _MM_SHUFFLE(0, 3, 2, 1))); + return *reinterpret_cast(&b); +} + +} // namespace gmx + +#endif // GMX_SIMD_IMPL_X86_AVX2_128_UTIL_FLOAT_H diff --git a/src/gromacs/simd/simd.h b/src/gromacs/simd/simd.h index 81c8c84c58..f22022dd4d 100644 --- a/src/gromacs/simd/simd.h +++ b/src/gromacs/simd/simd.h @@ -107,6 +107,8 @@ # include "impl_x86_avx_256/impl_x86_avx_256.h" #elif GMX_SIMD_X86_AVX2_256 # include "impl_x86_avx2_256/impl_x86_avx2_256.h" +#elif GMX_SIMD_X86_AVX2_128 +# include "impl_x86_avx2_128/impl_x86_avx2_128.h" #elif GMX_SIMD_X86_MIC # include "impl_x86_mic/impl_x86_mic.h" #elif GMX_SIMD_X86_AVX_512 diff --git a/src/gromacs/simd/support.cpp b/src/gromacs/simd/support.cpp index 503e93bd14..3ba4f74d7a 100644 --- a/src/gromacs/simd/support.cpp +++ b/src/gromacs/simd/support.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2015,2016, by the GROMACS development team, led by + * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -74,6 +74,7 @@ simdString(SimdType s) { SimdType::X86_Avx128Fma, "AVX_128_FMA" }, { SimdType::X86_Avx, "AVX_256" }, { SimdType::X86_Avx2, "AVX2_256" }, + { SimdType::X86_Avx2_128, "AVX2_128" }, { SimdType::X86_Avx512, "AVX_512" }, { SimdType::X86_Avx512Knl, "AVX_512_KNL" }, { SimdType::X86_Mic, "X86_MIC" }, @@ -126,8 +127,10 @@ simdSuggested(const CpuInfo &c) case CpuInfo::Vendor::Amd: if (c.feature(CpuInfo::Feature::X86_Avx2)) { - // When Amd starts supporting Avx2 we assume it will be 256 bits - suggested = SimdType::X86_Avx2; + // AMD Ryzen supports 256-bit AVX2, but performs better with 128-bit + // since it can execute two independent such instructions per cycle, + // and wider SIMD has slightly lower efficiency in GROMACS. + suggested = SimdType::X86_Avx2_128; } else if (c.feature(CpuInfo::Feature::X86_Avx)) { @@ -199,6 +202,8 @@ simdCompiled() return SimdType::X86_Mic; #elif GMX_SIMD_X86_AVX2_256 return SimdType::X86_Avx2; +#elif GMX_SIMD_X86_AVX2_128 + return SimdType::X86_Avx2_128; #elif GMX_SIMD_X86_AVX_256 return SimdType::X86_Avx; #elif GMX_SIMD_X86_AVX_128_FMA diff --git a/src/gromacs/simd/support.h b/src/gromacs/simd/support.h index 9a5a53a0c7..5e223804ee 100644 --- a/src/gromacs/simd/support.h +++ b/src/gromacs/simd/support.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2015,2016, by the GROMACS development team, led by + * Copyright (c) 2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -64,6 +64,7 @@ enum class SimdType X86_Avx128Fma, //!< 128-bit Avx with FMA (Amd) X86_Avx, //!< 256-bit Avx X86_Avx2, //!< AVX2 + X86_Avx2_128, //!< 128-bit AVX2, better than 256-bit for AMD Ryzen X86_Avx512, //!< AVX_512 X86_Avx512Knl, //!< AVX_512_KNL X86_Mic, //!< Knight's corner -- 2.11.4.GIT