From 82e7490a9939ba1fa86eb710f867b5e7a2721d67 Mon Sep 17 00:00:00 2001
From: Erik Lindahl <erik@kth.se>
Date: Fri, 5 Aug 2016 12:05:26 +0200
Subject: [PATCH] Bump oldest cmake, compiler and CUDA versions required

For release 2017 we now require gcc-4.8.1, clang-3.3 and icc-15, so we
can rely on full C++11 support. We now also require CUDA-6.5 and
CMake-3.4.3. This in turn means we can remove some older cmake tests,
and since many users won't read the install guide there are now
version tests that produce fatal errors during CMake configuration for
compiler versions we know are too old.

Various hacks and workarounds in source and build system can
now be removed.

Fixed error in previous definition of GMX_ALIGNED.

Updated C++11 compatibility tests in line with code we are now using.

Fixed that hwloc includes were not treated as system headers.

Treated including thread-MPI and TNG header files as system paths, to
be consistent with how we'd treat them if we were using external
versions of these.

Fixed icc 16 warnings in lapack routines.

Fixes #2012.

Change-Id: I36e02379a985b22c72f0f06481c65cae6e780c02
---
 CMakeLists.txt                                     | 54 +++++------------
 admin/builds/pre-submit-matrix.txt                 | 12 ++--
 admin/builds/release-matrix.txt                    |  4 +-
 cmake/TestAVXMaskload.c                            | 18 ------
 cmake/TestClangVersion.c                           | 18 ------
 cmake/gmxCFlags.cmake                              | 15 ++++-
 cmake/gmxManageGPU.cmake                           | 37 ++----------
 cmake/gmxManageLinearAlgebraLibraries.cmake        |  7 +--
 cmake/gmxManageMPI.cmake                           |  9 ---
 cmake/gmxManageNvccConfig.cmake                    | 63 +-------------------
 cmake/gmxManageOpenMP.cmake                        | 58 +++++++++---------
 cmake/gmxManageSimd.cmake                          |  8 ---
 cmake/gmxManageTNG.cmake                           |  3 +-
 cmake/gmxTestAVXMaskload.cmake                     | 68 ----------------------
 cmake/gmxTestCXX11.cmake                           | 29 +++++++--
 cmake/gmxTestCompilerProblems.cmake                | 40 ++++---------
 docs/CMakeLists.txt                                |  2 +-
 docs/conf-vars.py.cmakein                          |  2 +-
 docs/conf.py                                       |  2 +
 docs/dev-manual/build-system.rst                   |  2 +-
 docs/dev-manual/language-features.rst              |  7 ++-
 docs/install-guide/index.rst                       | 32 ++++++----
 src/config.h.cmakein                               |  3 -
 src/external/tng_io/BuildTNG.cmake                 |  6 +-
 src/external/tng_io/CMakeLists.txt                 |  4 +-
 src/gromacs/gmxana/legacytests/gmx_traj_tests.cpp  |  4 --
 .../kernelutil_x86_avx_128_fma_single.h            | 18 ++----
 .../kernelutil_x86_avx_256_single.h                | 18 ++----
 src/gromacs/gpu_utils/cudautils.cu                 | 29 +--------
 src/gromacs/gpu_utils/cudautils.cuh                |  6 --
 src/gromacs/hardware/detecthardware.cpp            |  6 +-
 src/gromacs/linearalgebra/gmx_lapack/dbdsqr.cpp    |  2 +-
 src/gromacs/linearalgebra/gmx_lapack/sbdsqr.cpp    |  2 +-
 src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu         |  4 +-
 src/gromacs/mdlib/tests/settle.cpp                 | 34 ++---------
 src/gromacs/random/gammadistribution.h             |  5 +-
 .../impl_x86_avx_128_fma_definitions.h             | 14 +----
 src/gromacs/utility/basedefinitions.h              | 12 +---
 38 files changed, 175 insertions(+), 482 deletions(-)
 delete mode 100644 cmake/TestAVXMaskload.c
 delete mode 100644 cmake/TestClangVersion.c
 delete mode 100644 cmake/gmxTestAVXMaskload.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c896d915fe..d888a3b20e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,11 +32,7 @@
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-cmake_minimum_required(VERSION 2.8.8)
-# When we require cmake >= 2.8.12, it will provide
-# CMAKE_MINIMUM_REQUIRED_VERSION automatically, but in the meantime we
-# need to set a variable, and it must have a different name.
-set(GMX_CMAKE_MINIMUM_REQUIRED_VERSION "2.8.8")
+cmake_minimum_required(VERSION 3.4.3)
 
 # CMake modules/macros are in a subdirectory to keep this file cleaner
 # This needs to be set before project() in order to pick up toolchain files
@@ -196,12 +192,16 @@ gmx_add_cache_dependency(GMX_COOL_QUOTES BOOL "NOT GMX_FAHCORE" OFF)
 
 option(GMX_USE_OPENCL "Enable OpenCL acceleration" OFF)
 
-# Decide on GPU settings based on user-settings and GPU/CUDA detection.
-# GCC 4.6 requires CUDA 5.0 and VS2015 requires CUDA 8.0
+# Decide on GPU settings based on user-settings and GPU/CUDA
+# detection.  GCC 4.8 requires CUDA 6.0 (but we choose 6.5 for the
+# preliminary C++11 support), icc 15 requires CUDA 7.0, and VS2015
+# requires CUDA 8.0
 if(MSVC)
     set(REQUIRED_CUDA_VERSION 8.0)
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+    set(REQUIRED_CUDA_VERSION 7.0)
 else()
-    set(REQUIRED_CUDA_VERSION 5.0)
+    set(REQUIRED_CUDA_VERSION 6.5)
 endif()
 set(REQUIRED_CUDA_COMPUTE_CAPABILITY 2.0)
 
@@ -335,17 +335,6 @@ set(EXTRA_CXX_FLAGS "")
 # Run through a number of tests for buggy compilers and other issues
 include(gmxTestCompilerProblems)
 gmx_test_compiler_problems()
-# GMX_SIMD will not be set automatically until the second
-# pass (which is not strictly guaranteed to occur), so putting this
-# check here among logically-related tests is inefficient, but the
-# potential loss is likely zero.
-if(GMX_SIMD STREQUAL "AVX_256"
-        AND CMAKE_COMPILER_IS_GNUCC
-        AND (C_COMPILER_VERSION VERSION_EQUAL "4.6.1"
-            OR CXX_COMPILER_VERSION VERSION_EQUAL "4.6.1"))
-    message(FATAL_ERROR "gcc 4.6.1 has buggy support for AVX, and GROMACS mdrun will not work. If you want simulation performance, use a more recent compiler. Otherwise, use GMX_SIMD=SSE4.1")
-    # See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49002
-endif()
 
 # Implement double-precision option. This is complicated because we
 # need installed headers to use the precision mode of the build that
@@ -505,7 +494,7 @@ endif()
 option(GMX_HWLOC "Add support for hwloc Portable Hardware locality library" ${GMX_HWLOC_DEFAULT})
 if(GMX_HWLOC)
     if(HWLOC_FOUND)
-        include_directories(${HWLOC_INCLUDE_DIRS})
+        include_directories(SYSTEM ${HWLOC_INCLUDE_DIRS})
         list(APPEND GMX_EXTRA_LIBRARIES ${HWLOC_LIBRARIES})
     else()
         message(FATAL_ERROR "Hwloc package support requested, but not found.")
@@ -572,8 +561,6 @@ include(gmxManageLmfit)
 if(GMX_GPU)
     # now that we have detected the dependencies, do the second configure pass
     gmx_gpu_setup()
-else()
-    mark_as_advanced(CUDA_HOST_COMPILER)
 endif()
 
 if(CYGWIN)
@@ -606,7 +593,9 @@ gmx_add_cache_dependency(GMX_BUILD_UNITTESTS BOOL BUILD_TESTING OFF)
 
 add_definitions( -DHAVE_CONFIG_H )
 include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src)
-include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/include)
+# TODO required at high level because both libgromacs and progs/mdrun
+# require it, both for thread-MPI and its atomics and mutexes.
+include_directories(BEFORE SYSTEM ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/include)
 # Required for config.h, maybe should only be set in src/CMakeLists.txt
 include_directories(BEFORE ${CMAKE_BINARY_DIR}/src)
 
@@ -826,17 +815,12 @@ include(gmxManageSuffixes)
 ################################################################
 # Shared library load path settings
 ################################################################
-# CMake supports RPATH on OS X only from 2.8.12 upwards.
-# CMAKE_SYSTEM_VERSION > 8.0 matches OS X 10.5 and above, where RPATH support
-# was added.
-
 if(NOT GMX_BUILD_SHARED_EXE)
     # No rpath
     set(CMAKE_SKIP_RPATH TRUE)
     set(CMAKE_EXE_LINK_DYNAMIC_C_FLAGS) # remove -Wl,-Bdynamic
     set(CMAKE_EXE_LINK_DYNAMIC_CXX_FLAGS)
-elseif((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") OR
-   ((CMAKE_SYSTEM_VERSION VERSION_GREATER 8.0) AND (NOT CMAKE_VERSION VERSION_LESS 2.8.12)))
+else()
     # The build folder always has bin/ and lib/; if we are also going to
     # install to lib/, then the installation RPATH works also in the build
     # tree.  This makes installation slightly faster (no need to rewrite the
@@ -846,23 +830,13 @@ elseif((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") OR
     endif()
     # Set the RPATH as relative to the executable location to make the
     # binaries relocatable.
-    if(NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+    if(NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") #Assume OS X >=10.5
         set(CMAKE_INSTALL_RPATH "\$ORIGIN/../${GMX_LIB_INSTALL_DIR}")
     else()
         set(CMAKE_INSTALL_RPATH "@executable_path/../${GMX_LIB_INSTALL_DIR}")
     endif()
     set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
     set(CMAKE_MACOSX_RPATH 1)
-else()
-    # We are on Darwin/OSX, and CMake cannot handle RPATHs automatically.
-    if(CMAKE_SYSTEM_VERSION VERSION_GREATER 8.0)
-        # Set the RPATH options manually.
-        set(CMAKE_INSTALL_NAME_DIR "@rpath")
-        set(GMX_EXE_LINKER_FLAGS ${GMX_EXE_LINKER_FLAGS} "-Wl,-rpath,@executable_path/../${GMX_LIB_INSTALL_DIR}")
-    else()
-        # Use the old INSTALL_NAME_DIR mechanism if RPATH is not supported.
-        set(CMAKE_INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/${LIB_INSTALL_DIR}")
-    endif()
 endif()
 
 #COPYING file: Only necessary for binary distributions.
diff --git a/admin/builds/pre-submit-matrix.txt b/admin/builds/pre-submit-matrix.txt
index 6f24dcad9a..260e1eba29 100644
--- a/admin/builds/pre-submit-matrix.txt
+++ b/admin/builds/pre-submit-matrix.txt
@@ -1,13 +1,13 @@
-gcc-4.6 gpu cuda-5.0 mpi openmp x11 cmake-2.8.8
+gcc-4.8 gpu cuda-6.5 mpi openmp x11
 gcc-4.8 gpu cuda-7.5 openmp release
 gcc-4.9 tsan fftpack simd=avx2_256
 gcc-6.1 double
-clang-3.4 double no-openmp fftpack asan
+clang-3.4 double no-openmp fftpack asan cmake-3.4.3
 # TODO move mdrun-only config to post-submit matrix
-clang-3.7 double mpi no-openmp fftpack mdrun-only
+clang-3.7 double mpi no-openmp fftpack mdrun-only cmake-3.4.3
 msvc-2015 openmp release
 icc-16.0 msvc-2015 fftpack
-icc-16.0 no-thread-mpi openmp mkl cmake-3.3.2 simd=avx_256
-gcc-5.2 mpi openmp simd=avx_128_fma
+icc-16.0 no-thread-mpi openmp mkl simd=avx_256
+gcc-5.1 mpi openmp cmake-3.4.3
 gcc-4.8 openmp opencl cuda-7.5 mpi release
-gcc-5.2 openmp opencl amdappsdk-3.0
+gcc-5.2 openmp opencl simd=avx_128_fma amdappsdk-3.0
diff --git a/admin/builds/release-matrix.txt b/admin/builds/release-matrix.txt
index ef5faea09b..36e4df93ad 100644
--- a/admin/builds/release-matrix.txt
+++ b/admin/builds/release-matrix.txt
@@ -1,6 +1,6 @@
 # These configurations will be used to build and test the tarballs
 # before the release.
 gcc-4.8 mpi mdrun-only
-gcc-4.6 static
-gcc-4.7 double
+gcc-6.1 static
+gcc-5.1 double
 clang-3.4 static double
diff --git a/cmake/TestAVXMaskload.c b/cmake/TestAVXMaskload.c
deleted file mode 100644
index e8438a1cc7..0000000000
--- a/cmake/TestAVXMaskload.c
+++ /dev/null
@@ -1,18 +0,0 @@
-#include<immintrin.h>
-int main()
-{
-    __m256d a;
-    __m256i mask;
-    double  d[4]={1,2,3,4};
-
-    a = _mm256_setzero_pd();
-    mask = _mm256_castpd_si256(a);
-
-#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
-    a = _mm256_maskload_pd(d,_mm256_castsi256_pd(mask));
-#else
-    a = _mm256_maskload_pd(d,mask);
-#endif
-    return 0;
-}
-
diff --git a/cmake/TestClangVersion.c b/cmake/TestClangVersion.c
deleted file mode 100644
index f47c7774e2..0000000000
--- a/cmake/TestClangVersion.c
+++ /dev/null
@@ -1,18 +0,0 @@
-int main()
-{
-/* This detects 3.0 versions for both C and C++ clang. It detects the
- * version of the LLVM back end, and not (for example) the Apple clang
- * version number (which might be 4.1 or some number based on its
- * "compatibility with gcc 4.2.1," even though the LLVM back end is
- * 3.0!).
- *
- * If/when we have time or user complaints, we can maybe ban earlier
- * versions of clang, but we don't actually know there's a problem
- * with them at the time of this commit.
- */
-#if (__clang_major__ == 3) && (__clang_minor__ == 0)
-    return 0;
-#else
-#error clang version information not found
-#endif
-}
diff --git a/cmake/gmxCFlags.cmake b/cmake/gmxCFlags.cmake
index b89bada654..8e1614be1a 100644
--- a/cmake/gmxCFlags.cmake
+++ b/cmake/gmxCFlags.cmake
@@ -134,7 +134,11 @@ macro (gmx_c_flags)
             # Problematic with CUDA
             # GMX_TEST_CXXFLAG(CXXFLAGS_WARN_EFFCXX "-Wnon-virtual-dtor" GMXC_CXXFLAGS)
             GMX_TEST_CXXFLAG(CXXFLAGS_WARN_EXTRA "-Wextra -Wno-missing-field-initializers -Wpointer-arith" GMXC_CXXFLAGS)
-            GMX_TEST_CXXFLAG(CXXFLAGS_WARN_UNDEF "-Wundef" GMXC_CXXFLAGS)
+            # CUDA versions prior to 7.5 come with a header (math_functions.h) which uses the _MSC_VER macro
+            # unconditionally, so we don't use -Wundef for earlier CUDA versions.
+            if(NOT(GMX_GPU AND CUDA_VERSION VERSION_LESS "7.5"))
+                GMX_TEST_CXXFLAG(CXXFLAGS_WARN_UNDEF "-Wundef" GMXC_CXXFLAGS)
+            endif()
             GMX_TEST_CFLAG(CXXFLAGS_WARN_REL "-Wno-array-bounds" GMXC_CXXFLAGS_RELEASE_ONLY)
         endif()
         # new in gcc 4.5
@@ -189,6 +193,15 @@ macro (gmx_c_flags)
                 GMX_TEST_CXXFLAG(CXXFLAGS_PRAGMA "-wd3180" GMXC_CXXFLAGS)
             endif()
             if (GMX_COMPILER_WARNINGS)
+                if (GMX_GPU)
+# Suppress warnings from CUDA headers
+# 7:   unrecognized token
+# 82:  storage class is not first
+# The below are also required for math_functions.h / math_functions.hpp at least until CUDA 8.0-RC
+# 193: zero used for undefined preprocessing identifier
+# 3346: dynamic exception specifiers are deprecated
+                    GMX_TEST_CXXFLAG(CXXFLAGS_WARN_OLD_GPU "-wd7 -wd82 -wd193 -wd3346" GMXC_CXXFLAGS)
+                endif()
 #All but the following warnings are identical for the C-compiler (see above)
 # 383: value copied to temporary, reference to temporary used
 # 444: destructor for base class ".." is not virtual
diff --git a/cmake/gmxManageGPU.cmake b/cmake/gmxManageGPU.cmake
index a6b5439303..60a74291ec 100644
--- a/cmake/gmxManageGPU.cmake
+++ b/cmake/gmxManageGPU.cmake
@@ -57,18 +57,8 @@ if ((GMX_GPU OR GMX_GPU_AUTO) AND NOT GMX_GPU_DETECTION_DONE)
     gmx_detect_gpu()
 endif()
 
-# CMake 3.0-3.1 has a bug in the following case, which breaks
-# configuration on at least BlueGene/Q. Fixed in 3.1.1
-if ((NOT CMAKE_VERSION VERSION_LESS "3.0.0") AND
-    (CMAKE_VERSION VERSION_LESS "3.1.1") AND
-        (CMAKE_CROSSCOMPILING AND NOT CMAKE_SYSTEM_PROCESSOR))
-    message(STATUS "Cannot search for CUDA because the CMake find package has a bug. Set a valid CMAKE_SYSTEM_PROCESSOR if you need to detect CUDA")
-else()
-    set(CAN_RUN_CUDA_FIND_PACKAGE 1)
-endif()
-
 # We need to call find_package even when we've already done the detection/setup
-if(GMX_GPU OR GMX_GPU_AUTO AND CAN_RUN_CUDA_FIND_PACKAGE)
+if(GMX_GPU OR GMX_GPU_AUTO)
     if(NOT GMX_GPU AND NOT GMX_DETECT_GPU_AVAILABLE)
         # Stay quiet when detection has occured and found no GPU.
         # Noise is acceptable when there is a GPU or the user required one.
@@ -82,23 +72,6 @@ if(GMX_GPU OR GMX_GPU_AUTO AND CAN_RUN_CUDA_FIND_PACKAGE)
     endif()
 
     find_package(CUDA ${REQUIRED_CUDA_VERSION} ${FIND_CUDA_QUIETLY})
-
-    # Cmake 2.8.12 (and CMake 3.0) introduced a new bug where the cuda
-    # library dir is added twice as an rpath on APPLE, which in turn causes
-    # the install_name_tool to wreck the binaries when it tries to remove this
-    # path. Since this is set inside the cuda module, we remove the extra rpath
-    # added in the library string - an rpath is not a library anyway, and at
-    # least for Gromacs this works on all CMake versions. This should be
-    # reasonably future-proof, since newer versions of CMake appear to handle
-    # the rpath automatically based on the provided library path, meaning
-    # the explicit rpath specification is no longer needed.
-    if(APPLE AND (CMAKE_VERSION VERSION_GREATER 2.8.11))
-        foreach(elem ${CUDA_LIBRARIES})
-            if(elem MATCHES "-Wl,.*")
-                list(REMOVE_ITEM CUDA_LIBRARIES ${elem})
-            endif()
-        endforeach(elem)
-    endif()
 endif()
 
 # Depending on the current vale of GMX_GPU and GMX_GPU_AUTO:
@@ -182,13 +155,13 @@ endif()
 # We need to mark these advanced outside the conditional, otherwise, if the
 # user turns GMX_GPU=OFF after a failed cmake pass, these variables will be
 # left behind in the cache.
-mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_SDK_ROOT_DIR CUDA_VERBOSE_BUILD # cmake 2.8.9 still spews these, check again when requirements change
-                 CUDA_SEPARABLE_COMPILATION      # not present at least with cmake 3.2, remove when required
-                 CUDA_USE_STATIC_CUDA_RUNTIME    # since cmake 3.3
-                 CUDA_dl_LIBRARY CUDA_rt_LIBRARY # - || -
+mark_as_advanced(CUDA_SDK_ROOT_DIR
+                 CUDA_USE_STATIC_CUDA_RUNTIME
+                 CUDA_dl_LIBRARY CUDA_rt_LIBRARY
                  )
 if(NOT GMX_GPU)
     mark_as_advanced(CUDA_TOOLKIT_ROOT_DIR)
+    mark_as_advanced(CUDA_HOST_COMPILER)
 endif()
 
 # Try to execute ${CUDA_NVCC_EXECUTABLE} --version and set the output
diff --git a/cmake/gmxManageLinearAlgebraLibraries.cmake b/cmake/gmxManageLinearAlgebraLibraries.cmake
index 8c8562b9c7..aeb7937c74 100644
--- a/cmake/gmxManageLinearAlgebraLibraries.cmake
+++ b/cmake/gmxManageLinearAlgebraLibraries.cmake
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2013,2014, by the GROMACS development team, led by
+# Copyright (c) 2013,2014,2016, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -89,7 +89,7 @@ macro(manage_linear_algebra_library name function_in_library)
             set(CMAKE_REQUIRED_FLAGS "${FFT_LINKER_FLAGS}")
             # This may also not work correctly if the user changes
             # MKL_LIBRARIES after the first run. However,
-            # MKL_LIBRARIES is only needed for icc version < 11, or
+            # MKL_LIBRARIES is only needed
             # for trying to use MKL with a non-Intel compiler, and we
             # can live with that for now.
             check_function_exists(${function_in_library} _${name}_mkl_works)
@@ -110,8 +110,7 @@ macro(manage_linear_algebra_library name function_in_library)
         if (NOT _library_was_found)
             set(${name}_FIND_QUIETLY ${_find_quietly})
             # Note that this finds all kinds of system libraries,
-            # including Apple's Accelerate Framework (and perhaps MKL for
-            # icc < 11).
+            # including Apple's Accelerate Framework
             find_package(${name})
             if (${name}_FOUND)
                 set(_libraries_to_link ${${name}_LIBRARIES})
diff --git a/cmake/gmxManageMPI.cmake b/cmake/gmxManageMPI.cmake
index 1d3645a443..2fe4b52bb7 100644
--- a/cmake/gmxManageMPI.cmake
+++ b/cmake/gmxManageMPI.cmake
@@ -165,15 +165,6 @@ if(GMX_MPI)
       endif()
     endif()
     unset(MPINAME_BIN CACHE)
-
-    # Using find_file() runs the CMake standard module
-    # GetPrerequisites.cmake, which adds the file_cmd
-    # variable to the top-level CMake namespace. This is
-    # fixed in CMake 2.8.10. Meanwhile, clean up for it.
-    if(CMAKE_VERSION VERSION_LESS "2.8.10")
-        mark_as_advanced(file_cmd)
-    endif()
-
   else()
       message(FATAL_ERROR
         "MPI support requested, but no MPI compiler found. Either set the "
diff --git a/cmake/gmxManageNvccConfig.cmake b/cmake/gmxManageNvccConfig.cmake
index 34de4bad36..c8561f44fe 100644
--- a/cmake/gmxManageNvccConfig.cmake
+++ b/cmake/gmxManageNvccConfig.cmake
@@ -36,7 +36,7 @@
 # pain as much as possible:
 # - use the CUDA_HOST_COMPILER if defined by the user, otherwise
 # - auto-detect compatible nvcc host compiler and set nvcc -ccbin (if not MPI wrapper)
-# - set icc compatibility mode to gcc 4.6
+# - set icc compatibility mode to gcc 4.8.1
 # - (advanced) variables set:
 #   * CUDA_HOST_COMPILER            - the host compiler for nvcc (only with cmake <2.8.10)
 #   * CUDA_HOST_COMPILER_OPTIONS    - the full host-compiler related option list passed to nvcc
@@ -87,25 +87,7 @@ endfunction()
 
 # set up host compiler and its options
 if(CUDA_HOST_COMPILER_CHANGED)
-    # FindCUDA in CMake 2.8.10 sets the host compiler internally
-    if (CMAKE_VERSION VERSION_LESS "2.8.10")
-        set(CUDA_HOST_COMPILER ${CUDA_HOST_COMPILER}
-            CACHE PATH "Host compiler for nvcc")
-    endif()
-
-    # On *nix force icc in gcc 4.6 compatibility mode. This is needed
-    # as even with icc used as host compiler, when icc's gcc compatibility
-    # mode is higher than the max gcc version officially supported by CUDA,
-    # nvcc will freak out.
     set(CUDA_HOST_COMPILER_OPTIONS "")
-    if (UNIX AND
-            ((CMAKE_C_COMPILER_ID MATCHES "Intel" AND
-              (CUDA_HOST_COMPILER_AUTOSET OR CMAKE_C_COMPILER STREQUAL CUDA_HOST_COMPILER)) OR
-            (CMAKE_CXX_COMPILER_ID MATCHES "Intel" AND CMAKE_CXX_COMPILER STREQUAL CUDA_HOST_COMPILER))
-        )
-        message(STATUS "Setting Intel Compiler compatibity mode to gcc 4.6 for nvcc host compilation")
-        list(APPEND CUDA_HOST_COMPILER_OPTIONS "-Xcompiler;-gcc-version=460")
-    endif()
 
     if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "GNU")
         # Some versions of gcc-4.8 and gcc-4.9 produce errors (in particular on OS X)
@@ -191,52 +173,11 @@ list(APPEND GMX_CUDA_NVCC_FLAGS "${GMX_CUDA_NVCC_GENCODE_FLAGS}")
 list(APPEND GMX_CUDA_NVCC_FLAGS "-use_fast_math")
 
 # assemble the CUDA host compiler flags
-# with CMake <2.8.10 the host compiler needs to be set on the nvcc command line
-if (CMAKE_VERSION VERSION_LESS "2.8.10")
-    list(APPEND GMX_CUDA_NVCC_FLAGS "-ccbin=${CUDA_HOST_COMPILER}")
-endif()
 list(APPEND GMX_CUDA_NVCC_FLAGS "${CUDA_HOST_COMPILER_OPTIONS}")
 
 # The flags are set as local variables which shadow the cache variables. The cache variables
 # (can be set by the user) are appended. This is done in a macro to set the flags when all
 # host compiler flags are already set.
 macro(GMX_SET_CUDA_NVCC_FLAGS)
-    if(CUDA_PROPAGATE_HOST_FLAGS)
-        set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-        # When CUDA 6.5 is required we should use C++11 also for CUDA and also propagate
-        # the C++11 flag to CUDA. Then we can use the solution implemented in FindCUDA
-        # (starting with 3.3 - can be backported). For now we need to remove the C++11
-        # flag which means we need to manually propagate all other flags.
-        string(REGEX REPLACE "[-]+std=c\\+\\+0x" "" _CMAKE_CXX_FLAGS_SANITIZED "${CMAKE_CXX_FLAGS}")
-
-        # The IBM xlc compiler chokes if we use both altivec and Cuda. Solve
-        # this by not propagating the flag in this case.
-        if(CMAKE_CXX_COMPILER_ID MATCHES "XL")
-            string(REGEX REPLACE "-qaltivec" "" _CMAKE_CXX_FLAGS_SANITIZED "${_CMAKE_CXX_FLAGS_SANITIZED}")
-        endif()
-
-        # CUDA versions prior to 7.5 come with a header (math_functions.h) which uses the _MSC_VER macro
-        # unconditionally, so we strip -Wundef from the propagatest flags for earlier CUDA versions.
-        if (CUDA_VERSION VERSION_LESS "7.5")
-            string(REGEX REPLACE "-Wundef" "" _CMAKE_CXX_FLAGS_SANITIZED "${_CMAKE_CXX_FLAGS_SANITIZED}")
-        endif()
-
-        string(REPLACE " " "," _flags "${_CMAKE_CXX_FLAGS_SANITIZED}")
-        set(CUDA_NVCC_FLAGS "${GMX_CUDA_NVCC_FLAGS};${CUDA_NVCC_FLAGS};-Xcompiler;${_flags}")
-
-        # Create list of all possible configurations. For multi-configuration this is CMAKE_CONFIGURATION_TYPES
-        # and for single configuration CMAKE_BUILD_TYPE. Not sure why to add the default ones, but FindCUDA
-        # claims one should.
-        set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo)
-        list(REMOVE_DUPLICATES CUDA_configuration_types)
-
-        foreach(_config ${CUDA_configuration_types})
-            string(TOUPPER ${_config} _config_upper)
-            string(REPLACE " " "," _flags "${CMAKE_CXX_FLAGS_${_config_upper}}")
-            set(CUDA_NVCC_FLAGS_${_config_upper} "${CUDA_NVCC_FLAGS_${_config_upper}};-Xcompiler;${_flags}")
-        endforeach()
-    else()
-        set(CUDA_NVCC_FLAGS "${GMX_CUDA_NVCC_FLAGS};${CUDA_NVCC_FLAGS}")
-    endif()
+    set(CUDA_NVCC_FLAGS "${GMX_CUDA_NVCC_FLAGS};${CUDA_NVCC_FLAGS}")
 endmacro()
diff --git a/cmake/gmxManageOpenMP.cmake b/cmake/gmxManageOpenMP.cmake
index 5903957bef..f1b373159e 100644
--- a/cmake/gmxManageOpenMP.cmake
+++ b/cmake/gmxManageOpenMP.cmake
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -37,40 +37,34 @@
 # and then does some additional tests for flags afterwards.
 
 if(GMX_OPENMP)
-    if(CMAKE_C_COMPILER_ID MATCHES "Cray" AND CMAKE_VERSION VERSION_LESS 3)
-        message(STATUS "OpenMP multithreading is not detected correctly for the Cray compiler with CMake before version 3.0 (see http://public.kitware.com/Bug/view.php?id=14567)")
-        set(GMX_OPENMP OFF CACHE BOOL
-            "OpenMP multithreading is not detected correctly for the Cray compiler with CMake before version 3.0 (see http://public.kitware.com/Bug/view.php?id=14567)" FORCE)
-    else()
-        # We should do OpenMP detection if we get here
-        # OpenMP check must come before other CFLAGS!
-        find_package(OpenMP)
-        if(OPENMP_FOUND)
-            # CMake on Windows doesn't support linker flags passed to target_link_libraries
-            # (i.e. it treats /openmp as \openmp library file). Also, no OpenMP linker flags are needed.
-            if(NOT (WIN32 AND NOT MINGW))
-                if(CMAKE_COMPILER_IS_GNUCC AND GMX_PREFER_STATIC_OPENMP AND NOT APPLE)
-                    set(OpenMP_LINKER_FLAGS "-Wl,-static -lgomp -lrt -Wl,-Bdynamic -lpthread")
-                    set(OpenMP_SHARED_LINKER_FLAGS "")
-                else()
-                    # Only set a linker flag if the user didn't set them manually
-                    if(NOT DEFINED OpenMP_LINKER_FLAGS)
-                        set(OpenMP_LINKER_FLAGS "${OpenMP_C_FLAGS}")
-                    endif()
-                    if(NOT DEFINED OpenMP_SHARED_LINKER_FLAGS)
-                        set(OpenMP_SHARED_LINKER_FLAGS "${OpenMP_C_FLAGS}")
-                    endif()
+    # We should do OpenMP detection if we get here
+    # OpenMP check must come before other CFLAGS!
+    find_package(OpenMP)
+    if(OPENMP_FOUND)
+        # CMake on Windows doesn't support linker flags passed to target_link_libraries
+        # (i.e. it treats /openmp as \openmp library file). Also, no OpenMP linker flags are needed.
+        if(NOT (WIN32 AND NOT MINGW))
+            if(CMAKE_COMPILER_IS_GNUCC AND GMX_PREFER_STATIC_OPENMP AND NOT APPLE)
+                set(OpenMP_LINKER_FLAGS "-Wl,-static -lgomp -lrt -Wl,-Bdynamic -lpthread")
+                set(OpenMP_SHARED_LINKER_FLAGS "")
+            else()
+                # Only set a linker flag if the user didn't set them manually
+                if(NOT DEFINED OpenMP_LINKER_FLAGS)
+                    set(OpenMP_LINKER_FLAGS "${OpenMP_C_FLAGS}")
+                endif()
+                if(NOT DEFINED OpenMP_SHARED_LINKER_FLAGS)
+                    set(OpenMP_SHARED_LINKER_FLAGS "${OpenMP_C_FLAGS}")
                 endif()
             endif()
-            if(MINGW)
-                #GCC Bug 48659
-                set(OpenMP_C_FLAGS "${OpenMP_C_FLAGS} -mstackrealign")
-            endif()
-        else()
-            message(WARNING
-                    "The compiler you are using does not support OpenMP parallelism. This might hurt your performance a lot, in particular with GPUs. Try using a more recent version, or a different compiler. For now, we are proceeding by turning off OpenMP.")
-            set(GMX_OPENMP OFF CACHE STRING "Whether GROMACS will use OpenMP parallelism." FORCE)
         endif()
+        if(MINGW)
+            #GCC Bug 48659
+            set(OpenMP_C_FLAGS "${OpenMP_C_FLAGS} -mstackrealign")
+        endif()
+    else()
+        message(WARNING
+                "The compiler you are using does not support OpenMP parallelism. This might hurt your performance a lot, in particular with GPUs. Try using a more recent version, or a different compiler. For now, we are proceeding by turning off OpenMP.")
+        set(GMX_OPENMP OFF CACHE STRING "Whether GROMACS will use OpenMP parallelism." FORCE)
     endif()
 endif()
 gmx_dependent_cache_variable(GMX_OPENMP_MAX_THREADS
diff --git a/cmake/gmxManageSimd.cmake b/cmake/gmxManageSimd.cmake
index fde53f779c..b862a71cd6 100644
--- a/cmake/gmxManageSimd.cmake
+++ b/cmake/gmxManageSimd.cmake
@@ -32,8 +32,6 @@
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-# include avx test source, used if the AVX flags are set below
-include(gmxTestAVXMaskload)
 include(gmxFindFlagsForSource)
 
 # Macro that manages setting the respective C and C++ toolchain
@@ -247,8 +245,6 @@ elseif(GMX_SIMD STREQUAL "AVX_128_FMA")
     set(GMX_SIMD_X86_${GMX_SIMD} 1)
     set(SIMD_STATUS_MESSAGE "Enabling 128-bit AVX SIMD GROMACS SIMD (with fused-multiply add)")
 
-    gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
-
 elseif(GMX_SIMD STREQUAL "AVX_256")
 
     prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
@@ -269,8 +265,6 @@ elseif(GMX_SIMD STREQUAL "AVX_256")
     set(GMX_SIMD_X86_${GMX_SIMD} 1)
     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX SIMD instructions")
 
-    gmx_test_avx_gcc_maskload_bug(GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG "${SIMD_C_FLAGS}")
-
 elseif(GMX_SIMD STREQUAL "AVX2_256")
 
     prepare_x86_toolchain(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
@@ -291,8 +285,6 @@ elseif(GMX_SIMD STREQUAL "AVX2_256")
     set(GMX_SIMD_X86_${GMX_SIMD} 1)
     set(SIMD_STATUS_MESSAGE "Enabling 256-bit AVX2 SIMD instructions")
 
-    # No need to test for Maskload bug - it was fixed before gcc added AVX2 support
-
 elseif(GMX_SIMD STREQUAL "MIC")
 
     # No flags needed. Not testing.
diff --git a/cmake/gmxManageTNG.cmake b/cmake/gmxManageTNG.cmake
index 12ada3d967..b14b013028 100644
--- a/cmake/gmxManageTNG.cmake
+++ b/cmake/gmxManageTNG.cmake
@@ -46,7 +46,8 @@ if(GMX_USE_TNG)
         include_directories(SYSTEM ${TNG_IO_INCLUDE_DIRS})
     else()
         include(${BUNDLED_TNG_LOCATION}/BuildTNG.cmake)
-        tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS)
+        tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS TNG_INCLUDE_DIRS)
+        include_directories(BEFORE SYSTEM "${TNG_INCLUDE_DIRS}")
 
         if (HAVE_ZLIB)
             list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES})
diff --git a/cmake/gmxTestAVXMaskload.cmake b/cmake/gmxTestAVXMaskload.cmake
deleted file mode 100644
index a522d6c49b..0000000000
--- a/cmake/gmxTestAVXMaskload.cmake
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# This file is part of the GROMACS molecular simulation package.
-#
-# Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
-# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
-# and including many others, as listed in the AUTHORS file in the
-# top-level source directory and at http://www.gromacs.org.
-#
-# GROMACS is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# GROMACS is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with GROMACS; if not, see
-# http://www.gnu.org/licenses, or write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
-#
-# If you want to redistribute modifications to GROMACS, please
-# consider that scientific software is very special. Version
-# control is crucial - bugs must be traceable. We will be happy to
-# consider code for inclusion in the official distribution, but
-# derived work must not be called official GROMACS. Details are found
-# in the README & COPYING files - if they are missing, get the
-# official version at http://www.gromacs.org.
-#
-# To help us fund GROMACS development, we humbly ask that you cite
-# the research papers on the package. Check out http://www.gromacs.org.
-
-#  GMX_TEST_AVX_GCC_MASKLOAD_BUG(VARIABLE AVX_CFLAGS)
-#
-#  VARIABLE will be set if the compiler is a buggy version
-#  of GCC (prior to 4.5.3, and maybe 4.6) that has an incorrect second
-#  argument to the AVX _mm256_maskload_ps() intrinsic.
-#
-#  You need to use this variable in a cmakedefine, and then handle
-#  the case separately in your code - no automatic cure, unfortunately.
-#
-MACRO(GMX_TEST_AVX_GCC_MASKLOAD_BUG VARIABLE AVX_CFLAGS)
-    IF(NOT DEFINED ${VARIABLE})
-        MESSAGE(STATUS "Checking for gcc AVX maskload bug")
-        # some compilers like clang accept both cases, 
-        # so first try a normal compile to avoid flagging those as buggy.
-        TRY_COMPILE(${VARIABLE}_COMPILEOK "${CMAKE_BINARY_DIR}"
-                    "${CMAKE_SOURCE_DIR}/cmake/TestAVXMaskload.c"
-                    COMPILE_DEFINITIONS "${AVX_CFLAGS} -DGMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG=0" )
-        IF(${VARIABLE}_COMPILEOK)
-            SET(${VARIABLE} 0 CACHE INTERNAL "Work around GCC bug in AVX maskload argument" FORCE)
-            MESSAGE(STATUS "Checking for gcc AVX maskload bug - not present")
-        ELSE()
-            TRY_COMPILE(${VARIABLE}_COMPILEOK "${CMAKE_BINARY_DIR}"
-                        "${CMAKE_SOURCE_DIR}/cmake/TestAVXMaskload.c"
-                         COMPILE_DEFINITIONS "${AVX_CFLAGS} -DGMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG=1" )
-            IF(${VARIABLE}_COMPILEOK)
-                SET(${VARIABLE} 1 CACHE INTERNAL "Work around GCC bug in AVX maskload argument" FORCE)
-                MESSAGE(STATUS "Checking for gcc AVX maskload bug - found, will try to work around")
-            ELSE()
-                MESSAGE(WARNING "Cannot compile AVX code - assuming gcc AVX maskload bug not present." )
-                MESSAGE(STATUS "Checking for gcc AVX maskload bug - not present")
-            ENDIF()
-        ENDIF()
-    ENDIF()
-ENDMACRO()
diff --git a/cmake/gmxTestCXX11.cmake b/cmake/gmxTestCXX11.cmake
index 65a1598463..ea6bed874f 100644
--- a/cmake/gmxTestCXX11.cmake
+++ b/cmake/gmxTestCXX11.cmake
@@ -45,11 +45,11 @@ function(GMX_TEST_CXX11 CXX11_CXX_FLAG_NAME STDLIB_CXX_FLAG_NAME STDLIB_LIBRARIE
     # First check that the compiler is OK, and find the appropriate flag.
 
     if(WIN32 AND NOT MINGW)
-        set(CXX11_CXX_FLAG "/Qstd=c++0x")
+        set(CXX11_CXX_FLAG "/Qstd=c++11")
     elseif(CYGWIN)
-        set(CXX11_CXX_FLAG "-std=gnu++0x") #required for strdup
+        set(CXX11_CXX_FLAG "-std=gnu++11") #required for strdup
     else()
-        set(CXX11_CXX_FLAG "-std=c++0x")
+        set(CXX11_CXX_FLAG "-std=c++11")
     endif()
     CHECK_CXX_COMPILER_FLAG("${CXX11_CXX_FLAG}" CXXFLAG_STD_CXX0X)
     if(NOT CXXFLAG_STD_CXX0X)
@@ -106,7 +106,26 @@ int main() {
   int array[5] = { 1, 2, 3, 4, 5 };
   for (int& x : array)
     x *= 2;
+  // Test alignas
+  alignas(4*sizeof(int)) int y;
 }" CXX11_SUPPORTED)
+    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.8.1")
+            message(FATAL_ERROR "GROMACS requires version 4.8.1 or later of the GNU C++ compiler for complete C++11 support")
+        endif()
+    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.3")
+            message(FATAL_ERROR "GROMACS requires version 3.3 or later of the Clang C++ compiler for complete C++11 support")
+        endif()
+    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "15.0")
+            message(FATAL_ERROR "GROMACS requires version 15.0 or later of the Intel C++ compiler for complete C++11 support")
+        endif()
+    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "19.0.23026")
+            message(FATAL_ERROR "GROMACS requires version 2015 (19.0.23026) or later of the MSVC C++ compiler for complete C++11 support")
+        endif()
+    endif()
     if(CXX11_SUPPORTED)
         set(${CXX11_CXX_FLAG_NAME} ${CXX11_CXX_FLAG} PARENT_SCOPE)
     else()
@@ -128,8 +147,8 @@ int main() {
   intPointer p(new int(10));
   std::map<int, std::unique_ptr<int>> m;
   m.insert(std::make_pair(5, std::move(p)));
-  auto start = std::chrono::system_clock::now();
-  if (std::chrono::system_clock::now() - start < std::chrono::seconds(2))
+  auto start = std::chrono::steady_clock::now();
+  if (std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
   {
       std::thread t;
   }
diff --git a/cmake/gmxTestCompilerProblems.cmake b/cmake/gmxTestCompilerProblems.cmake
index 80bfc062f3..26eeb298df 100644
--- a/cmake/gmxTestCompilerProblems.cmake
+++ b/cmake/gmxTestCompilerProblems.cmake
@@ -44,37 +44,21 @@ macro(gmx_test_compiler_problems)
         message(WARNING "The versions of the C and C++ compilers do not match (${CMAKE_C_COMPILER_VERSION} and ${CMAKE_CXX_COMPILER_VERSION}, respectively). Mixing different C/C++ compilers can cause problems.")
     endif()
 
-    # clang 3.0 is buggy for some unknown reason detected during adding
-    # the SSE2 group kernels for GROMACS 4.6. If we ever work out what
-    # that is, we should replace these tests with a compiler feature test,
-    # update GROMACS Redmine task #1039 and perhaps report a clang bug.
-    #
-    # In the meantime, until we require CMake 2.8.10 we cannot rely on it to detect
-    # the compiler version for us. So we need a manual check for clang 3.0.
-    include(gmxDetectClang30)
-    gmx_detect_clang_3_0(COMPILER_IS_CLANG_3_0)
-    if(COMPILER_IS_CLANG_3_0)
-        message(FATAL_ERROR "Your compiler is clang version 3.0, which is known to be buggy for GROMACS. Use a different compiler.")
-    endif()
-
-    if (CMAKE_C_COMPILER_ID STREQUAL "PGI")
-        message(WARNING "Currently tested PGI compiler versions (up to 15.7) generate binaries that do not pass all regression test, and the generated binaries are significantly slower than with GCC, ICC or Clang. For now we do not recommend PGI beyond development testing - make sure to run the regressiontests.")
-    endif()
+    # Note that we've already tested that the compiler works with C++11
+    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
 
-    if(CMAKE_COMPILER_IS_GNUCC AND
-            (CMAKE_C_COMPILER_VERSION VERSION_LESS "4.9.0" OR CMAKE_SIZEOF_VOID_P EQUAL 8)
-            AND (WIN32 OR CYGWIN)
-            AND GMX_SIMD MATCHES "AVX" AND NOT GMX_SIMD STREQUAL AVX_128_FMA)
-        message(WARNING "GCC on Windows (GCC older than 4.9 or any version when compiling for 64bit) with AVX (other than AVX_128_FMA) crashes. Choose a different GMX_SIMD or a different compiler.") # GCC bug 49001, 54412.
-    endif()
-
-    if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND WIN32)
-        if(CMAKE_VERSION VERSION_LESS 3.0.0)
-            message(WARNING "Clang on Windows requires cmake 3.0.0")
+        # GCC bugs 49001 and 54412 on Windows: only warn (do not abort), since they might be fixed in later versions
+        if((CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9.0" OR CMAKE_SIZEOF_VOID_P EQUAL 8)
+           AND (WIN32 OR CYGWIN)
+           AND (GMX_SIMD MATCHES "AVX") AND NOT (GMX_SIMD STREQUAL "AVX_128_FMA"))
+            message(WARNING "GCC on Windows (GCC older than 4.9 in 32-bit mode, or any version in 64-bit mode) with 256-bit AVX will probably crash. You might want to choose a different GMX_SIMD or a different compiler.")
         endif()
-        if(CMAKE_C_COMPILER_VERSION VERSION_LESS 3.5.0)
-            message(WARNING "Clang on Windows requires clang 3.5.0")
+    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+        if(WIN32 AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.5.0")
+            message(WARNING "Using Clang on Windows requires Clang 3.5.0")
         endif()
+    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
+        message(WARNING "Currently tested PGI compiler versions (up to 15.7) generate binaries that do not pass all regression test, and the generated binaries are significantly slower than with GCC, ICC or Clang. For now we do not recommend PGI beyond development testing - make sure to run the regressiontests.")
     endif()
 
 endmacro(gmx_test_compiler_problems)
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index ec6e4550ef..f769fb0cb1 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -141,7 +141,7 @@ if (SPHINX_FOUND)
         EXTRA_VARS
             SPHINX_EXTENSION_PATH RELENG_PATH
             EXPECTED_DOXYGEN_VERSION
-            GMX_CMAKE_MINIMUM_REQUIRED_VERSION REQUIRED_CUDA_VERSION
+            CMAKE_MINIMUM_REQUIRED_VERSION REQUIRED_CUDA_VERSION
             REQUIRED_OPENCL_MIN_VERSION
             REQUIRED_CUDA_COMPUTE_CAPABILITY REGRESSIONTEST_VERSION
             SOURCE_MD5SUM REGRESSIONTEST_MD5SUM_STRING
diff --git a/docs/conf-vars.py.cmakein b/docs/conf-vars.py.cmakein
index a8bbe35e3c..b7c216a81b 100644
--- a/docs/conf-vars.py.cmakein
+++ b/docs/conf-vars.py.cmakein
@@ -39,7 +39,7 @@ gmx_version_string_full = '@GMX_VERSION_STRING_FULL@'
 regressiontest_version = '@REGRESSIONTEST_VERSION@'
 variables = [
         ('EXPECTED_DOXYGEN_VERSION', '@EXPECTED_DOXYGEN_VERSION@'),
-        ('GMX_CMAKE_MINIMUM_REQUIRED_VERSION', '@GMX_CMAKE_MINIMUM_REQUIRED_VERSION@'),
+        ('CMAKE_MINIMUM_REQUIRED_VERSION', '@CMAKE_MINIMUM_REQUIRED_VERSION@'),
         ('REQUIRED_CUDA_VERSION', '@REQUIRED_CUDA_VERSION@'),
         ('REQUIRED_CUDA_COMPUTE_CAPABILITY', '@REQUIRED_CUDA_COMPUTE_CAPABILITY@'),
         ('REQUIRED_OPENCL_MIN_VERSION', '@REQUIRED_OPENCL_MIN_VERSION@'),
diff --git a/docs/conf.py b/docs/conf.py
index 94c0012893..58a676adf4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -161,6 +161,8 @@ rst_epilog += """
 .. _LAM-MPI: http://www.lam-mpi.org
 .. _OpenMP: http://en.wikipedia.org/wiki/OpenMP
 .. _CMake installation page: http://www.cmake.org/install/
+.. _Ubuntu toolchain ppa page: https://launchpad.net/~ubuntu-toolchain-r/+archive/ubuntu/test
+.. _EPEL page: https://fedoraproject.org/wiki/EPEL
 .. _running CMake: http://www.cmake.org/runningcmake/
 .. _CMake environment variables: http://cmake.org/Wiki/CMake_Useful_Variables#Environment_Variables
 .. _FFTW: http://www.fftw.org
diff --git a/docs/dev-manual/build-system.rst b/docs/dev-manual/build-system.rst
index df0a2095f5..a6762a5d83 100644
--- a/docs/dev-manual/build-system.rst
+++ b/docs/dev-manual/build-system.rst
@@ -4,7 +4,7 @@ Build system overview
 =====================
 
 The |Gromacs| build system uses CMake (version
-|GMX_CMAKE_MINIMUM_REQUIRED_VERSION| or newer is required) to generate the
+|CMAKE_MINIMUM_REQUIRED_VERSION| or newer is required) to generate the
 actual build system for the build tool choosen by the user.  See CMake
 documentation for general introduction to CMake and how to use it.  This
 documentation focuses on how the |Gromacs| build system is organized and
diff --git a/docs/dev-manual/language-features.rst b/docs/dev-manual/language-features.rst
index d4d3a9edd8..3fc98799c7 100644
--- a/docs/dev-manual/language-features.rst
+++ b/docs/dev-manual/language-features.rst
@@ -10,8 +10,9 @@ these standards fully.
 
 * MSVC supports only a subset of C99 and work-arounds are required in those cases.
 * Before 7.0 (partial support in 6.5) CUDA didn't support C++11. Therefore any
-  header file which is needed (or likly will be nedded) by CUDA should not use C++11.
-* C++11 features which are not widely implemented (including in MSVC 2015 and GCC 4.6)
-  should not be used.
+  header file which is needed (or likely will be needed) by CUDA should not use C++11.
+* We should be able to use virtually all C++11 features outside of the header files
+  required by CUDA code (and OpenCL kernels), since we have gradually moved to
+  compilers that have full support for C++11.
 
 .. TODO: Copy important points from http://www.gromacs.org/index.php?title=Developer_Zone/Programming_Guide/Allowed_C%2B%2B_Features
diff --git a/docs/install-guide/index.rst b/docs/install-guide/index.rst
index 7737fec084..ffa7047de3 100644
--- a/docs/install-guide/index.rst
+++ b/docs/install-guide/index.rst
@@ -15,7 +15,7 @@ These instructions pertain to building |Gromacs|
 Quick and dirty installation
 ----------------------------
 1. Get the latest version of your C and C++ compilers.
-2. Check that you have CMake version |GMX_CMAKE_MINIMUM_REQUIRED_VERSION| or later.
+2. Check that you have CMake version |CMAKE_MINIMUM_REQUIRED_VERSION| or later.
 3. Get and unpack the latest version of the |Gromacs| tarball.
 4. Make a separate build directory and change to it. 
 5. Run ``cmake`` with the path to the source as an argument
@@ -91,10 +91,11 @@ compiler. We recommend gcc, because it is free, widely available and
 frequently provides the best performance.
 
 You should strive to use the most recent version of your
-compiler. Minimum supported compiler versions are
-* GNU (gcc) 4.6
-* Intel (icc) 14
-* LLVM (clang) 3.4
+compiler. Since we require full C++11 support the minimum supported
+compiler versions are
+* GNU (gcc) 4.8.1
+* Intel (icc) 15.0
+* LLVM (clang) 3.3
 * Microsoft (MSVC) 2015
 Other compilers may work (Cray, Pathscale, older clang) but do
 not offer competitive performance. We recommend against PGI because
@@ -111,7 +112,7 @@ other compilers, read on.
 
 On Linux, both the Intel and clang compiler use the libstdc++ which
 comes with gcc as the default C++ library. For |Gromacs|, we require
-the compiler to support libstc++ version 4.6.1 or higher. To select a
+the compiler to support libstdc++ version 4.8.1 or higher. To select a
 particular libstdc++ library, use:
 
 * For Intel: ``-DGMX_STDLIB_CXX_FLAGS=-gcc-name=/path/to/gcc/binary``
@@ -140,6 +141,11 @@ For all non-x86 platforms, your best option is typically to use gcc or
 the vendor's default or recommended compiler, and check for
 specialized information below.
 
+For updated versions of gcc to add to your Linux OS, see
+
+* Ubuntu: `Ubuntu toolchain ppa page`_
+* RHEL/CentOS: `EPEL page`_ or the RedHat Developer Toolset
+
 Compiling with parallelization options
 --------------------------------------
 
@@ -151,8 +157,10 @@ generally built into your compiler and detected automatically.
 GPU support
 ^^^^^^^^^^^
 |Gromacs| has excellent support for NVIDIA GPUs supported via CUDA.
-NVIDIA's CUDA_ version |REQUIRED_CUDA_VERSION| software development kit is required,
-and the latest version is strongly encouraged. NVIDIA GPUs with at
+On Linux with gcc, NVIDIA's CUDA_ version |REQUIRED_CUDA_VERSION|
+software development kit is required, and the latest
+version is strongly encouraged. Using Intel or Microsoft compilers
+requires CUDA version 7.0 and 8.0, respectively. NVIDIA GPUs with at
 least NVIDIA compute capability |REQUIRED_CUDA_COMPUTE_CAPABILITY| are
 required, e.g. Fermi, Kepler, Maxwell or Pascal cards. You are strongly recommended to
 get the latest CUDA version and driver supported by your hardware, but
@@ -209,7 +217,7 @@ CMake
 -----
 
 |Gromacs| builds with the CMake build system, requiring at least
-version |GMX_CMAKE_MINIMUM_REQUIRED_VERSION|. You can check whether
+version |CMAKE_MINIMUM_REQUIRED_VERSION|. You can check whether
 CMake is installed, and what version it is, with ``cmake
 --version``. If you need to install CMake, then first check whether
 your platform's package management system provides a suitable version,
@@ -1126,9 +1134,9 @@ much everywhere, it is important that we tell you where we really know
 it works because we have tested it. We do test on Linux, Windows, and
 Mac with a range of compilers and libraries for a range of our
 configuration options. Every commit in our git source code repository
-is currently tested on x86 with gcc versions ranging from 4.6 through
-5.2, and versions 16 of the Intel compiler as well as Clang
-version 3.4 through 3.8. For this, we use a variety of GNU/Linux
+is currently tested on x86 with a number of gcc versions ranging from 4.8.1
+through 6.1, version 16 of the Intel compiler, and Clang
+versions 3.4 through 3.8. For this, we use a variety of GNU/Linux
 flavors and versions as well as recent versions of Windows. Under
 Windows, we test both MSVC 2015 and version 16 of the Intel compiler.
 For details, you can
diff --git a/src/config.h.cmakein b/src/config.h.cmakein
index c7bd5fb3da..0885ec38c4 100644
--- a/src/config.h.cmakein
+++ b/src/config.h.cmakein
@@ -74,9 +74,6 @@
 /** Define if we are building for Cygwin */
 #cmakedefine01 GMX_CYGWIN
 
-/* GCC bug in AVX maskload/maskstore arguments - worked around internally */
-#cmakedefine01 GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
-
 /* SSE2 was selected for SIMD instruction set level */
 #cmakedefine01 GMX_SIMD_X86_SSE2
 
diff --git a/src/external/tng_io/BuildTNG.cmake b/src/external/tng_io/BuildTNG.cmake
index 5a7432cd82..8fbd77170f 100644
--- a/src/external/tng_io/BuildTNG.cmake
+++ b/src/external/tng_io/BuildTNG.cmake
@@ -22,9 +22,9 @@ test_big_endian(TNG_INTEGER_BIG_ENDIAN)
 include(CheckIncludeFile)
 check_include_file(inttypes.h TNG_HAVE_INTTYPES_H)
 
-macro(TNG_GET_SOURCE_LIST TNG_SOURCELIST TNG_COMPILEDEFS)
-    include_directories(BEFORE ${TNG_ROOT_SOURCE_DIR}/include)
-    include_directories(BEFORE ${TNG_ROOT_BINARY_DIR}/include)
+# TODO propagate changes involving TNG_INCLUDE_DIRS to TNG repo
+macro(TNG_GET_SOURCE_LIST TNG_SOURCELIST TNG_COMPILEDEFS TNG_INCLUDE_DIRS)
+    set(${TNG_INCLUDE_DIRS} ${TNG_ROOT_SOURCE_DIR}/include ${TNG_ROOT_BINARY_DIR}/include)
     set(_tng_compression_sources bwlzh.c bwt.c coder.c dict.c fixpoint.c huffman.c huffmem.c lz77.c merge_sort.c mtf.c rle.c tng_compress.c vals16.c warnmalloc.c widemuldiv.c xtc2.c xtc3.c)
     set(_tng_io_sources tng_io.c md5.c)
     set(${TNG_SOURCELIST})
diff --git a/src/external/tng_io/CMakeLists.txt b/src/external/tng_io/CMakeLists.txt
index 87c10be8be..5c43e6082f 100644
--- a/src/external/tng_io/CMakeLists.txt
+++ b/src/external/tng_io/CMakeLists.txt
@@ -27,7 +27,9 @@ include(CheckIncludeFile)
 check_include_file(inttypes.h   HAVE_INTTYPES_H)
 
 include(BuildTNG.cmake)
-tng_get_source_list(TNG_SOURCES TNG_COMPILE_DEFS)
+# TODO propagate changes involving TNG_INCLUDE_DIRS to TNG repo
+tng_get_source_list(TNG_SOURCES TNG_COMPILE_DEFS TNG_INCLUDE_DIRS)
+include_directories(BEFORE "${TNG_INCLUDE_DIRS}")
 
 tng_set_source_properties(WITH_ZLIB ${ZLIB_FOUND})
 
diff --git a/src/gromacs/gmxana/legacytests/gmx_traj_tests.cpp b/src/gromacs/gmxana/legacytests/gmx_traj_tests.cpp
index 820ba06471..6ac20f7a11 100644
--- a/src/gromacs/gmxana/legacytests/gmx_traj_tests.cpp
+++ b/src/gromacs/gmxana/legacytests/gmx_traj_tests.cpp
@@ -150,10 +150,6 @@ const char *trajectoryFileNames[] = {
     "spc2-traj.g96"
 };
 
-#ifdef __INTEL_COMPILER
-#pragma warning( disable : 177 )
-#endif
-
 INSTANTIATE_TEST_CASE_P(NoFatalErrorWhenWritingFrom,
                         GmxTraj,
                             ::testing::ValuesIn(gmx::ArrayRef<const char*>(trajectoryFileNames)));
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h
index 25a7c83cd1..9285b21fa4 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -49,18 +49,10 @@
 #define gmx_mm_castsi128_ps   _mm_castsi128_ps
 #define gmx_mm_extract_epi32  _mm_extract_epi32
 
-/* Work around gcc bug with wrong type for mask formal parameter to maskload/maskstore */
-#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
-#    define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), _mm_castsi128_ps(mask))
-#    define gmx_mm_maskstore_ps(mem, mask, x)    _mm_maskstore_ps((mem), _mm_castsi128_ps(mask), (x))
-#    define gmx_mm256_maskload_ps(mem, mask)    _mm256_maskload_ps((mem), _mm256_castsi256_ps(mask))
-#    define gmx_mm256_maskstore_ps(mem, mask, x) _mm256_maskstore_ps((mem), _mm256_castsi256_ps(mask), (x))
-#else
-#    define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), (mask))
-#    define gmx_mm_maskstore_ps(mem, mask, x)    _mm_maskstore_ps((mem), (mask), (x))
-#    define gmx_mm256_maskload_ps(mem, mask)    _mm256_maskload_ps((mem), (mask))
-#    define gmx_mm256_maskstore_ps(mem, mask, x) _mm256_maskstore_ps((mem), (mask), (x))
-#endif
+#define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), (mask))
+#define gmx_mm_maskstore_ps(mem, mask, x)    _mm_maskstore_ps((mem), (mask), (x))
+#define gmx_mm256_maskload_ps(mem, mask)    _mm256_maskload_ps((mem), (mask))
+#define gmx_mm256_maskstore_ps(mem, mask, x) _mm256_maskstore_ps((mem), (mask), (x))
 
 /* Normal sum of four xmm registers */
 #define gmx_mm_sum4_ps(t0, t1, t2, t3)  _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3))
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h
index bfbccd54de..ac489ea64b 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h
+++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -59,18 +59,10 @@ gmx_mm256_set_m128(__m128 hi, __m128 lo)
     return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 0x1);
 }
 
-/* Work around gcc bug with wrong type for mask formal parameter to maskload/maskstore */
-#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
-#    define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), _mm_castsi128_ps(mask))
-#    define gmx_mm_maskstore_ps(mem, mask, x)    _mm_maskstore_ps((mem), _mm_castsi128_ps(mask), (x))
-#    define gmx_mm256_maskload_ps(mem, mask)    _mm256_maskload_ps((mem), _mm256_castsi256_ps(mask))
-#    define gmx_mm256_maskstore_ps(mem, mask, x) _mm256_maskstore_ps((mem), _mm256_castsi256_ps(mask), (x))
-#else
-#    define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), (mask))
-#    define gmx_mm_maskstore_ps(mem, mask, x)    _mm_maskstore_ps((mem), (mask), (x))
-#    define gmx_mm256_maskload_ps(mem, mask)    _mm256_maskload_ps((mem), (mask))
-#    define gmx_mm256_maskstore_ps(mem, mask, x) _mm256_maskstore_ps((mem), (mask), (x))
-#endif
+#define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), (mask))
+#define gmx_mm_maskstore_ps(mem, mask, x)    _mm_maskstore_ps((mem), (mask), (x))
+#define gmx_mm256_maskload_ps(mem, mask)    _mm256_maskload_ps((mem), (mask))
+#define gmx_mm256_maskstore_ps(mem, mask, x) _mm256_maskstore_ps((mem), (mask), (x))
 
 /* Transpose lower/upper half of 256-bit registers separately */
 #define GMX_MM256_HALFTRANSPOSE4_PS(ymm0, ymm1, ymm2, ymm3) {            \
diff --git a/src/gromacs/gpu_utils/cudautils.cu b/src/gromacs/gpu_utils/cudautils.cu
index 5a43f00963..8ac8938075 100644
--- a/src/gromacs/gpu_utils/cudautils.cu
+++ b/src/gromacs/gpu_utils/cudautils.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -85,18 +85,6 @@ int cu_copy_D2H_async(void * h_dest, void * d_src, size_t bytes, cudaStream_t s
     return cu_copy_D2H_generic(h_dest, d_src, bytes, true, s);
 }
 
-int cu_copy_D2H_alloc(void ** h_dest, void * d_src, size_t bytes)
-{
-    if (h_dest == NULL || d_src == NULL || bytes == 0)
-    {
-        return -1;
-    }
-
-    smalloc(*h_dest, bytes);
-
-    return cu_copy_D2H(*h_dest, d_src, bytes);
-}
-
 /*! Launches synchronous or asynchronous device to host memory copy.
  *
  *  The copy is launched in stream s or if not specified, in stream 0.
@@ -138,21 +126,6 @@ int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s
     return cu_copy_H2D_generic(d_dest, h_src, bytes, true, s);
 }
 
-int cu_copy_H2D_alloc(void ** d_dest, void * h_src, size_t bytes)
-{
-    cudaError_t stat;
-
-    if (d_dest == NULL || h_src == NULL || bytes == 0)
-    {
-        return -1;
-    }
-
-    stat = cudaMalloc(d_dest, bytes);
-    CU_RET_ERR(stat, "cudaMalloc failed in cu_copy_H2D_alloc");
-
-    return cu_copy_H2D(*d_dest, h_src, bytes);
-}
-
 float cu_event_elapsed(cudaEvent_t start, cudaEvent_t end)
 {
     float       t = 0.0;
diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh
index 5daa040d28..6408b551f8 100644
--- a/src/gromacs/gpu_utils/cudautils.cuh
+++ b/src/gromacs/gpu_utils/cudautils.cuh
@@ -137,9 +137,6 @@ int cu_copy_D2H(void * /*h_dest*/, void * /*d_src*/, size_t /*bytes*/);
 /*! Launches asynchronous host to device memory copy in stream s. */
 int cu_copy_D2H_async(void * /*h_dest*/, void * /*d_src*/, size_t /*bytes*/, cudaStream_t /*s = 0*/);
 
-/*! Allocates host memory and launches synchronous host to device memory copy. */
-int cu_copy_D2H_alloc(void ** /*h_dest*/, void * /*d_src*/, size_t /*bytes*/);
-
 
 /*! Launches synchronous host to device memory copy. */
 int cu_copy_H2D(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/);
@@ -147,9 +144,6 @@ int cu_copy_H2D(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/);
 /*! Launches asynchronous host to device memory copy in stream s. */
 int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = 0*/);
 
-/*! Allocates device memory and launches synchronous host to device memory copy. */
-int cu_copy_H2D_alloc(void ** /*d_dest*/, void * /*h_src*/, size_t /*bytes*/);
-
 /*! Frees device memory and resets the size and allocation size to -1. */
 void cu_free_buffered(void *d_ptr, int *n = NULL, int *nalloc = NULL);
 
diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp
index c2f90ac54f..87504467a5 100644
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -859,14 +859,12 @@ static void
 spinUpCore() noexcept
 {
 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
-    // steady_clock is better than system_clock, but unsupported in gcc-4.6.4.
-    // For release-2017 we can retire gcc-4.6 support and move to steady_clock.
     float dummy           = 0.1;
     int   countConfigured = sysconf(_SC_NPROCESSORS_CONF);    // noexcept
-    auto  start           = std::chrono::system_clock::now(); // noexcept
+    auto  start           = std::chrono::steady_clock::now(); // noexcept
 
     while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured &&
-           std::chrono::system_clock::now() - start < std::chrono::seconds(2))
+           std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
     {
         for (int i = 1; i < 10000; i++)
         {
diff --git a/src/gromacs/linearalgebra/gmx_lapack/dbdsqr.cpp b/src/gromacs/linearalgebra/gmx_lapack/dbdsqr.cpp
index 950d9392aa..7c9fdb5aae 100644
--- a/src/gromacs/linearalgebra/gmx_lapack/dbdsqr.cpp
+++ b/src/gromacs/linearalgebra/gmx_lapack/dbdsqr.cpp
@@ -48,7 +48,7 @@ F77_FUNC(dbdsqr,DBDSQR)(const char *uplo,
     double unfl, sinl, cosr, smin, smax, sinr;
     double oldcs;
     int oldll;
-    double shift, sigmn, oldsn;
+    double shift, sigmn, oldsn = 0.;
     int maxit;
     double sminl;
     double sigmx;
diff --git a/src/gromacs/linearalgebra/gmx_lapack/sbdsqr.cpp b/src/gromacs/linearalgebra/gmx_lapack/sbdsqr.cpp
index 2cd91a2fe4..18f302a515 100644
--- a/src/gromacs/linearalgebra/gmx_lapack/sbdsqr.cpp
+++ b/src/gromacs/linearalgebra/gmx_lapack/sbdsqr.cpp
@@ -48,7 +48,7 @@ F77_FUNC(sbdsqr,SBDSQR)(const char *uplo,
     float unfl, sinl, cosr, smin, smax, sinr;
     float oldcs;
     int oldll;
-    float shift, sigmn, oldsn;
+    float shift, sigmn, oldsn = 0.;
     int maxit;
     float sminl;
     float sigmx;
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
index bd49f6aba2..ace5973946 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
@@ -435,8 +435,8 @@ void nbnxn_gpu_launch_kernel(gmx_nbnxn_cuda_t       *nb,
 
     if (debug)
     {
-        fprintf(debug, "GPU launch configuration:\n\tThread block: %dx%dx%d\n\t"
-                "\tGrid: %dx%d\n\t#Super-clusters/clusters: %d/%d (%d)\n"
+        fprintf(debug, "GPU launch configuration:\n\tThread block: %ux%ux%u\n\t"
+                "\tGrid: %ux%u\n\t#Super-clusters/clusters: %d/%d (%d)\n"
                 "\tShMem: %d\n",
                 dim_block.x, dim_block.y, dim_block.z,
                 dim_grid.x, dim_grid.y, plist->nsci*c_numClPerSupercl,
diff --git a/src/gromacs/mdlib/tests/settle.cpp b/src/gromacs/mdlib/tests/settle.cpp
index 4fcb49b719..5ea75bd36d 100644
--- a/src/gromacs/mdlib/tests/settle.cpp
+++ b/src/gromacs/mdlib/tests/settle.cpp
@@ -309,39 +309,15 @@ TEST_P(SettleTest, SatisfiesConstraints)
     }
 }
 
-using ::testing::Bool;
 // Scan the full Cartesian product of numbers of SETTLE interactions
 // (4 and 17 are chosen to test cases that do and do not match
 // hardware SIMD widths), and whether or not we use PBC, velocities or
-// calculate the virial contribution. It would be nicer to generate
-// these combinations with ::testing::Combine, but gcc 4.6 can't cope
-// with the template meta-programming required to generate the tuples.
+// calculate the virial contribution.
 INSTANTIATE_TEST_CASE_P(WithParameters, SettleTest,
-                            ::testing::Values(SettleTestParameters(1,  true,  true,  true),
-                                              SettleTestParameters(4,  true,  true,  true),
-                                              SettleTestParameters(17, true,  true,  true),
-                                              SettleTestParameters(1,  false, true,  true),
-                                              SettleTestParameters(4,  false, true,  true),
-                                              SettleTestParameters(17, false, true,  true),
-                                              SettleTestParameters(1,  true,  false, true),
-                                              SettleTestParameters(4,  true,  false, true),
-                                              SettleTestParameters(17, true,  false, true),
-                                              SettleTestParameters(1,  false, false, true),
-                                              SettleTestParameters(4,  false, false, true),
-                                              SettleTestParameters(17, false, false, true),
-                                              SettleTestParameters(1,  true,  true,  false),
-                                              SettleTestParameters(4,  true,  true,  false),
-                                              SettleTestParameters(17, true,  true,  false),
-                                              SettleTestParameters(1,  false, true,  false),
-                                              SettleTestParameters(4,  false, true,  false),
-                                              SettleTestParameters(17, false, true,  false),
-                                              SettleTestParameters(1,  true,  false, false),
-                                              SettleTestParameters(4,  true,  false, false),
-                                              SettleTestParameters(17, true,  false, false),
-                                              SettleTestParameters(1,  false, false, false),
-                                              SettleTestParameters(4,  false, false, false),
-                                              SettleTestParameters(17, false, false, false)));
-
+                            ::testing::Combine(::testing::Values(1, 4, 17),
+                                                   ::testing::Bool(),
+                                                   ::testing::Bool(),
+                                                   ::testing::Bool()));
 
 } // namespace
 } // namespace
diff --git a/src/gromacs/random/gammadistribution.h b/src/gromacs/random/gammadistribution.h
index bb05dd0b28..ac850cfeb8 100644
--- a/src/gromacs/random/gammadistribution.h
+++ b/src/gromacs/random/gammadistribution.h
@@ -96,8 +96,9 @@ namespace gmx
  *  produce errors. Even for newer compilers, libstdc++ and libc++ appear to
  *  use different algorithms to generate it, which means their values differ
  *  in contrast to the uniform and normal distributions where they are
- *  identical. To avoid both the gcc-4.6 bug and make it easier to use GROMACS
- *   unit tests that depend on random numbers we have our own implementation.
+ *  identical. Both to avoid compiler bugs and to make it easier to use
+ *  GROMACS unit tests that depend on random numbers, we have our
+ *  own implementation.
  *
  *  Be warned that the gamma distribution works like the standard
  *  normal distribution and keeps drawing values from the random engine
diff --git a/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma_definitions.h b/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma_definitions.h
index bf81c8b3b3..f2b5a20d92 100644
--- a/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma_definitions.h
+++ b/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma_definitions.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -36,8 +36,6 @@
 #ifndef GMX_SIMD_IMPL_X86_AVX_128_FMA_DEFINITIONS_H
 #define GMX_SIMD_IMPL_X86_AVX_128_FMA_DEFINITIONS_H
 
-#include "config.h"
-
 // Capability definitions for AVX-128-FMA
 #define GMX_SIMD                                1
 #define GMX_SIMD_HAVE_FLOAT                     1
@@ -82,13 +80,7 @@
 #define GMX_SIMD_RSQRT_BITS                    11
 #define GMX_SIMD_RCP_BITS                      11
 
-// Work around gcc bug with wrong type for mask formal parameter to maskload/maskstore
-#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
-#    define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), _mm_castsi128_ps(mask))
-#    define gmx_mm_maskstore_ps(mem, mask, x)   _mm_maskstore_ps((mem), _mm_castsi128_ps(mask), (x))
-#else
-#    define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), (mask))
-#    define gmx_mm_maskstore_ps(mem, mask, x)   _mm_maskstore_ps((mem), (mask), (x))
-#endif
+#define gmx_mm_maskload_ps(mem, mask)       _mm_maskload_ps((mem), (mask))
+#define gmx_mm_maskstore_ps(mem, mask, x)   _mm_maskstore_ps((mem), (mask), (x))
 
 #endif  // GMX_SIMD_IMPL_X86_AVX_128_FMA_DEFINITIONS_H
diff --git a/src/gromacs/utility/basedefinitions.h b/src/gromacs/utility/basedefinitions.h
index 47a820a045..509b8747a1 100644
--- a/src/gromacs/utility/basedefinitions.h
+++ b/src/gromacs/utility/basedefinitions.h
@@ -255,18 +255,10 @@ typedef uint64_t gmx_uint64_t;
    \endcode
  */
 
-#if (defined(__GNUC__) && !defined(__clang__)) || defined(__ibmxl__) || defined(__xlC__) || defined(__PATHCC__)
-// Gcc-4.6.4 does not support alignas, but both gcc, pathscale and xlc
-// support the standard GNU alignment attributes. PGI also sets __GNUC__ now,
-// and mostly supports it. clang 3.2 does not support the GCC alignment attribute.
-#    define GMX_ALIGNED(type, alignment) __attribute__ ((aligned(alignment*sizeof(type)))) type
-#else
-// If nothing else works we rely on C++11. This will for instance work for MSVC2015 and later.
+// We rely on C++11. This will for instance work for MSVC2015 and later.
 // If you get an error here, find out what attribute to use to get your compiler to align
 // data properly and add it as a case.
-#    define GMX_ALIGNED(type, alignment) alignas(alignment*alignof(type)) type
-#endif
-
+#define GMX_ALIGNED(type, alignment) alignas((alignment)*sizeof(type)) type
 
 /*! \brief
  * Macro to explicitly ignore an unused value.
-- 
2.11.4.GIT