SSE4.1 single-precision kernels
commit 1c766d617afcffa2865329f4efc32c5748a82d6e
author Erik Lindahl <erik@kth.se>
Thu, 15 Nov 2012 23:09:11 +0000 (16 00:09 +0100)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Mon, 19 Nov 2012 14:58:13 +0000 (19 15:58 +0100)
tree 21f25e8d1766b1afdb6265876a9da0af58e6c04c
parent 3cd6352667114cb6520cd9360c1a6454d78a1294
SSE4.1 single-precision kernels

Kernels using SSE4.1 instructions on modern Intel and AMD x86 CPUs.
These are not a whole lot faster than SSE2 right now for default
interactions, but for Generalized Born they probably are, due to
better selection instructions. Padding and other properties are
similar to the SSE2 kernels just committed.

Change-Id: I3d6c09c70d16006128320ca24079f7f3851a882e
115 files changed:
include/gmx_x86_sse4_1.h
src/gmxlib/nonbonded/CMakeLists.txt
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/kernelutil_x86_sse4_1_single.h [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/make_nb_kernel_sse4_1_single.py [new file with mode: 0755]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwNone_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwNone_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwNone_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwNone_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwNone_GeomP1P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwNone_GeomW3P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwNone_GeomW3W3_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwNone_GeomW4P1_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwNone_GeomW4W4_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre [new file with mode: 0644]
src/gmxlib/nonbonded/nonbonded.c