src/gromacs/ewald/pme-internal.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file contains function declarations necessary for
  40  * computing energies and forces for the PME long-ranged part (Coulomb
  41  * and LJ).
  42  *
  43  * \author Berk Hess <hess@kth.se>
  44  * \author Mark Abraham <mark.j.abraham@gmail.com>
  45  * \ingroup module_ewald
  46  */
  47
  48 /* TODO This file is a temporary holding area for stuff local to the
  49  * PME code, before it acquires some more normal ewald/file.c and
  50  * ewald/file.h structure.  In future clean up, get rid of this file,
  51  * to build more normal. */
  52
  53 #ifndef GMX_EWALD_PME_INTERNAL_H
  54 #define GMX_EWALD_PME_INTERNAL_H
  55
  56 #include "config.h"
  57
  58 #include "gromacs/math/gmxcomplex.h"
  59 #include "gromacs/utility/basedefinitions.h"
  60 #include "gromacs/utility/gmxmpi.h"
  61
  62 #include "pme-gpu-types-host.h"
  63
  64 //! A repeat of typedef from parallel_3dfft.h
  65 typedef struct gmx_parallel_3dfft *gmx_parallel_3dfft_t;
  66
  67 struct t_commrec;
  68 struct t_inputrec;
  69 struct PmeGpu;
  70
  71 //@{
  72 //! Grid indices for A state for charge and Lennard-Jones C6
  73 #define PME_GRID_QA    0
  74 #define PME_GRID_C6A   2
  75 //@}
  76
  77 //@{
  78 /*! \brief Flags that indicate the number of PME grids in use */
  79 #define DO_Q           2 /* Electrostatic grids have index q<2 */
  80 #define DO_Q_AND_LJ    4 /* non-LB LJ grids have index 2 <= q < 4 */
  81 #define DO_Q_AND_LJ_LB 9 /* With LB rules we need a total of 2+7 grids */
  82 //@}
  83
  84 /*! \brief Pascal triangle coefficients scaled with (1/2)^6 for LJ-PME with LB-rules */
  85 static const real lb_scale_factor[] = {
  86     1.0/64, 6.0/64, 15.0/64, 20.0/64,
  87     15.0/64, 6.0/64, 1.0/64
  88 };
  89
  90 /*! \brief Pascal triangle coefficients used in solve_pme_lj_yzx, only need to do 4 calculations due to symmetry */
  91 static const real lb_scale_factor_symm[] = { 2.0/64, 12.0/64, 30.0/64, 20.0/64 };
  92
  93 /*! \brief We only define a maximum to be able to use local arrays without allocation.
  94  * An order larger than 12 should never be needed, even for test cases.
  95  * If needed it can be changed here.
  96  */
  97 #define PME_ORDER_MAX 12
  98
  99 /*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from pme_src.
 100  * This is only called when the PME cut-off/grid size changes.
 101  */
 102 void gmx_pme_reinit(struct gmx_pme_t **pmedata,
 103                     const t_commrec   *cr,
 104                     struct gmx_pme_t * pme_src,
 105                     const t_inputrec * ir,
 106                     const ivec         grid_size,
 107                     real               ewaldcoeff_q,
 108                     real               ewaldcoeff_lj);
 109
 110
 111 /* Temporary suppression until these structs become opaque and don't live in
 112  * a header that is included by other headers. Also, until then I have no
 113  * idea what some of the names mean. */
 114
 115 //! @cond Doxygen_Suppress
 116
 117 /*! \brief Data structure for grid communication */
 118 struct pme_grid_comm_t
 119 {
 120     int send_id;         //!< Source rank id
 121     int send_index0;
 122     int send_nindex;
 123     int recv_id;         //!< Destination rank id
 124     int recv_index0;
 125     int recv_nindex;
 126     int recv_size = 0;   //!< Receive buffer width, used with OpenMP
 127 };
 128
 129 /*! \brief Data structure for grid overlap communication in a single dimension */
 130 struct pme_overlap_t
 131 {
 132 #if GMX_MPI
 133     MPI_Comm                     mpi_comm;       //!< MPI communcator
 134 #endif
 135     int                          nnodes;         //!< Number of ranks
 136     int                          nodeid;         //!< Unique rank identifcator
 137     std::vector<int>             s2g0;           //!< The local interpolation grid start
 138     std::vector<int>             s2g1;           //!< The local interpolation grid end
 139     int                          send_size;      //!< Send buffer width, used with OpenMP
 140     std::vector<pme_grid_comm_t> comm_data;      //!< All the individual communication data for each rank
 141     std::vector<real>            sendbuf;        //!< Shared buffer for sending
 142     std::vector<real>            recvbuf;        //!< Shared buffer for receiving
 143 };
 144
 145 /*! \brief Data structure for organizing particle allocation to threads */
 146 typedef struct {
 147     int *n;      /* Cumulative counts of the number of particles per thread */
 148     int  nalloc; /* Allocation size of i */
 149     int *i;      /* Particle indices ordered on thread index (n) */
 150 } thread_plist_t;
 151
 152 /*! \brief Helper typedef for spline vectors */
 153 typedef real *splinevec[DIM];
 154
 155 /*! \brief Data structure for beta-spline interpolation */
 156 typedef struct {
 157     int       n;
 158     int      *ind;
 159     splinevec theta;
 160     real     *ptr_theta_z;
 161     splinevec dtheta;
 162     real     *ptr_dtheta_z;
 163 } splinedata_t;
 164
 165 /*! \brief Data structure for coordinating transfer between PP and PME ranks*/
 166 struct pme_atomcomm_t{
 167     int      dimind;        /* The index of the dimension, 0=x, 1=y */
 168     int      nslab;
 169     int      nodeid;
 170 #if GMX_MPI
 171     MPI_Comm mpi_comm;
 172 #endif
 173
 174     int     *node_dest;     /* The nodes to send x and q to with DD */
 175     int     *node_src;      /* The nodes to receive x and q from with DD */
 176     int     *buf_index;     /* Index for commnode into the buffers */
 177
 178     int      maxshift;
 179
 180     int      npd;
 181     int      pd_nalloc;
 182     int     *pd;
 183     int     *count;         /* The number of atoms to send to each node */
 184     int    **count_thread;
 185     int     *rcount;        /* The number of atoms to receive */
 186
 187     int      n;
 188     int      nalloc;
 189     rvec    *x;
 190     real    *coefficient;
 191     rvec    *f;
 192     gmx_bool bSpread;       /* These coordinates are used for spreading */
 193     int      pme_order;
 194     ivec    *idx;
 195     rvec    *fractx;            /* Fractional coordinate relative to
 196                                  * the lower cell boundary
 197                                  */
 198     int             nthread;
 199     int            *thread_idx; /* Which thread should spread which coefficient */
 200     thread_plist_t *thread_plist;
 201     splinedata_t   *spline;
 202 };
 203
 204 /*! \brief Data structure for a single PME grid */
 205 struct pmegrid_t{
 206     ivec  ci;     /* The spatial location of this grid         */
 207     ivec  n;      /* The used size of *grid, including order-1 */
 208     ivec  offset; /* The grid offset from the full node grid   */
 209     int   order;  /* PME spreading order                       */
 210     ivec  s;      /* The allocated size of *grid, s >= n       */
 211     real *grid;   /* The grid local thread, size n             */
 212 };
 213
 214 /*! \brief Data structures for PME grids */
 215 struct pmegrids_t{
 216     pmegrid_t  grid;         /* The full node grid (non thread-local)            */
 217     int        nthread;      /* The number of threads operating on this grid     */
 218     ivec       nc;           /* The local spatial decomposition over the threads */
 219     pmegrid_t *grid_th;      /* Array of grids for each thread                   */
 220     real      *grid_all;     /* Allocated array for the grids in *grid_th        */
 221     int       *g2t[DIM];     /* The grid to thread index                         */
 222     ivec       nthread_comm; /* The number of threads to communicate with        */
 223 };
 224
 225 /*! \brief Data structure for spline-interpolation working buffers */
 226 struct pme_spline_work;
 227
 228 /*! \brief Data structure for working buffers */
 229 struct pme_solve_work_t;
 230
 231 /*! \brief Master PME data structure */
 232 struct gmx_pme_t {            //NOLINT(clang-analyzer-optin.performance.Padding)
 233     int           ndecompdim; /* The number of decomposition dimensions */
 234     int           nodeid;     /* Our nodeid in mpi->mpi_comm */
 235     int           nodeid_major;
 236     int           nodeid_minor;
 237     int           nnodes;    /* The number of nodes doing PME */
 238     int           nnodes_major;
 239     int           nnodes_minor;
 240
 241     MPI_Comm      mpi_comm;
 242     MPI_Comm      mpi_comm_d[2]; /* Indexed on dimension, 0=x, 1=y */
 243 #if GMX_MPI
 244     MPI_Datatype  rvec_mpi;      /* the pme vector's MPI type */
 245 #endif
 246
 247     gmx_bool   bUseThreads;   /* Does any of the PME ranks have nthread>1 ?  */
 248     int        nthread;       /* The number of threads doing PME on our rank */
 249
 250     gmx_bool   bPPnode;       /* Node also does particle-particle forces */
 251     bool       doCoulomb;     /* Apply PME to electrostatics */
 252     bool       doLJ;          /* Apply PME to Lennard-Jones r^-6 interactions */
 253     gmx_bool   bFEP;          /* Compute Free energy contribution */
 254     gmx_bool   bFEP_q;
 255     gmx_bool   bFEP_lj;
 256     int        nkx, nky, nkz; /* Grid dimensions */
 257     gmx_bool   bP3M;          /* Do P3M: optimize the influence function */
 258     int        pme_order;
 259     real       ewaldcoeff_q;  /* Ewald splitting coefficient for Coulomb */
 260     real       ewaldcoeff_lj; /* Ewald splitting coefficient for r^-6 */
 261     real       epsilon_r;
 262
 263
 264     enum PmeRunMode runMode; /* Which codepath is the PME runner taking - CPU, GPU, mixed;
 265                               * TODO: this is the information that should be owned by the task scheduler,
 266                               * and ideally not be duplicated here.
 267                               */
 268
 269     PmeGpu      *gpu;        /* A pointer to the GPU data.
 270                               * TODO: this should be unique or a shared pointer.
 271                               * Currently in practice there is a single gmx_pme_t instance while a code
 272                               * is partially set up for many of them. The PME tuning calls gmx_pme_reinit()
 273                               * which fully reinitializes the one and only PME structure anew while maybe
 274                               * keeping the old grid buffers if they were already large enough.
 275                               * This small choice should be made clear in the later refactoring -
 276                               * do we store many PME objects for different grid sizes,
 277                               * or a single PME object that handles different grid sizes gracefully.
 278                               */
 279
 280
 281     class EwaldBoxZScaler *boxScaler;   /**< The scaling data Ewald uses with walls (set at pme_init constant for the entire run) */
 282
 283
 284     int        ljpme_combination_rule;  /* Type of combination rule in LJ-PME */
 285
 286     int        ngrids;                  /* number of grids we maintain for pmegrid, (c)fftgrid and pfft_setups*/
 287
 288     pmegrids_t pmegrid[DO_Q_AND_LJ_LB]; /* Grids on which we do spreading/interpolation,
 289                                          * includes overlap Grid indices are ordered as
 290                                          * follows:
 291                                          * 0: Coloumb PME, state A
 292                                          * 1: Coloumb PME, state B
 293                                          * 2-8: LJ-PME
 294                                          * This can probably be done in a better way
 295                                          * but this simple hack works for now
 296                                          */
 297
 298     /* The PME coefficient spreading grid sizes/strides, includes pme_order-1 */
 299     int        pmegrid_nx, pmegrid_ny, pmegrid_nz;
 300     /* pmegrid_nz might be larger than strictly necessary to ensure
 301      * memory alignment, pmegrid_nz_base gives the real base size.
 302      */
 303     int     pmegrid_nz_base;
 304     /* The local PME grid starting indices */
 305     int     pmegrid_start_ix, pmegrid_start_iy, pmegrid_start_iz;
 306
 307     /* Work data for spreading and gathering */
 308     pme_spline_work          *spline_work;
 309
 310     real                    **fftgrid; /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
 311     /* inside the interpolation grid, but separate for 2D PME decomp. */
 312     int                       fftgrid_nx, fftgrid_ny, fftgrid_nz;
 313
 314     t_complex               **cfftgrid; /* Grids for complex FFT data */
 315
 316     int                       cfftgrid_nx, cfftgrid_ny, cfftgrid_nz;
 317
 318     gmx_parallel_3dfft_t     *pfft_setup;
 319
 320     int                      *nnx, *nny, *nnz;
 321     real                     *fshx, *fshy, *fshz;
 322
 323     pme_atomcomm_t            atc[2]; /* Indexed on decomposition index */
 324     matrix                    recipbox;
 325     real                      boxVolume;
 326     splinevec                 bsp_mod;
 327     /* Buffers to store data for local atoms for L-B combination rule
 328      * calculations in LJ-PME. lb_buf1 stores either the coefficients
 329      * for spreading/gathering (in serial), or the C6 coefficient for
 330      * local atoms (in parallel).  lb_buf2 is only used in parallel,
 331      * and stores the sigma values for local atoms. */
 332     real                 *lb_buf1, *lb_buf2;
 333     int                   lb_buf_nalloc; /* Allocation size for the above buffers. */
 334
 335     pme_overlap_t         overlap[2];    /* Indexed on dimension, 0=x, 1=y */
 336
 337     pme_atomcomm_t        atc_energy;    /* Only for gmx_pme_calc_energy */
 338
 339     rvec                 *bufv;          /* Communication buffer */
 340     real                 *bufr;          /* Communication buffer */
 341     int                   buf_nalloc;    /* The communication buffer size */
 342
 343     /* thread local work data for solve_pme */
 344     struct pme_solve_work_t *solve_work;
 345
 346     /* Work data for sum_qgrid */
 347     real *   sum_qgrid_tmp;
 348     real *   sum_qgrid_dd_tmp;
 349 };
 350
 351 //! @endcond
 352
 353 /*! \brief
 354  * Finds out if PME is currently running on GPU.
 355  * TODO: should this be removed eventually?
 356  *
 357  * \param[in] pme  The PME structure.
 358  * \returns        True if PME runs on GPU currently, false otherwise.
 359  */
 360 inline bool pme_gpu_active(const gmx_pme_t *pme)
 361 {
 362     return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
 363 }
 364
 365 /*! \brief Tell our PME-only node to switch to a new grid size */
 366 void gmx_pme_send_switchgrid(const t_commrec *cr,
 367                              ivec             grid_size,
 368                              real             ewaldcoeff_q,
 369                              real             ewaldcoeff_lj);
 370
 371 #endif