/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "pme-redistribute.h"

#include "config.h"

#include <algorithm>

#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme-internal.h"
#include "pme-simd.h"
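
/* Assigns each atom in [start,end) to a PME slab.
 * The slab index along the decomposition dimension (x for dimind == 0,
 * y otherwise) is computed from the fractional coordinate and stored in
 * atc->pd[i]; count[] accumulates the number of atoms per slab.
 */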
static void pme_calc_pidx(int start, int end,
                          matrix recipbox, rvec x[],
                          pme_atomcomm_t *atc, int *count)
{
    int   nslab, i;
    int   si;
    real *xptr, s;
    real  rxx, ryx, rzx, ryy, rzy;
    int  *pd;

    /* Calculate PME task index (pidx) for each grid index.
     * Here we always assign equally sized slabs to each node
     * for load balancing reasons (the PME grid spacing is not used).
     */

    nslab = atc->nslab;
    pd    = atc->pd;

    /* Reset the count */
    for (i = 0; i < nslab; i++)
    {
        count[i] = 0;
    }

    if (atc->dimind == 0)
    {
        rxx = recipbox[XX][XX];
        ryx = recipbox[YY][XX];
        rzx = recipbox[ZZ][XX];
        /* Calculate the node index in x-dimension */
        for (i = start; i < end; i++)
        {
            xptr  = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
            si    = static_cast<int>(s + 2*nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
    else
    {
        ryy = recipbox[YY][YY];
        rzy = recipbox[ZZ][YY];
        /* Calculate the node index in y-dimension */
        for (i = start; i < end; i++)
        {
            xptr  = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
            si    = static_cast<int>(s + 2*nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
}
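
/* Computes slab assignments for all atoms, splitting the atom range over
 * atc->nthread OpenMP threads. Each thread fills its own count array;
 * the per-thread counts are then reduced serially into count_thread[0],
 * which is cheap because the number of slabs is small.
 */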
static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
                                  pme_atomcomm_t *atc)
{
    int nthread, thread, slab;

    nthread = atc->nthread;

#pragma omp parallel for num_threads(nthread) schedule(static)
    for (thread = 0; thread < nthread; thread++)
    {
        try
        {
            pme_calc_pidx(natoms* thread   /nthread,
                          natoms*(thread+1)/nthread,
                          recipbox, x, atc, atc->count_thread[thread]);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }
    /* Non-parallel reduction, since nslab is small */

    for (thread = 1; thread < nthread; thread++)
    {
        for (slab = 0; slab < atc->nslab; slab++)
        {
            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
        }
    }
}
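
/* Reallocates the x/y/z components of a splinevec to hold nalloc elements.
 * The z component is allocated with SIMD4_ALIGNMENT and gets zeroed padding
 * on both sides for the aligned SIMD code; th[ZZ] points past the leading
 * padding.
 */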
static void realloc_splinevec(splinevec th, real **ptr_z, int nalloc)
{
    const int padding = 4;
    int       i;

    srenew(th[XX], nalloc);
    srenew(th[YY], nalloc);
    /* In z we add padding, this is only required for the aligned SIMD code */
    sfree_aligned(*ptr_z);
    snew_aligned(*ptr_z, nalloc+2*padding, SIMD4_ALIGNMENT);
    th[ZZ] = *ptr_z + padding;

    for (i = 0; i < padding; i++)
    {
        (*ptr_z)[               i] = 0;
        (*ptr_z)[padding+nalloc+i] = 0;
    }
}
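
/* Ensures the spline index and theta/dtheta buffers of one splinedata_t
 * can hold atc->nalloc atoms times pme_order spline coefficients.
 */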
static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
{
    int i;

    srenew(spline->ind, atc->nalloc);
    /* Initialize the index to identity so it works without threads */
    for (i = 0; i < atc->nalloc; i++)
    {
        spline->ind[i] = i;
    }

    realloc_splinevec(spline->theta, &spline->ptr_theta_z,
                      atc->pme_order*atc->nalloc);
    realloc_splinevec(spline->dtheta, &spline->ptr_dtheta_z,
                      atc->pme_order*atc->nalloc);
}
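
/* Grows the per-atom buffers of an atom communication struct when atc->n
 * exceeds the current allocation. Coordinate, coefficient and force buffers
 * are only needed with more than one slab; index and spline buffers are
 * only needed when this struct is used for spreading.
 */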
void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
{
    int nalloc_old, i;

    /* We have to avoid a NULL pointer for atc->x to avoid
     * possible fatal errors in MPI routines.
     */
    if (atc->n > atc->nalloc || atc->nalloc == 0)
    {
        nalloc_old  = atc->nalloc;
        atc->nalloc = over_alloc_dd(std::max(atc->n, 1));

        if (atc->nslab > 1)
        {
            srenew(atc->x, atc->nalloc);
            srenew(atc->coefficient, atc->nalloc);
            srenew(atc->f, atc->nalloc);
            for (i = nalloc_old; i < atc->nalloc; i++)
            {
                clear_rvec(atc->f[i]);
            }
        }
        if (atc->bSpread)
        {
            srenew(atc->fractx, atc->nalloc);
            srenew(atc->idx, atc->nalloc);

            if (atc->nthread > 1)
            {
                srenew(atc->thread_idx, atc->nalloc);
            }

            for (i = 0; i < atc->nthread; i++)
            {
                pme_realloc_splinedata(&atc->spline[i], atc);
            }
        }
    }
}
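
/* Exchanges a byte buffer with the PME neighbor rank shift slots away along
 * the slab decomposition; with bBackward the send and receive directions are
 * swapped. Depending on which buffer sizes are non-zero this is a combined
 * MPI_Sendrecv, a plain send, or a plain receive; without MPI it is a no-op.
 */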
static void pme_dd_sendrecv(pme_atomcomm_t gmx_unused *atc,
                            gmx_bool gmx_unused bBackward, int gmx_unused shift,
                            void gmx_unused *buf_s, int gmx_unused nbyte_s,
                            void gmx_unused *buf_r, int gmx_unused nbyte_r)
{
#if GMX_MPI
    int        dest, src;
    MPI_Status stat;

    if (!bBackward)
    {
        dest = atc->node_dest[shift];
        src  = atc->node_src[shift];
    }
    else
    {
        dest = atc->node_src[shift];
        src  = atc->node_dest[shift];
    }

    if (nbyte_s > 0 && nbyte_r > 0)
    {
        MPI_Sendrecv(buf_s, nbyte_s, MPI_BYTE,
                     dest, shift,
                     buf_r, nbyte_r, MPI_BYTE,
                     src, shift,
                     atc->mpi_comm, &stat);
    }
    else if (nbyte_s > 0)
    {
        MPI_Send(buf_s, nbyte_s, MPI_BYTE,
                 dest, shift,
                 atc->mpi_comm);
    }
    else if (nbyte_r > 0)
    {
        MPI_Recv(buf_r, nbyte_r, MPI_BYTE,
                 src, shift,
                 atc->mpi_comm, &stat);
    }
#endif
}
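
/* Redistributes coordinates (when bX) and coefficients from the domain
 * decomposition layout to the PME slab layout of atc. When bX, per-slab atom
 * counts are exchanged first and the receive buffers are resized; local atoms
 * are then copied directly, while atoms belonging to other slabs are packed
 * into send buffers and exchanged with the neighboring PME ranks.
 */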
static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
                                    int n, gmx_bool bX, rvec *x, const real *data,
                                    pme_atomcomm_t *atc)
{
    int *commnode, *buf_index;
    int  nnodes_comm, i, nsend, local_pos, buf_pos, node, scount, rcount;

    commnode  = atc->node_dest;
    buf_index = atc->buf_index;

    nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);

    nsend = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        buf_index[commnode[i]] = nsend;
        nsend                 += atc->count[commnode[i]];
    }
    if (bX)
    {
        if (atc->count[atc->nodeid] + nsend != n)
        {
            gmx_fatal(FARGS, "%d particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
                      "This usually means that your system is not well equilibrated.",
                      n - (atc->count[atc->nodeid] + nsend),
                      pme->nodeid, 'x'+atc->dimind);
        }

        if (nsend > pme->buf_nalloc)
        {
            pme->buf_nalloc = over_alloc_dd(nsend);
            srenew(pme->bufv, pme->buf_nalloc);
            srenew(pme->bufr, pme->buf_nalloc);
        }

        atc->n = atc->count[atc->nodeid];
        for (i = 0; i < nnodes_comm; i++)
        {
            scount = atc->count[commnode[i]];
            /* Communicate the count */
            if (debug)
            {
                fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n",
                        atc->dimind, atc->nodeid, commnode[i], scount);
            }
            pme_dd_sendrecv(atc, FALSE, i,
                            &scount, sizeof(int),
                            &atc->rcount[i], sizeof(int));
            atc->n += atc->rcount[i];
        }

        pme_realloc_atomcomm_things(atc);
    }

    local_pos = 0;
    for (i = 0; i < n; i++)
    {
        node = atc->pd[i];
        if (node == atc->nodeid)
        {
            /* Copy direct to the receive buffer */
            if (bX)
            {
                copy_rvec(x[i], atc->x[local_pos]);
            }
            atc->coefficient[local_pos] = data[i];
            local_pos++;
        }
        else
        {
            /* Copy to the send buffer */
            if (bX)
            {
                copy_rvec(x[i], pme->bufv[buf_index[node]]);
            }
            pme->bufr[buf_index[node]] = data[i];
            buf_index[node]++;
        }
    }

    buf_pos = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        scount = atc->count[commnode[i]];
        rcount = atc->rcount[i];
        if (scount > 0 || rcount > 0)
        {
            if (bX)
            {
                /* Communicate the coordinates */
                pme_dd_sendrecv(atc, FALSE, i,
                                pme->bufv[buf_pos], scount*sizeof(rvec),
                                atc->x[local_pos], rcount*sizeof(rvec));
            }
            /* Communicate the coefficients */
            pme_dd_sendrecv(atc, FALSE, i,
                            pme->bufr+buf_pos, scount*sizeof(real),
                            atc->coefficient+local_pos, rcount*sizeof(real));
            buf_pos   += scount;
            local_pos += atc->rcount[i];
        }
    }
}
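
/* Communicates PME forces back from the slab layout and either adds them to
 * (bAddF) or copies them into the force array f.
 */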
void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
                    int n, rvec *f,
                    gmx_bool bAddF)
{
    int *commnode, *buf_index;
    int  nnodes_comm, local_pos, buf_pos, i, scount, rcount, node;

    commnode  = atc->node_dest;
    buf_index = atc->buf_index;

    nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);

    local_pos = atc->count[atc->nodeid];
    buf_pos   = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        scount = atc->rcount[i];
        rcount = atc->count[commnode[i]];
        if (scount > 0 || rcount > 0)
        {
            /* Communicate the forces */
            pme_dd_sendrecv(atc, TRUE, i,
                            atc->f[local_pos], scount*sizeof(rvec),
                            pme->bufv[buf_pos], rcount*sizeof(rvec));
            local_pos += scount;
        }
        buf_index[commnode[i]] = buf_pos;
        buf_pos               += rcount;
    }

    local_pos = 0;
    if (bAddF)
    {
        for (i = 0; i < n; i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Add from the local force array */
                rvec_inc(f[i], atc->f[local_pos]);
                local_pos++;
            }
            else
            {
                /* Add from the receive buffer */
                rvec_inc(f[i], pme->bufv[buf_index[node]]);
                buf_index[node]++;
            }
        }
    }
    else
    {
        for (i = 0; i < n; i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Copy from the local force array */
                copy_rvec(atc->f[local_pos], f[i]);
                local_pos++;
            }
            else
            {
                /* Copy from the receive buffer */
                copy_rvec(pme->bufv[buf_index[node]], f[i]);
                buf_index[node]++;
            }
        }
    }
}
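
/* Redistributes positions (only when bFirst) and charge/C6 coefficients over
 * the PME decomposition dimensions. The loop runs from the last decomposition
 * dimension to the first; for each dimension the slab index of every atom is
 * computed and, with domain decomposition, the data is exchanged between the
 * PME ranks.
 */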
void
do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr, int start, int homenr,
                     gmx_bool bFirst, rvec x[], real *data)
{
    int             d;
    pme_atomcomm_t *atc;
    atc = &pme->atc[0];

    for (d = pme->ndecompdim - 1; d >= 0; d--)
    {
        int   n_d;
        rvec *x_d;
        real *param_d;

        if (d == pme->ndecompdim - 1)
        {
            n_d     = homenr;
            x_d     = x + start;
            param_d = data;
        }
        else
        {
            n_d     = pme->atc[d + 1].n;
            x_d     = atc->x;
            param_d = atc->coefficient;
        }
        atc      = &pme->atc[d];
        atc->npd = n_d;
        if (atc->npd > atc->pd_nalloc)
        {
            atc->pd_nalloc = over_alloc_dd(atc->npd);
            srenew(atc->pd, atc->pd_nalloc);
        }
        pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
        /* Redistribute x (only once) and qA/c6A or qB/c6B */
        if (DOMAINDECOMP(cr))
        {
            dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
        }
    }
}