/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "pme-redistribute.h"

#include "config.h"

#include <algorithm>

#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme-internal.h"
#include "pme-simd.h"
static void pme_calc_pidx(int start, int end,
                          matrix recipbox, rvec x[],
                          pme_atomcomm_t *atc, int *count)
{
    int   nslab, i;
    int   si;
    real *xptr, s;
    real  rxx, ryx, rzx, ryy, rzy;
    int  *pd;

    /* Calculate the PME task index (pidx) for each atom.
     * Here we always assign equally sized slabs to each node
     * for load balancing reasons (the PME grid spacing is not used).
     * A worked example follows the function.
     */

    nslab = atc->nslab;
    pd    = atc->pd;

    /* Reset the per-slab counts */
    for (i = 0; i < nslab; i++)
    {
        count[i] = 0;
    }

    if (atc->dimind == 0)
    {
        rxx = recipbox[XX][XX];
        ryx = recipbox[YY][XX];
        rzx = recipbox[ZZ][XX];
        /* Calculate the node index in x-dimension */
        for (i = start; i < end; i++)
        {
            xptr  = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
            si    = static_cast<int>(s + 2*nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
    else
    {
        ryy = recipbox[YY][YY];
        rzy = recipbox[ZZ][YY];
        /* Calculate the node index in y-dimension */
        for (i = start; i < end; i++)
        {
            xptr  = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
            si    = static_cast<int>(s + 2*nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
}
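
/* Worked example of the slab assignment in pme_calc_pidx(), assuming for
 * illustration nslab = 4 and a rectangular box (only the diagonal of
 * recipbox contributes): an atom with fractional x-coordinate 0.30 gives
 * s = 4*0.30 = 1.2 and si = (int)(1.2 + 2*4) % 4 = 9 % 4 = 1, i.e. slab 1.
 * The +2*nslab shift keeps the truncation argument positive for atoms
 * slightly outside the unit cell: a fractional coordinate of -0.05 gives
 * s = -0.2 and si = (int)(7.8) % 4 = 3, so the atom wraps to the last slab.
 */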
static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
                                  pme_atomcomm_t *atc)
{
    int nthread, thread, slab;

    nthread = atc->nthread;

#pragma omp parallel for num_threads(nthread) schedule(static)
    for (thread = 0; thread < nthread; thread++)
    {
        try
        {
            pme_calc_pidx(natoms* thread   /nthread,
                          natoms*(thread+1)/nthread,
                          recipbox, x, atc, atc->count_thread[thread]);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }
    /* Non-parallel reduction, since nslab is small */

    for (thread = 1; thread < nthread; thread++)
    {
        for (slab = 0; slab < atc->nslab; slab++)
        {
            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
        }
    }
}
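
/* Illustration of the static work split above: with natoms = 10 and
 * nthread = 4, the (start, end) ranges passed to pme_calc_pidx() are
 * [0,2), [2,5), [5,7) and [7,10), since start = natoms*thread/nthread and
 * end = natoms*(thread+1)/nthread with integer division. Each thread fills
 * only its own atc->count_thread[thread] array, which is why the per-slab
 * counts can be summed afterwards without any synchronization.
 */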
static void realloc_splinevec(splinevec th, real **ptr_z, int nalloc)
{
    const int padding = 4;
    int       i;

    srenew(th[XX], nalloc);
    srenew(th[YY], nalloc);
    /* In z we add padding, this is only required for the aligned SIMD code */
    sfree_aligned(*ptr_z);
    snew_aligned(*ptr_z, nalloc+2*padding, SIMD4_ALIGNMENT);
    th[ZZ] = *ptr_z + padding;

    for (i = 0; i < padding; i++)
    {
        (*ptr_z)[               i] = 0;
        (*ptr_z)[padding+nalloc+i] = 0;
    }
}
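
/* Layout of the reallocated z buffer above, with the default padding of 4:
 *
 *   *ptr_z : [ 4 zeroed pad | nalloc usable elements | 4 zeroed pad ]
 *   th[ZZ] :                 ^ points here (*ptr_z + padding)
 *
 * The zeroed pad on both sides lets the aligned SIMD4 spline kernels read
 * and write whole SIMD widths at either end of the used range without
 * touching memory outside the allocation.
 */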
static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
{
    int i;

    srenew(spline->ind, atc->nalloc);
    /* Initialize the index to identity so it works without threads */
    for (i = 0; i < atc->nalloc; i++)
    {
        spline->ind[i] = i;
    }

    realloc_splinevec(spline->theta, &spline->ptr_theta_z,
                      atc->pme_order*atc->nalloc);
    realloc_splinevec(spline->dtheta, &spline->ptr_dtheta_z,
                      atc->pme_order*atc->nalloc);
}
void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
{
    int nalloc_old, i;

    /* We have to avoid a NULL pointer for atc->x to avoid
     * possible fatal errors in MPI routines.
     */
    if (atc->n > atc->nalloc || atc->nalloc == 0)
    {
        nalloc_old  = atc->nalloc;
        atc->nalloc = over_alloc_dd(std::max(atc->n, 1));

        if (atc->nslab > 1)
        {
            srenew(atc->x, atc->nalloc);
            srenew(atc->coefficient, atc->nalloc);
            srenew(atc->f, atc->nalloc);
            for (i = nalloc_old; i < atc->nalloc; i++)
            {
                clear_rvec(atc->f[i]);
            }
        }
        if (atc->bSpread)
        {
            srenew(atc->fractx, atc->nalloc);
            srenew(atc->idx, atc->nalloc);

            if (atc->nthread > 1)
            {
                srenew(atc->thread_idx, atc->nalloc);
            }

            for (i = 0; i < atc->nthread; i++)
            {
                pme_realloc_splinedata(&atc->spline[i], atc);
            }
        }
    }
}
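
/* Rough sketch of the growth policy above: over_alloc_dd(n) returns n
 * itself when the decomposition is static, and pads the request by a
 * modest factor (roughly 20%) when the local atom count can fluctuate,
 * so a slowly growing atc->n triggers only occasional reallocations
 * rather than one per call.
 */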
static void pme_dd_sendrecv(pme_atomcomm_t gmx_unused *atc,
                            gmx_bool gmx_unused bBackward, int gmx_unused shift,
                            void gmx_unused *buf_s, int gmx_unused nbyte_s,
                            void gmx_unused *buf_r, int gmx_unused nbyte_r)
{
#if GMX_MPI
    int        dest, src;
    MPI_Status stat;

    if (bBackward == FALSE)
    {
        dest = atc->node_dest[shift];
        src  = atc->node_src[shift];
    }
    else
    {
        dest = atc->node_src[shift];
        src  = atc->node_dest[shift];
    }

    if (nbyte_s > 0 && nbyte_r > 0)
    {
        MPI_Sendrecv(buf_s, nbyte_s, MPI_BYTE,
                     dest, shift,
                     buf_r, nbyte_r, MPI_BYTE,
                     src, shift,
                     atc->mpi_comm, &stat);
    }
    else if (nbyte_s > 0)
    {
        MPI_Send(buf_s, nbyte_s, MPI_BYTE,
                 dest, shift,
                 atc->mpi_comm);
    }
    else if (nbyte_r > 0)
    {
        MPI_Recv(buf_r, nbyte_r, MPI_BYTE,
                 src, shift,
                 atc->mpi_comm, &stat);
    }
#endif
}
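
/* Communication pattern of pme_dd_sendrecv(), sketched for one pulse
 * (shift index) of the redistribution: in the forward direction
 * (bBackward == FALSE) this rank sends to node_dest[shift] and receives
 * from node_src[shift]; in the backward direction, used when forces are
 * returned by dd_pmeredist_f(), the two roles are swapped so each message
 * retraces the path of the corresponding coordinate message. When one of
 * the byte counts is zero, the paired rank posts no matching call for that
 * direction, so a plain MPI_Send or MPI_Recv is used instead of
 * MPI_Sendrecv.
 */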
static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
                                    int n, gmx_bool bX, rvec *x, const real *data,
                                    pme_atomcomm_t *atc)
{
    int *commnode, *buf_index;
    int  nnodes_comm, i, nsend, local_pos, buf_pos, node, scount, rcount;

    commnode  = atc->node_dest;
    buf_index = atc->buf_index;

    nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);

    /* Set up the send-buffer offset for each destination rank
     * (see the packing example after this function).
     */
    nsend = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        buf_index[commnode[i]] = nsend;
        nsend                 += atc->count[commnode[i]];
    }
    if (bX)
    {
        if (atc->count[atc->nodeid] + nsend != n)
        {
            gmx_fatal(FARGS, "%d particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
                      "This usually means that your system is not well equilibrated.",
                      n - (atc->count[atc->nodeid] + nsend),
                      pme->nodeid, 'x'+atc->dimind);
        }

        if (nsend > pme->buf_nalloc)
        {
            pme->buf_nalloc = over_alloc_dd(nsend);
            srenew(pme->bufv, pme->buf_nalloc);
            srenew(pme->bufr, pme->buf_nalloc);
        }

        atc->n = atc->count[atc->nodeid];
        for (i = 0; i < nnodes_comm; i++)
        {
            scount = atc->count[commnode[i]];
            /* Communicate the count */
            if (debug)
            {
                fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n",
                        atc->dimind, atc->nodeid, commnode[i], scount);
            }
            pme_dd_sendrecv(atc, FALSE, i,
                            &scount, sizeof(int),
                            &atc->rcount[i], sizeof(int));
            atc->n += atc->rcount[i];
        }

        pme_realloc_atomcomm_things(atc);
    }

    local_pos = 0;
    for (i = 0; i < n; i++)
    {
        node = atc->pd[i];
        if (node == atc->nodeid)
        {
            /* Copy direct to the receive buffer */
            if (bX)
            {
                copy_rvec(x[i], atc->x[local_pos]);
            }
            atc->coefficient[local_pos] = data[i];
            local_pos++;
        }
        else
        {
            /* Copy to the send buffer */
            if (bX)
            {
                copy_rvec(x[i], pme->bufv[buf_index[node]]);
            }
            pme->bufr[buf_index[node]] = data[i];
            buf_index[node]++;
        }
    }

    buf_pos = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        scount = atc->count[commnode[i]];
        rcount = atc->rcount[i];
        if (scount > 0 || rcount > 0)
        {
            if (bX)
            {
                /* Communicate the coordinates */
                pme_dd_sendrecv(atc, FALSE, i,
                                pme->bufv[buf_pos], scount*sizeof(rvec),
                                atc->x[local_pos], rcount*sizeof(rvec));
            }
            /* Communicate the coefficients */
            pme_dd_sendrecv(atc, FALSE, i,
                            pme->bufr+buf_pos, scount*sizeof(real),
                            atc->coefficient+local_pos, rcount*sizeof(real));
            buf_pos   += scount;
            local_pos += atc->rcount[i];
        }
    }
}
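
/* Packing example for the send buffers built above, assuming for
 * illustration nnodes_comm = 2 with commnode = {a, b} and per-slab counts
 * count[nodeid] = 3, count[a] = 2, count[b] = 1 (so nsend = 3):
 *
 *   buf_index[a] = 0, buf_index[b] = 2
 *   pme->bufv / pme->bufr     : [ 2 atoms for rank a | 1 atom for rank b ]
 *   atc->x / atc->coefficient : the 3 local atoms first, then whatever
 *                               arrives from the neighbours
 *
 * The preceding count exchange tells each neighbour how many entries to
 * expect, so the pairwise pme_dd_sendrecv() calls use exact sizes.
 */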
void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
                    int n, rvec *f,
                    gmx_bool bAddF)
{
    int *commnode, *buf_index;
    int  nnodes_comm, local_pos, buf_pos, i, scount, rcount, node;

    commnode  = atc->node_dest;
    buf_index = atc->buf_index;

    nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);

    local_pos = atc->count[atc->nodeid];
    buf_pos   = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        scount = atc->rcount[i];
        rcount = atc->count[commnode[i]];
        if (scount > 0 || rcount > 0)
        {
            /* Communicate the forces */
            pme_dd_sendrecv(atc, TRUE, i,
                            atc->f[local_pos], scount*sizeof(rvec),
                            pme->bufv[buf_pos], rcount*sizeof(rvec));
            local_pos += scount;
        }
        buf_index[commnode[i]] = buf_pos;
        buf_pos               += rcount;
    }

    local_pos = 0;
    if (bAddF)
    {
        for (i = 0; i < n; i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Add from the local force array */
                rvec_inc(f[i], atc->f[local_pos]);
                local_pos++;
            }
            else
            {
                /* Add from the receive buffer */
                rvec_inc(f[i], pme->bufv[buf_index[node]]);
                buf_index[node]++;
            }
        }
    }
    else
    {
        for (i = 0; i < n; i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Copy from the local force array */
                copy_rvec(atc->f[local_pos], f[i]);
                local_pos++;
            }
            else
            {
                /* Copy from the receive buffer */
                copy_rvec(pme->bufv[buf_index[node]], f[i]);
                buf_index[node]++;
            }
        }
    }
}
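
/* dd_pmeredist_f() is the mirror image of dd_pmeredist_pos_coeffs(): what
 * this rank received there (atc->rcount[i]) it now sends back, and what it
 * sent (atc->count[commnode[i]]) it now receives, so the same buf_index
 * bookkeeping scatters the incoming forces back into the original atom
 * order, either accumulating into f[] (bAddF) or overwriting it.
 */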
void do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr, int start, int homenr,
                          gmx_bool bFirst, rvec x[], real *data)
{
    int             d;
    pme_atomcomm_t *atc;

    atc = &pme->atc[0];

    for (d = pme->ndecompdim - 1; d >= 0; d--)
    {
        int   n_d;
        rvec *x_d;
        real *param_d;

        if (d == pme->ndecompdim - 1)
        {
            n_d     = homenr;
            x_d     = x + start;
            param_d = data;
        }
        else
        {
            n_d     = pme->atc[d + 1].n;
            x_d     = atc->x;
            param_d = atc->coefficient;
        }
        atc      = &pme->atc[d];
        atc->npd = n_d;
        if (atc->npd > atc->pd_nalloc)
        {
            atc->pd_nalloc = over_alloc_dd(atc->npd);
            srenew(atc->pd, atc->pd_nalloc);
        }
        pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
        /* Redistribute x (only once) and qA/c6A or qB/c6B */
        if (DOMAINDECOMP(cr))
        {
            dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
        }
    }
}
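
/* Example of the decomposition loop above for a 2D PME decomposition
 * (ndecompdim = 2): the first pass (d = 1) bins and redistributes the
 * homenr home atoms along the minor dimension; the second pass (d = 0)
 * then takes the pme->atc[1].n atoms that arrived in the first pass,
 * together with the coefficients already stored there, and redistributes
 * them along the major dimension. With a 1D decomposition only the d = 0
 * pass runs, directly on the home atoms.
 */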