added os-specific defines from cmake required by memtestG80
[gromacs/qmmm-gamess-us.git] / src / mdlib / domdec.c
blob68e09532b57c8c7a7415161b95d3f72cc1aac81b
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
19 #ifdef HAVE_CONFIG_H
20 #include <config.h>
21 #endif
23 #include <stdio.h>
24 #include <time.h>
25 #include <math.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include "typedefs.h"
29 #include "smalloc.h"
30 #include "vec.h"
31 #include "domdec.h"
32 #include "domdec_network.h"
33 #include "nrnb.h"
34 #include "pbc.h"
35 #include "chargegroup.h"
36 #include "constr.h"
37 #include "mdatoms.h"
38 #include "names.h"
39 #include "pdbio.h"
40 #include "futil.h"
41 #include "force.h"
42 #include "pme.h"
43 #include "pull.h"
44 #include "gmx_wallcycle.h"
45 #include "mdrun.h"
46 #include "nsgrid.h"
47 #include "shellfc.h"
48 #include "mtop_util.h"
49 #include "gmxfio.h"
50 #include "gmx_ga2la.h"
52 #ifdef GMX_LIB_MPI
53 #include <mpi.h>
54 #endif
55 #ifdef GMX_THREADS
56 #include "tmpi.h"
57 #endif
59 #define DDRANK(dd,rank) (rank)
60 #define DDMASTERRANK(dd) (dd->masterrank)
62 typedef struct gmx_domdec_master
64 /* The cell boundaries */
65 real **cell_x;
66 /* The global charge group division */
67 int *ncg; /* Number of home charge groups for each node */
68 int *index; /* Index of nnodes+1 into cg */
69 int *cg; /* Global charge group index */
70 int *nat; /* Number of home atoms for each node. */
71 int *ibuf; /* Buffer for communication */
72 rvec *vbuf; /* Buffer for state scattering and gathering */
73 } gmx_domdec_master_t;
75 typedef struct
77 /* The numbers of charge groups to send and receive for each cell
78 * that requires communication, the last entry contains the total
79 * number of atoms that needs to be communicated.
81 int nsend[DD_MAXIZONE+2];
82 int nrecv[DD_MAXIZONE+2];
83 /* The charge groups to send */
84 int *index;
85 int nalloc;
86 /* The atom range for non-in-place communication */
87 int cell2at0[DD_MAXIZONE];
88 int cell2at1[DD_MAXIZONE];
89 } gmx_domdec_ind_t;
91 typedef struct
93 int np; /* Number of grid pulses in this dimension */
94 int np_dlb; /* For dlb, for use with edlbAUTO */
95 gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
96 int np_nalloc;
97 bool bInPlace; /* Can we communicate in place? */
98 } gmx_domdec_comm_dim_t;
100 typedef struct
102 bool *bCellMin; /* Temp. var.: is this cell size at the limit */
103 real *cell_f; /* State var.: cell boundaries, box relative */
104 real *old_cell_f; /* Temp. var.: old cell size */
105 real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
106 real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
107 real *bound_min; /* Temp. var.: lower limit for cell boundary */
108 real *bound_max; /* Temp. var.: upper limit for cell boundary */
109 bool bLimited; /* State var.: is DLB limited in this dim and row */
110 real *buf_ncd; /* Temp. var. */
111 } gmx_domdec_root_t;
113 #define DD_NLOAD_MAX 9
115 /* Here floats are accurate enough, since these variables
116 * only influence the load balancing, not the actual MD results.
118 typedef struct
120 int nload;
121 float *load;
122 float sum;
123 float max;
124 float sum_m;
125 float cvol_min;
126 float mdf;
127 float pme;
128 int flags;
129 } gmx_domdec_load_t;
131 typedef struct
133 int nsc;
134 int ind_gl;
135 int ind;
136 } gmx_cgsort_t;
138 typedef struct
140 gmx_cgsort_t *sort1,*sort2;
141 int sort_nalloc;
142 gmx_cgsort_t *sort_new;
143 int sort_new_nalloc;
144 int *ibuf;
145 int ibuf_nalloc;
146 } gmx_domdec_sort_t;
148 typedef struct
150 rvec *v;
151 int nalloc;
152 } vec_rvec_t;
154 /* This enum determines the order of the coordinates.
155 * ddnatHOME and ddnatZONE should be first and second,
156 * the others can be ordered as wanted.
158 enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
160 enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
161 const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
163 typedef struct
165 int dimind; /* The dimension index */
166 int nslab; /* The number of PME slabs in this dimension */
167 real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
168 int *pp_min; /* The minimum pp node location, size nslab */
169 int *pp_max; /* The maximum pp node location,size nslab */
170 int maxshift; /* The maximum shift for coordinate redistribution in PME */
171 } gmx_ddpme_t;
173 typedef struct
175 real min0; /* The minimum bottom of this zone */
176 real max1; /* The maximum top of this zone */
177 real mch0; /* The maximum bottom communicaton height for this zone */
178 real mch1; /* The maximum top communicaton height for this zone */
179 real p1_0; /* The bottom value of the first cell in this zone */
180 real p1_1; /* The top value of the first cell in this zone */
181 } gmx_ddzone_t;
183 typedef struct gmx_domdec_comm
185 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
186 * unless stated otherwise.
189 /* The number of decomposition dimensions for PME, 0: no PME */
190 int npmedecompdim;
191 /* The number of nodes doing PME (PP/PME or only PME) */
192 int npmenodes;
193 int npmenodes_major;
194 int npmenodes_minor;
195 /* The communication setup including the PME only nodes */
196 bool bCartesianPP_PME;
197 ivec ntot;
198 int cartpmedim;
199 int *pmenodes; /* size npmenodes */
200 int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
201 * but with bCartesianPP_PME */
202 gmx_ddpme_t ddpme[2];
204 /* The DD particle-particle nodes only */
205 bool bCartesianPP;
206 int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
208 /* The global charge groups */
209 t_block cgs_gl;
211 /* Should we sort the cgs */
212 int nstSortCG;
213 gmx_domdec_sort_t *sort;
215 /* Are there bonded and multi-body interactions between charge groups? */
216 bool bInterCGBondeds;
217 bool bInterCGMultiBody;
219 /* Data for the optional bonded interaction atom communication range */
220 bool bBondComm;
221 t_blocka *cglink;
222 char *bLocalCG;
224 /* The DLB option */
225 int eDLB;
226 /* Are we actually using DLB? */
227 bool bDynLoadBal;
229 /* Cell sizes for static load balancing, first index cartesian */
230 real **slb_frac;
232 /* The width of the communicated boundaries */
233 real cutoff_mbody;
234 real cutoff;
235 /* The minimum cell size (including triclinic correction) */
236 rvec cellsize_min;
237 /* For dlb, for use with edlbAUTO */
238 rvec cellsize_min_dlb;
239 /* The lower limit for the DD cell size with DLB */
240 real cellsize_limit;
241 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
242 bool bVacDLBNoLimit;
244 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
245 ivec tric_dir;
246 /* box0 and box_size are required with dim's without pbc and -gcom */
247 rvec box0;
248 rvec box_size;
250 /* The cell boundaries */
251 rvec cell_x0;
252 rvec cell_x1;
254 /* The old location of the cell boundaries, to check cg displacements */
255 rvec old_cell_x0;
256 rvec old_cell_x1;
258 /* The communication setup and charge group boundaries for the zones */
259 gmx_domdec_zones_t zones;
261 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
262 * cell boundaries of neighboring cells for dynamic load balancing.
264 gmx_ddzone_t zone_d1[2];
265 gmx_ddzone_t zone_d2[2][2];
267 /* The coordinate/force communication setup and indices */
268 gmx_domdec_comm_dim_t cd[DIM];
269 /* The maximum number of cells to communicate with in one dimension */
270 int maxpulse;
272 /* Which cg distribution is stored on the master node */
273 int master_cg_ddp_count;
275 /* The number of cg's received from the direct neighbors */
276 int zone_ncg1[DD_MAXZONE];
278 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
279 int nat[ddnatNR];
281 /* Communication buffer for general use */
282 int *buf_int;
283 int nalloc_int;
285 /* Communication buffer for general use */
286 vec_rvec_t vbuf;
288 /* Communication buffers only used with multiple grid pulses */
289 int *buf_int2;
290 int nalloc_int2;
291 vec_rvec_t vbuf2;
293 /* Communication buffers for local redistribution */
294 int **cggl_flag;
295 int cggl_flag_nalloc[DIM*2];
296 rvec **cgcm_state;
297 int cgcm_state_nalloc[DIM*2];
299 /* Cell sizes for dynamic load balancing */
300 gmx_domdec_root_t **root;
301 real *cell_f_row;
302 real cell_f0[DIM];
303 real cell_f1[DIM];
304 real cell_f_max0[DIM];
305 real cell_f_min1[DIM];
307 /* Stuff for load communication */
308 bool bRecordLoad;
309 gmx_domdec_load_t *load;
310 #ifdef GMX_MPI
311 MPI_Comm *mpi_comm_load;
312 #endif
313 /* Cycle counters */
314 float cycl[ddCyclNr];
315 int cycl_n[ddCyclNr];
316 float cycl_max[ddCyclNr];
317 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
318 int eFlop;
319 double flop;
320 int flop_n;
321 /* Have often have did we have load measurements */
322 int n_load_have;
323 /* Have often have we collected the load measurements */
324 int n_load_collect;
326 /* Statistics */
327 double sum_nat[ddnatNR-ddnatZONE];
328 int ndecomp;
329 int nload;
330 double load_step;
331 double load_sum;
332 double load_max;
333 ivec load_lim;
334 double load_mdf;
335 double load_pme;
337 /* The last partition step */
338 gmx_large_int_t partition_step;
340 /* Debugging */
341 int nstDDDump;
342 int nstDDDumpGrid;
343 int DD_debug;
344 } gmx_domdec_comm_t;
346 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
347 #define DD_CGIBS 2
349 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
350 #define DD_FLAG_NRCG 65535
351 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
352 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
354 /* Zone permutation required to obtain consecutive charge groups
355 * for neighbor searching.
357 static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
359 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
360 * components see only j zones with that component 0.
363 /* The DD zone order */
364 static const ivec dd_zo[DD_MAXZONE] =
365 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
367 /* The 3D setup */
368 #define dd_z3n 8
369 #define dd_zp3n 4
370 static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
372 /* The 2D setup */
373 #define dd_z2n 4
374 #define dd_zp2n 2
375 static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
377 /* The 1D setup */
378 #define dd_z1n 2
379 #define dd_zp1n 1
380 static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
382 /* Factors used to avoid problems due to rounding issues */
383 #define DD_CELL_MARGIN 1.0001
384 #define DD_CELL_MARGIN2 1.00005
385 /* Factor to account for pressure scaling during nstlist steps */
386 #define DD_PRES_SCALE_MARGIN 1.02
388 /* Allowed performance loss before we DLB or warn */
389 #define DD_PERF_LOSS 0.05
391 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
393 /* Use separate MPI send and receive commands
394 * when nnodes <= GMX_DD_NNODES_SENDRECV.
395 * This saves memory (and some copying for small nnodes).
396 * For high parallelization scatter and gather calls are used.
398 #define GMX_DD_NNODES_SENDRECV 4
402 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
404 static void index2xyz(ivec nc,int ind,ivec xyz)
406 xyz[XX] = ind % nc[XX];
407 xyz[YY] = (ind / nc[XX]) % nc[YY];
408 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
412 /* This order is required to minimize the coordinate communication in PME
413 * which uses decomposition in the x direction.
415 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
417 static void ddindex2xyz(ivec nc,int ind,ivec xyz)
419 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
420 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
421 xyz[ZZ] = ind % nc[ZZ];
424 static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
426 int ddindex;
427 int ddnodeid=-1;
429 ddindex = dd_index(dd->nc,c);
430 if (dd->comm->bCartesianPP_PME)
432 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
434 else if (dd->comm->bCartesianPP)
436 #ifdef GMX_MPI
437 MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
438 #endif
440 else
442 ddnodeid = ddindex;
445 return ddnodeid;
448 static bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
450 return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
453 int ddglatnr(gmx_domdec_t *dd,int i)
455 int atnr;
457 if (dd == NULL)
459 atnr = i + 1;
461 else
463 if (i >= dd->comm->nat[ddnatNR-1])
465 gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
467 atnr = dd->gatindex[i] + 1;
470 return atnr;
473 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
475 return &dd->comm->cgs_gl;
478 static void vec_rvec_init(vec_rvec_t *v)
480 v->nalloc = 0;
481 v->v = NULL;
484 static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
486 if (n > v->nalloc)
488 v->nalloc = over_alloc_dd(n);
489 srenew(v->v,v->nalloc);
493 void dd_store_state(gmx_domdec_t *dd,t_state *state)
495 int i;
497 if (state->ddp_count != dd->ddp_count)
499 gmx_incons("The state does not the domain decomposition state");
502 state->ncg_gl = dd->ncg_home;
503 if (state->ncg_gl > state->cg_gl_nalloc)
505 state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
506 srenew(state->cg_gl,state->cg_gl_nalloc);
508 for(i=0; i<state->ncg_gl; i++)
510 state->cg_gl[i] = dd->index_gl[i];
513 state->ddp_count_cg_gl = dd->ddp_count;
516 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
518 return &dd->comm->zones;
521 void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
522 int *jcg0,int *jcg1,ivec shift0,ivec shift1)
524 gmx_domdec_zones_t *zones;
525 int izone,d,dim;
527 zones = &dd->comm->zones;
529 izone = 0;
530 while (icg >= zones->izone[izone].cg1)
532 izone++;
535 if (izone == 0)
537 *jcg0 = icg;
539 else if (izone < zones->nizone)
541 *jcg0 = zones->izone[izone].jcg0;
543 else
545 gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
546 icg,izone,zones->nizone);
549 *jcg1 = zones->izone[izone].jcg1;
551 for(d=0; d<dd->ndim; d++)
553 dim = dd->dim[d];
554 shift0[dim] = zones->izone[izone].shift0[dim];
555 shift1[dim] = zones->izone[izone].shift1[dim];
556 if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
558 /* A conservative approach, this can be optimized */
559 shift0[dim] -= 1;
560 shift1[dim] += 1;
565 int dd_natoms_vsite(gmx_domdec_t *dd)
567 return dd->comm->nat[ddnatVSITE];
570 void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
572 *at_start = dd->comm->nat[ddnatCON-1];
573 *at_end = dd->comm->nat[ddnatCON];
576 void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
578 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
579 int *index,*cgindex;
580 gmx_domdec_comm_t *comm;
581 gmx_domdec_comm_dim_t *cd;
582 gmx_domdec_ind_t *ind;
583 rvec shift={0,0,0},*buf,*rbuf;
584 bool bPBC,bScrew;
586 comm = dd->comm;
588 cgindex = dd->cgindex;
590 buf = comm->vbuf.v;
592 nzone = 1;
593 nat_tot = dd->nat_home;
594 for(d=0; d<dd->ndim; d++)
596 bPBC = (dd->ci[dd->dim[d]] == 0);
597 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
598 if (bPBC)
600 copy_rvec(box[dd->dim[d]],shift);
602 cd = &comm->cd[d];
603 for(p=0; p<cd->np; p++)
605 ind = &cd->ind[p];
606 index = ind->index;
607 n = 0;
608 if (!bPBC)
610 for(i=0; i<ind->nsend[nzone]; i++)
612 at0 = cgindex[index[i]];
613 at1 = cgindex[index[i]+1];
614 for(j=at0; j<at1; j++)
616 copy_rvec(x[j],buf[n]);
617 n++;
621 else if (!bScrew)
623 for(i=0; i<ind->nsend[nzone]; i++)
625 at0 = cgindex[index[i]];
626 at1 = cgindex[index[i]+1];
627 for(j=at0; j<at1; j++)
629 /* We need to shift the coordinates */
630 rvec_add(x[j],shift,buf[n]);
631 n++;
635 else
637 for(i=0; i<ind->nsend[nzone]; i++)
639 at0 = cgindex[index[i]];
640 at1 = cgindex[index[i]+1];
641 for(j=at0; j<at1; j++)
643 /* Shift x */
644 buf[n][XX] = x[j][XX] + shift[XX];
645 /* Rotate y and z.
646 * This operation requires a special shift force
647 * treatment, which is performed in calc_vir.
649 buf[n][YY] = box[YY][YY] - x[j][YY];
650 buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
651 n++;
656 if (cd->bInPlace)
658 rbuf = x + nat_tot;
660 else
662 rbuf = comm->vbuf2.v;
664 /* Send and receive the coordinates */
665 dd_sendrecv_rvec(dd, d, dddirBackward,
666 buf, ind->nsend[nzone+1],
667 rbuf, ind->nrecv[nzone+1]);
668 if (!cd->bInPlace)
670 j = 0;
671 for(zone=0; zone<nzone; zone++)
673 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
675 copy_rvec(rbuf[j],x[i]);
676 j++;
680 nat_tot += ind->nrecv[nzone+1];
682 nzone += nzone;
686 void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
688 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
689 int *index,*cgindex;
690 gmx_domdec_comm_t *comm;
691 gmx_domdec_comm_dim_t *cd;
692 gmx_domdec_ind_t *ind;
693 rvec *buf,*sbuf;
694 ivec vis;
695 int is;
696 bool bPBC,bScrew;
698 comm = dd->comm;
700 cgindex = dd->cgindex;
702 buf = comm->vbuf.v;
704 n = 0;
705 nzone = comm->zones.n/2;
706 nat_tot = dd->nat_tot;
707 for(d=dd->ndim-1; d>=0; d--)
709 bPBC = (dd->ci[dd->dim[d]] == 0);
710 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
711 if (fshift == NULL && !bScrew)
713 bPBC = FALSE;
715 /* Determine which shift vector we need */
716 clear_ivec(vis);
717 vis[dd->dim[d]] = 1;
718 is = IVEC2IS(vis);
720 cd = &comm->cd[d];
721 for(p=cd->np-1; p>=0; p--) {
722 ind = &cd->ind[p];
723 nat_tot -= ind->nrecv[nzone+1];
724 if (cd->bInPlace)
726 sbuf = f + nat_tot;
728 else
730 sbuf = comm->vbuf2.v;
731 j = 0;
732 for(zone=0; zone<nzone; zone++)
734 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
736 copy_rvec(f[i],sbuf[j]);
737 j++;
741 /* Communicate the forces */
742 dd_sendrecv_rvec(dd, d, dddirForward,
743 sbuf, ind->nrecv[nzone+1],
744 buf, ind->nsend[nzone+1]);
745 index = ind->index;
746 /* Add the received forces */
747 n = 0;
748 if (!bPBC)
750 for(i=0; i<ind->nsend[nzone]; i++)
752 at0 = cgindex[index[i]];
753 at1 = cgindex[index[i]+1];
754 for(j=at0; j<at1; j++)
756 rvec_inc(f[j],buf[n]);
757 n++;
761 else if (!bScrew)
763 for(i=0; i<ind->nsend[nzone]; i++)
765 at0 = cgindex[index[i]];
766 at1 = cgindex[index[i]+1];
767 for(j=at0; j<at1; j++)
769 rvec_inc(f[j],buf[n]);
770 /* Add this force to the shift force */
771 rvec_inc(fshift[is],buf[n]);
772 n++;
776 else
778 for(i=0; i<ind->nsend[nzone]; i++)
780 at0 = cgindex[index[i]];
781 at1 = cgindex[index[i]+1];
782 for(j=at0; j<at1; j++)
784 /* Rotate the force */
785 f[j][XX] += buf[n][XX];
786 f[j][YY] -= buf[n][YY];
787 f[j][ZZ] -= buf[n][ZZ];
788 if (fshift)
790 /* Add this force to the shift force */
791 rvec_inc(fshift[is],buf[n]);
793 n++;
798 nzone /= 2;
802 void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
804 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
805 int *index,*cgindex;
806 gmx_domdec_comm_t *comm;
807 gmx_domdec_comm_dim_t *cd;
808 gmx_domdec_ind_t *ind;
809 real *buf,*rbuf;
811 comm = dd->comm;
813 cgindex = dd->cgindex;
815 buf = &comm->vbuf.v[0][0];
817 nzone = 1;
818 nat_tot = dd->nat_home;
819 for(d=0; d<dd->ndim; d++)
821 cd = &comm->cd[d];
822 for(p=0; p<cd->np; p++)
824 ind = &cd->ind[p];
825 index = ind->index;
826 n = 0;
827 for(i=0; i<ind->nsend[nzone]; i++)
829 at0 = cgindex[index[i]];
830 at1 = cgindex[index[i]+1];
831 for(j=at0; j<at1; j++)
833 buf[n] = v[j];
834 n++;
838 if (cd->bInPlace)
840 rbuf = v + nat_tot;
842 else
844 rbuf = &comm->vbuf2.v[0][0];
846 /* Send and receive the coordinates */
847 dd_sendrecv_real(dd, d, dddirBackward,
848 buf, ind->nsend[nzone+1],
849 rbuf, ind->nrecv[nzone+1]);
850 if (!cd->bInPlace)
852 j = 0;
853 for(zone=0; zone<nzone; zone++)
855 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
857 v[i] = rbuf[j];
858 j++;
862 nat_tot += ind->nrecv[nzone+1];
864 nzone += nzone;
868 void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
870 int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
871 int *index,*cgindex;
872 gmx_domdec_comm_t *comm;
873 gmx_domdec_comm_dim_t *cd;
874 gmx_domdec_ind_t *ind;
875 real *buf,*sbuf;
877 comm = dd->comm;
879 cgindex = dd->cgindex;
881 buf = &comm->vbuf.v[0][0];
883 n = 0;
884 nzone = comm->zones.n/2;
885 nat_tot = dd->nat_tot;
886 for(d=dd->ndim-1; d>=0; d--)
888 cd = &comm->cd[d];
889 for(p=cd->np-1; p>=0; p--) {
890 ind = &cd->ind[p];
891 nat_tot -= ind->nrecv[nzone+1];
892 if (cd->bInPlace)
894 sbuf = v + nat_tot;
896 else
898 sbuf = &comm->vbuf2.v[0][0];
899 j = 0;
900 for(zone=0; zone<nzone; zone++)
902 for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
904 sbuf[j] = v[i];
905 j++;
909 /* Communicate the forces */
910 dd_sendrecv_real(dd, d, dddirForward,
911 sbuf, ind->nrecv[nzone+1],
912 buf, ind->nsend[nzone+1]);
913 index = ind->index;
914 /* Add the received forces */
915 n = 0;
916 for(i=0; i<ind->nsend[nzone]; i++)
918 at0 = cgindex[index[i]];
919 at1 = cgindex[index[i]+1];
920 for(j=at0; j<at1; j++)
922 v[j] += buf[n];
923 n++;
927 nzone /= 2;
931 static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
933 fprintf(fp,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
934 d,i,j,
935 zone->min0,zone->max1,
936 zone->mch0,zone->mch0,
937 zone->p1_0,zone->p1_1);
940 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
941 int ddimind,int direction,
942 gmx_ddzone_t *buf_s,int n_s,
943 gmx_ddzone_t *buf_r,int n_r)
945 rvec vbuf_s[5*2],vbuf_r[5*2];
946 int i;
948 for(i=0; i<n_s; i++)
950 vbuf_s[i*2 ][0] = buf_s[i].min0;
951 vbuf_s[i*2 ][1] = buf_s[i].max1;
952 vbuf_s[i*2 ][2] = buf_s[i].mch0;
953 vbuf_s[i*2+1][0] = buf_s[i].mch1;
954 vbuf_s[i*2+1][1] = buf_s[i].p1_0;
955 vbuf_s[i*2+1][2] = buf_s[i].p1_1;
958 dd_sendrecv_rvec(dd, ddimind, direction,
959 vbuf_s, n_s*2,
960 vbuf_r, n_r*2);
962 for(i=0; i<n_r; i++)
964 buf_r[i].min0 = vbuf_r[i*2 ][0];
965 buf_r[i].max1 = vbuf_r[i*2 ][1];
966 buf_r[i].mch0 = vbuf_r[i*2 ][2];
967 buf_r[i].mch1 = vbuf_r[i*2+1][0];
968 buf_r[i].p1_0 = vbuf_r[i*2+1][1];
969 buf_r[i].p1_1 = vbuf_r[i*2+1][2];
973 static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
974 rvec cell_ns_x0,rvec cell_ns_x1)
976 int d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
977 gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
978 rvec extr_s[2],extr_r[2];
979 rvec dh;
980 real dist_d,c=0,det;
981 gmx_domdec_comm_t *comm;
982 bool bPBC,bUse;
984 comm = dd->comm;
986 for(d=1; d<dd->ndim; d++)
988 dim = dd->dim[d];
989 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
990 zp->min0 = cell_ns_x0[dim];
991 zp->max1 = cell_ns_x1[dim];
992 zp->mch0 = cell_ns_x0[dim];
993 zp->mch1 = cell_ns_x1[dim];
994 zp->p1_0 = cell_ns_x0[dim];
995 zp->p1_1 = cell_ns_x1[dim];
998 for(d=dd->ndim-2; d>=0; d--)
1000 dim = dd->dim[d];
1001 bPBC = (dim < ddbox->npbcdim);
1003 /* Use an rvec to store two reals */
1004 extr_s[d][0] = comm->cell_f0[d+1];
1005 extr_s[d][1] = comm->cell_f1[d+1];
1006 extr_s[d][2] = 0;
1008 pos = 0;
1009 /* Store the extremes in the backward sending buffer,
1010 * so the get updated separately from the forward communication.
1012 for(d1=d; d1<dd->ndim-1; d1++)
1014 /* We invert the order to be able to use the same loop for buf_e */
1015 buf_s[pos].min0 = extr_s[d1][1];
1016 buf_s[pos].max1 = extr_s[d1][0];
1017 buf_s[pos].mch0 = 0;
1018 buf_s[pos].mch1 = 0;
1019 /* Store the cell corner of the dimension we communicate along */
1020 buf_s[pos].p1_0 = comm->cell_x0[dim];
1021 buf_s[pos].p1_1 = 0;
1022 pos++;
1025 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
1026 pos++;
1028 if (dd->ndim == 3 && d == 0)
1030 buf_s[pos] = comm->zone_d2[0][1];
1031 pos++;
1032 buf_s[pos] = comm->zone_d1[0];
1033 pos++;
1036 /* We only need to communicate the extremes
1037 * in the forward direction
1039 npulse = comm->cd[d].np;
1040 if (bPBC)
1042 /* Take the minimum to avoid double communication */
1043 npulse_min = min(npulse,dd->nc[dim]-1-npulse);
1045 else
1047 /* Without PBC we should really not communicate over
1048 * the boundaries, but implementing that complicates
1049 * the communication setup and therefore we simply
1050 * do all communication, but ignore some data.
1052 npulse_min = npulse;
1054 for(p=0; p<npulse_min; p++)
1056 /* Communicate the extremes forward */
1057 bUse = (bPBC || dd->ci[dim] > 0);
1059 dd_sendrecv_rvec(dd, d, dddirForward,
1060 extr_s+d, dd->ndim-d-1,
1061 extr_r+d, dd->ndim-d-1);
1063 if (bUse)
1065 for(d1=d; d1<dd->ndim-1; d1++)
1067 extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
1068 extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
1073 buf_size = pos;
1074 for(p=0; p<npulse; p++)
1076 /* Communicate all the zone information backward */
1077 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
1079 dd_sendrecv_ddzone(dd, d, dddirBackward,
1080 buf_s, buf_size,
1081 buf_r, buf_size);
1083 clear_rvec(dh);
1084 if (p > 0)
1086 for(d1=d+1; d1<dd->ndim; d1++)
1088 /* Determine the decrease of maximum required
1089 * communication height along d1 due to the distance along d,
1090 * this avoids a lot of useless atom communication.
1092 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
1094 if (ddbox->tric_dir[dim])
1096 /* c is the off-diagonal coupling between the cell planes
1097 * along directions d and d1.
1099 c = ddbox->v[dim][dd->dim[d1]][dim];
1101 else
1103 c = 0;
1105 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
1106 if (det > 0)
1108 dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
1110 else
1112 /* A negative value signals out of range */
1113 dh[d1] = -1;
1118 /* Accumulate the extremes over all pulses */
1119 for(i=0; i<buf_size; i++)
1121 if (p == 0)
1123 buf_e[i] = buf_r[i];
1125 else
1127 if (bUse)
1129 buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
1130 buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
1133 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
1135 d1 = 1;
1137 else
1139 d1 = d + 1;
1141 if (bUse && dh[d1] >= 0)
1143 buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
1144 buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
1147 /* Copy the received buffer to the send buffer,
1148 * to pass the data through with the next pulse.
1150 buf_s[i] = buf_r[i];
1152 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
1153 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
1155 /* Store the extremes */
1156 pos = 0;
1158 for(d1=d; d1<dd->ndim-1; d1++)
1160 extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
1161 extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
1162 pos++;
1165 if (d == 1 || (d == 0 && dd->ndim == 3))
1167 for(i=d; i<2; i++)
1169 comm->zone_d2[1-d][i] = buf_e[pos];
1170 pos++;
1173 if (d == 0)
1175 comm->zone_d1[1] = buf_e[pos];
1176 pos++;
1182 if (dd->ndim >= 2)
1184 dim = dd->dim[1];
1185 for(i=0; i<2; i++)
1187 if (debug)
1189 print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
1191 cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
1192 cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
1195 if (dd->ndim >= 3)
1197 dim = dd->dim[2];
1198 for(i=0; i<2; i++)
1200 for(j=0; j<2; j++)
1202 if (debug)
1204 print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
1206 cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
1207 cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
1211 for(d=1; d<dd->ndim; d++)
1213 comm->cell_f_max0[d] = extr_s[d-1][0];
1214 comm->cell_f_min1[d] = extr_s[d-1][1];
1215 if (debug)
1217 fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
1218 d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
1223 static void dd_collect_cg(gmx_domdec_t *dd,
1224 t_state *state_local)
1226 gmx_domdec_master_t *ma=NULL;
1227 int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
1228 t_block *cgs_gl;
1230 if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
1232 /* The master has the correct distribution */
1233 return;
1236 if (state_local->ddp_count == dd->ddp_count)
1238 ncg_home = dd->ncg_home;
1239 cg = dd->index_gl;
1240 nat_home = dd->nat_home;
1242 else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
1244 cgs_gl = &dd->comm->cgs_gl;
1246 ncg_home = state_local->ncg_gl;
1247 cg = state_local->cg_gl;
1248 nat_home = 0;
1249 for(i=0; i<ncg_home; i++)
1251 nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
1254 else
1256 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1259 buf2[0] = dd->ncg_home;
1260 buf2[1] = dd->nat_home;
1261 if (DDMASTER(dd))
1263 ma = dd->ma;
1264 ibuf = ma->ibuf;
1266 else
1268 ibuf = NULL;
1270 /* Collect the charge group and atom counts on the master */
1271 dd_gather(dd,2*sizeof(int),buf2,ibuf);
1273 if (DDMASTER(dd))
1275 ma->index[0] = 0;
1276 for(i=0; i<dd->nnodes; i++)
1278 ma->ncg[i] = ma->ibuf[2*i];
1279 ma->nat[i] = ma->ibuf[2*i+1];
1280 ma->index[i+1] = ma->index[i] + ma->ncg[i];
1283 /* Make byte counts and indices */
1284 for(i=0; i<dd->nnodes; i++)
1286 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
1287 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
1289 if (debug)
1291 fprintf(debug,"Initial charge group distribution: ");
1292 for(i=0; i<dd->nnodes; i++)
1293 fprintf(debug," %d",ma->ncg[i]);
1294 fprintf(debug,"\n");
1298 /* Collect the charge group indices on the master */
1299 dd_gatherv(dd,
1300 dd->ncg_home*sizeof(int),dd->index_gl,
1301 DDMASTER(dd) ? ma->ibuf : NULL,
1302 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
1303 DDMASTER(dd) ? ma->cg : NULL);
1305 dd->comm->master_cg_ddp_count = state_local->ddp_count;
1308 static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
1309 rvec *lv,rvec *v)
1311 gmx_domdec_master_t *ma;
1312 int n,i,c,a,nalloc=0;
1313 rvec *buf=NULL;
1314 t_block *cgs_gl;
1316 ma = dd->ma;
1318 if (!DDMASTER(dd))
1320 #ifdef GMX_MPI
1321 MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
1322 dd->rank,dd->mpi_comm_all);
1323 #endif
1324 } else {
1325 /* Copy the master coordinates to the global array */
1326 cgs_gl = &dd->comm->cgs_gl;
1328 n = DDMASTERRANK(dd);
1329 a = 0;
1330 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1332 for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
1334 copy_rvec(lv[a++],v[c]);
1338 for(n=0; n<dd->nnodes; n++)
1340 if (n != dd->rank)
1342 if (ma->nat[n] > nalloc)
1344 nalloc = over_alloc_dd(ma->nat[n]);
1345 srenew(buf,nalloc);
1347 #ifdef GMX_MPI
1348 MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
1349 n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
1350 #endif
1351 a = 0;
1352 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1354 for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
1356 copy_rvec(buf[a++],v[c]);
1361 sfree(buf);
1365 static void get_commbuffer_counts(gmx_domdec_t *dd,
1366 int **counts,int **disps)
1368 gmx_domdec_master_t *ma;
1369 int n;
1371 ma = dd->ma;
1373 /* Make the rvec count and displacment arrays */
1374 *counts = ma->ibuf;
1375 *disps = ma->ibuf + dd->nnodes;
1376 for(n=0; n<dd->nnodes; n++)
1378 (*counts)[n] = ma->nat[n]*sizeof(rvec);
1379 (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
1383 static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
1384 rvec *lv,rvec *v)
1386 gmx_domdec_master_t *ma;
1387 int *rcounts=NULL,*disps=NULL;
1388 int n,i,c,a;
1389 rvec *buf=NULL;
1390 t_block *cgs_gl;
1392 ma = dd->ma;
1394 if (DDMASTER(dd))
1396 get_commbuffer_counts(dd,&rcounts,&disps);
1398 buf = ma->vbuf;
1401 dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
1403 if (DDMASTER(dd))
1405 cgs_gl = &dd->comm->cgs_gl;
1407 a = 0;
1408 for(n=0; n<dd->nnodes; n++)
1410 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1412 for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
1414 copy_rvec(buf[a++],v[c]);
1421 void dd_collect_vec(gmx_domdec_t *dd,
1422 t_state *state_local,rvec *lv,rvec *v)
1424 gmx_domdec_master_t *ma;
1425 int n,i,c,a,nalloc=0;
1426 rvec *buf=NULL;
1428 dd_collect_cg(dd,state_local);
1430 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1432 dd_collect_vec_sendrecv(dd,lv,v);
1434 else
1436 dd_collect_vec_gatherv(dd,lv,v);
1441 void dd_collect_state(gmx_domdec_t *dd,
1442 t_state *state_local,t_state *state)
1444 int est,i,j,nh;
1446 nh = state->nhchainlength;
1448 if (DDMASTER(dd))
1450 state->lambda = state_local->lambda;
1451 state->veta = state_local->veta;
1452 state->vol0 = state_local->vol0;
1453 copy_mat(state_local->box,state->box);
1454 copy_mat(state_local->boxv,state->boxv);
1455 copy_mat(state_local->svir_prev,state->svir_prev);
1456 copy_mat(state_local->fvir_prev,state->fvir_prev);
1457 copy_mat(state_local->pres_prev,state->pres_prev);
1460 for(i=0; i<state_local->ngtc; i++)
1462 for(j=0; j<nh; j++) {
1463 state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
1464 state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
1466 state->therm_integral[i] = state_local->therm_integral[i];
1468 for(i=0; i<state_local->nnhpres; i++)
1470 for(j=0; j<nh; j++) {
1471 state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
1472 state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
1476 for(est=0; est<estNR; est++)
1478 if (EST_DISTR(est) && state_local->flags & (1<<est))
1480 switch (est) {
1481 case estX:
1482 dd_collect_vec(dd,state_local,state_local->x,state->x);
1483 break;
1484 case estV:
1485 dd_collect_vec(dd,state_local,state_local->v,state->v);
1486 break;
1487 case estSDX:
1488 dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
1489 break;
1490 case estCGP:
1491 dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
1492 break;
1493 case estLD_RNG:
1494 if (state->nrngi == 1)
1496 if (DDMASTER(dd))
1498 for(i=0; i<state_local->nrng; i++)
1500 state->ld_rng[i] = state_local->ld_rng[i];
1504 else
1506 dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
1507 state_local->ld_rng,state->ld_rng);
1509 break;
1510 case estLD_RNGI:
1511 if (state->nrngi == 1)
1513 if (DDMASTER(dd))
1515 state->ld_rngi[0] = state_local->ld_rngi[0];
1518 else
1520 dd_gather(dd,sizeof(state->ld_rngi[0]),
1521 state_local->ld_rngi,state->ld_rngi);
1523 break;
1524 case estDISRE_INITF:
1525 case estDISRE_RM3TAV:
1526 case estORIRE_INITF:
1527 case estORIRE_DTAV:
1528 break;
1529 default:
1530 gmx_incons("Unknown state entry encountered in dd_collect_state");
1536 static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
1538 if (debug)
1540 fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
1542 fr->cg_nalloc = over_alloc_dd(nalloc);
1543 srenew(fr->cg_cm,fr->cg_nalloc);
1544 srenew(fr->cginfo,fr->cg_nalloc);
1547 static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
1549 int est;
1551 if (debug)
1553 fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
1556 state->nalloc = over_alloc_dd(nalloc);
1558 for(est=0; est<estNR; est++)
1560 if (EST_DISTR(est) && state->flags & (1<<est))
1562 switch(est) {
1563 case estX:
1564 srenew(state->x,state->nalloc);
1565 break;
1566 case estV:
1567 srenew(state->v,state->nalloc);
1568 break;
1569 case estSDX:
1570 srenew(state->sd_X,state->nalloc);
1571 break;
1572 case estCGP:
1573 srenew(state->cg_p,state->nalloc);
1574 break;
1575 case estLD_RNG:
1576 case estLD_RNGI:
1577 case estDISRE_INITF:
1578 case estDISRE_RM3TAV:
1579 case estORIRE_INITF:
1580 case estORIRE_DTAV:
1581 /* No reallocation required */
1582 break;
1583 default:
1584 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1589 if (f != NULL)
1591 srenew(*f,state->nalloc);
1595 static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
1596 rvec *v,rvec *lv)
1598 gmx_domdec_master_t *ma;
1599 int n,i,c,a,nalloc=0;
1600 rvec *buf=NULL;
1602 if (DDMASTER(dd))
1604 ma = dd->ma;
1606 for(n=0; n<dd->nnodes; n++)
1608 if (n != dd->rank)
1610 if (ma->nat[n] > nalloc)
1612 nalloc = over_alloc_dd(ma->nat[n]);
1613 srenew(buf,nalloc);
1615 /* Use lv as a temporary buffer */
1616 a = 0;
1617 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1619 for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
1621 copy_rvec(v[c],buf[a++]);
1624 if (a != ma->nat[n])
1626 gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
1627 a,ma->nat[n]);
1630 #ifdef GMX_MPI
1631 MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
1632 DDRANK(dd,n),n,dd->mpi_comm_all);
1633 #endif
1636 sfree(buf);
1637 n = DDMASTERRANK(dd);
1638 a = 0;
1639 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1641 for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
1643 copy_rvec(v[c],lv[a++]);
1647 else
1649 #ifdef GMX_MPI
1650 MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
1651 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
1652 #endif
1656 static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
1657 rvec *v,rvec *lv)
1659 gmx_domdec_master_t *ma;
1660 int *scounts=NULL,*disps=NULL;
1661 int n,i,c,a,nalloc=0;
1662 rvec *buf=NULL;
1664 if (DDMASTER(dd))
1666 ma = dd->ma;
1668 get_commbuffer_counts(dd,&scounts,&disps);
1670 buf = ma->vbuf;
1671 a = 0;
1672 for(n=0; n<dd->nnodes; n++)
1674 for(i=ma->index[n]; i<ma->index[n+1]; i++)
1676 for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
1678 copy_rvec(v[c],buf[a++]);
1684 dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
1687 static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
1689 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
1691 dd_distribute_vec_sendrecv(dd,cgs,v,lv);
1693 else
1695 dd_distribute_vec_scatterv(dd,cgs,v,lv);
1699 static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
1700 t_state *state,t_state *state_local,
1701 rvec **f)
1703 int i,j,ngtch,ngtcp,nh;
1705 nh = state->nhchainlength;
1707 if (DDMASTER(dd))
1709 state_local->lambda = state->lambda;
1710 state_local->veta = state->veta;
1711 state_local->vol0 = state->vol0;
1712 copy_mat(state->box,state_local->box);
1713 copy_mat(state->box_rel,state_local->box_rel);
1714 copy_mat(state->boxv,state_local->boxv);
1715 copy_mat(state->svir_prev,state_local->svir_prev);
1716 copy_mat(state->fvir_prev,state_local->fvir_prev);
1717 for(i=0; i<state_local->ngtc; i++)
1719 for(j=0; j<nh; j++) {
1720 state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
1721 state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
1723 state_local->therm_integral[i] = state->therm_integral[i];
1725 for(i=0; i<state_local->nnhpres; i++)
1727 for(j=0; j<nh; j++) {
1728 state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
1729 state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
1733 dd_bcast(dd,sizeof(real),&state_local->lambda);
1734 dd_bcast(dd,sizeof(real),&state_local->veta);
1735 dd_bcast(dd,sizeof(real),&state_local->vol0);
1736 dd_bcast(dd,sizeof(state_local->box),state_local->box);
1737 dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
1738 dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
1739 dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
1740 dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
1741 dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
1742 dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
1743 dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
1744 dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
1745 dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
1747 if (dd->nat_home > state_local->nalloc)
1749 dd_realloc_state(state_local,f,dd->nat_home);
1751 for(i=0; i<estNR; i++)
1753 if (EST_DISTR(i) && state_local->flags & (1<<i))
1755 switch (i) {
1756 case estX:
1757 dd_distribute_vec(dd,cgs,state->x,state_local->x);
1758 break;
1759 case estV:
1760 dd_distribute_vec(dd,cgs,state->v,state_local->v);
1761 break;
1762 case estSDX:
1763 dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
1764 break;
1765 case estCGP:
1766 dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
1767 break;
1768 case estLD_RNG:
1769 if (state->nrngi == 1)
1771 dd_bcastc(dd,
1772 state_local->nrng*sizeof(state_local->ld_rng[0]),
1773 state->ld_rng,state_local->ld_rng);
1775 else
1777 dd_scatter(dd,
1778 state_local->nrng*sizeof(state_local->ld_rng[0]),
1779 state->ld_rng,state_local->ld_rng);
1781 break;
1782 case estLD_RNGI:
1783 if (state->nrngi == 1)
1785 dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
1786 state->ld_rngi,state_local->ld_rngi);
1788 else
1790 dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
1791 state->ld_rngi,state_local->ld_rngi);
1793 break;
1794 case estDISRE_INITF:
1795 case estDISRE_RM3TAV:
1796 case estORIRE_INITF:
1797 case estORIRE_DTAV:
1798 /* Not implemented yet */
1799 break;
1800 default:
1801 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1807 static char dim2char(int dim)
1809 char c='?';
1811 switch (dim)
1813 case XX: c = 'X'; break;
1814 case YY: c = 'Y'; break;
1815 case ZZ: c = 'Z'; break;
1816 default: gmx_fatal(FARGS,"Unknown dim %d",dim);
1819 return c;
1822 static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
1823 gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
1825 rvec grid_s[2],*grid_r=NULL,cx,r;
1826 char fname[STRLEN],format[STRLEN],buf[22];
1827 FILE *out;
1828 int a,i,d,z,y,x;
1829 matrix tric;
1830 real vol;
1832 copy_rvec(dd->comm->cell_x0,grid_s[0]);
1833 copy_rvec(dd->comm->cell_x1,grid_s[1]);
1835 if (DDMASTER(dd))
1837 snew(grid_r,2*dd->nnodes);
1840 dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
1842 if (DDMASTER(dd))
1844 for(d=0; d<DIM; d++)
1846 for(i=0; i<DIM; i++)
1848 if (d == i)
1850 tric[d][i] = 1;
1852 else
1854 if (dd->nc[d] > 1 && d < ddbox->npbcdim)
1856 tric[d][i] = box[i][d]/box[i][i];
1858 else
1860 tric[d][i] = 0;
1865 sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
1866 sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
1867 out = gmx_fio_fopen(fname,"w");
1868 gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
1869 a = 1;
1870 for(i=0; i<dd->nnodes; i++)
1872 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1873 for(d=0; d<DIM; d++)
1875 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1877 for(z=0; z<2; z++)
1879 for(y=0; y<2; y++)
1881 for(x=0; x<2; x++)
1883 cx[XX] = grid_r[i*2+x][XX];
1884 cx[YY] = grid_r[i*2+y][YY];
1885 cx[ZZ] = grid_r[i*2+z][ZZ];
1886 mvmul(tric,cx,r);
1887 fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
1888 10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
1892 for(d=0; d<DIM; d++)
1894 for(x=0; x<4; x++)
1896 switch(d)
1898 case 0: y = 1 + i*8 + 2*x; break;
1899 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1900 case 2: y = 1 + i*8 + x; break;
1902 fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
1906 gmx_fio_fclose(out);
1907 sfree(grid_r);
1911 void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
1912 gmx_mtop_t *mtop,t_commrec *cr,
1913 int natoms,rvec x[],matrix box)
1915 char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
1916 FILE *out;
1917 int i,ii,resnr,c;
1918 char *atomname,*resname;
1919 real b;
1920 gmx_domdec_t *dd;
1922 dd = cr->dd;
1923 if (natoms == -1)
1925 natoms = dd->comm->nat[ddnatVSITE];
1928 sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
1930 sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
1931 sprintf(format4,"%s%s\n",pdbformat4,"%6.2f%6.2f");
1933 out = gmx_fio_fopen(fname,"w");
1935 fprintf(out,"TITLE %s\n",title);
1936 gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
1937 for(i=0; i<natoms; i++)
1939 ii = dd->gatindex[i];
1940 gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
1941 if (i < dd->comm->nat[ddnatZONE])
1943 c = 0;
1944 while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
1946 c++;
1948 b = c;
1950 else if (i < dd->comm->nat[ddnatVSITE])
1952 b = dd->comm->zones.n;
1954 else
1956 b = dd->comm->zones.n + 1;
1958 fprintf(out,strlen(atomname)<4 ? format : format4,
1959 "ATOM",(ii+1)%100000,
1960 atomname,resname,' ',(resnr+1)%10000,' ',
1961 10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
1963 fprintf(out,"TER\n");
1965 gmx_fio_fclose(out);
1968 real dd_cutoff_mbody(gmx_domdec_t *dd)
1970 gmx_domdec_comm_t *comm;
1971 int di;
1972 real r;
1974 comm = dd->comm;
1976 r = -1;
1977 if (comm->bInterCGBondeds)
1979 if (comm->cutoff_mbody > 0)
1981 r = comm->cutoff_mbody;
1983 else
1985 /* cutoff_mbody=0 means we do not have DLB */
1986 r = comm->cellsize_min[dd->dim[0]];
1987 for(di=1; di<dd->ndim; di++)
1989 r = min(r,comm->cellsize_min[dd->dim[di]]);
1991 if (comm->bBondComm)
1993 r = max(r,comm->cutoff_mbody);
1995 else
1997 r = min(r,comm->cutoff);
2002 return r;
2005 real dd_cutoff_twobody(gmx_domdec_t *dd)
2007 real r_mb;
2009 r_mb = dd_cutoff_mbody(dd);
2011 return max(dd->comm->cutoff,r_mb);
2015 static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
2017 int nc,ntot;
2019 nc = dd->nc[dd->comm->cartpmedim];
2020 ntot = dd->comm->ntot[dd->comm->cartpmedim];
2021 copy_ivec(coord,coord_pme);
2022 coord_pme[dd->comm->cartpmedim] =
2023 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
2026 static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
2028 /* Here we assign a PME node to communicate with this DD node
2029 * by assuming that the major index of both is x.
2030 * We add cr->npmenodes/2 to obtain an even distribution.
2032 return (ddindex*npme + npme/2)/ndd;
2035 static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
2037 return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
2040 static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
2042 return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
2045 static int *dd_pmenodes(t_commrec *cr)
2047 int *pmenodes;
2048 int n,i,p0,p1;
2050 snew(pmenodes,cr->npmenodes);
2051 n = 0;
2052 for(i=0; i<cr->dd->nnodes; i++) {
2053 p0 = cr_ddindex2pmeindex(cr,i);
2054 p1 = cr_ddindex2pmeindex(cr,i+1);
2055 if (i+1 == cr->dd->nnodes || p1 > p0) {
2056 if (debug)
2057 fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
2058 pmenodes[n] = i + 1 + n;
2059 n++;
2063 return pmenodes;
2066 static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
2068 gmx_domdec_t *dd;
2069 ivec coords,coords_pme,nc;
2070 int slab;
2072 dd = cr->dd;
2074 if (dd->comm->bCartesian) {
2075 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2076 dd_coords2pmecoords(dd,coords,coords_pme);
2077 copy_ivec(dd->ntot,nc);
2078 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2079 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2081 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2082 } else {
2083 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2086 coords[XX] = x;
2087 coords[YY] = y;
2088 coords[ZZ] = z;
2089 slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
2091 return slab;
2094 static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
2096 gmx_domdec_comm_t *comm;
2097 ivec coords;
2098 int ddindex,nodeid=-1;
2100 comm = cr->dd->comm;
2102 coords[XX] = x;
2103 coords[YY] = y;
2104 coords[ZZ] = z;
2105 if (comm->bCartesianPP_PME)
2107 #ifdef GMX_MPI
2108 MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
2109 #endif
2111 else
2113 ddindex = dd_index(cr->dd->nc,coords);
2114 if (comm->bCartesianPP)
2116 nodeid = comm->ddindex2simnodeid[ddindex];
2118 else
2120 if (comm->pmenodes)
2122 nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
2124 else
2126 nodeid = ddindex;
2131 return nodeid;
2134 static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
2136 gmx_domdec_t *dd;
2137 gmx_domdec_comm_t *comm;
2138 ivec coord,coord_pme;
2139 int i;
2140 int pmenode=-1;
2142 dd = cr->dd;
2143 comm = dd->comm;
2145 /* This assumes a uniform x domain decomposition grid cell size */
2146 if (comm->bCartesianPP_PME)
2148 #ifdef GMX_MPI
2149 MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
2150 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
2152 /* This is a PP node */
2153 dd_cart_coord2pmecoord(dd,coord,coord_pme);
2154 MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
2156 #endif
2158 else if (comm->bCartesianPP)
2160 if (sim_nodeid < dd->nnodes)
2162 pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
2165 else
2167 /* This assumes DD cells with identical x coordinates
2168 * are numbered sequentially.
2170 if (dd->comm->pmenodes == NULL)
2172 if (sim_nodeid < dd->nnodes)
2174 /* The DD index equals the nodeid */
2175 pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
2178 else
2180 i = 0;
2181 while (sim_nodeid > dd->comm->pmenodes[i])
2183 i++;
2185 if (sim_nodeid < dd->comm->pmenodes[i])
2187 pmenode = dd->comm->pmenodes[i];
2192 return pmenode;
2195 bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
2197 bool bPMEOnlyNode;
2199 if (DOMAINDECOMP(cr))
2201 bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
2203 else
2205 bPMEOnlyNode = FALSE;
2208 return bPMEOnlyNode;
2211 void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
2212 int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
2214 gmx_domdec_t *dd;
2215 int x,y,z;
2216 ivec coord,coord_pme;
2218 dd = cr->dd;
2220 snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
2222 *nmy_ddnodes = 0;
2223 for(x=0; x<dd->nc[XX]; x++)
2225 for(y=0; y<dd->nc[YY]; y++)
2227 for(z=0; z<dd->nc[ZZ]; z++)
2229 if (dd->comm->bCartesianPP_PME)
2231 coord[XX] = x;
2232 coord[YY] = y;
2233 coord[ZZ] = z;
2234 dd_cart_coord2pmecoord(dd,coord,coord_pme);
2235 if (dd->ci[XX] == coord_pme[XX] &&
2236 dd->ci[YY] == coord_pme[YY] &&
2237 dd->ci[ZZ] == coord_pme[ZZ])
2238 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
2240 else
2242 /* The slab corresponds to the nodeid in the PME group */
2243 if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
2245 (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
2252 /* The last PP-only node is the peer node */
2253 *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
2255 if (debug)
2257 fprintf(debug,"Receive coordinates from PP nodes:");
2258 for(x=0; x<*nmy_ddnodes; x++)
2260 fprintf(debug," %d",(*my_ddnodes)[x]);
2262 fprintf(debug,"\n");
2266 static bool receive_vir_ener(t_commrec *cr)
2268 gmx_domdec_comm_t *comm;
2269 int pmenode,coords[DIM],rank;
2270 bool bReceive;
2272 bReceive = TRUE;
2273 if (cr->npmenodes < cr->dd->nnodes)
2275 comm = cr->dd->comm;
2276 if (comm->bCartesianPP_PME)
2278 pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
2279 #ifdef GMX_MPI
2280 MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
2281 coords[comm->cartpmedim]++;
2282 if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
2284 MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
2285 if (dd_simnode2pmenode(cr,rank) == pmenode)
2287 /* This is not the last PP node for pmenode */
2288 bReceive = FALSE;
2291 #endif
2293 else
2295 pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
2296 if (cr->sim_nodeid+1 < cr->nnodes &&
2297 dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
2299 /* This is not the last PP node for pmenode */
2300 bReceive = FALSE;
2305 return bReceive;
2308 static void set_zones_ncg_home(gmx_domdec_t *dd)
2310 gmx_domdec_zones_t *zones;
2311 int i;
2313 zones = &dd->comm->zones;
2315 zones->cg_range[0] = 0;
2316 for(i=1; i<zones->n+1; i++)
2318 zones->cg_range[i] = dd->ncg_home;
2322 static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
2324 int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;
2326 ind = state->cg_gl;
2327 dd_cg_gl = dd->index_gl;
2328 cgindex = dd->cgindex;
2329 nat = 0;
2330 cgindex[0] = nat;
2331 for(i=0; i<state->ncg_gl; i++)
2333 cgindex[i] = nat;
2334 cg_gl = ind[i];
2335 dd_cg_gl[i] = cg_gl;
2336 nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
2338 cgindex[i] = nat;
2340 dd->ncg_home = state->ncg_gl;
2341 dd->nat_home = nat;
2343 set_zones_ncg_home(dd);
2346 static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
2348 while (cg >= cginfo_mb->cg_end)
2350 cginfo_mb++;
2353 return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
2356 static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
2357 t_forcerec *fr,char *bLocalCG)
2359 cginfo_mb_t *cginfo_mb;
2360 int *cginfo;
2361 int cg;
2363 if (fr != NULL)
2365 cginfo_mb = fr->cginfo_mb;
2366 cginfo = fr->cginfo;
2368 for(cg=cg0; cg<cg1; cg++)
2370 cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
2374 if (bLocalCG != NULL)
2376 for(cg=cg0; cg<cg1; cg++)
2378 bLocalCG[index_gl[cg]] = TRUE;
2383 static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
2385 int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
2386 int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
2387 gmx_ga2la_t *ga2la;
2388 char *bLocalCG;
2390 bLocalCG = dd->comm->bLocalCG;
2392 if (dd->nat_tot > dd->gatindex_nalloc)
2394 dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
2395 srenew(dd->gatindex,dd->gatindex_nalloc);
2398 nzone = dd->comm->zones.n;
2399 zone2cg = dd->comm->zones.cg_range;
2400 zone_ncg1 = dd->comm->zone_ncg1;
2401 index_gl = dd->index_gl;
2402 gatindex = dd->gatindex;
2404 if (zone2cg[1] != dd->ncg_home)
2406 gmx_incons("dd->ncg_zone is not up to date");
2409 /* Make the local to global and global to local atom index */
2410 a = dd->cgindex[cg_start];
2411 for(zone=0; zone<nzone; zone++)
2413 if (zone == 0)
2415 cg0 = cg_start;
2417 else
2419 cg0 = zone2cg[zone];
2421 for(cg=cg0; cg<zone2cg[zone+1]; cg++)
2423 zone1 = zone;
2424 if (cg - cg0 >= zone_ncg1[zone])
2426 /* Signal that this cg is from more than one zone away */
2427 zone1 += nzone;
2429 cg_gl = index_gl[cg];
2430 for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
2432 gatindex[a] = a_gl;
2433 ga2la_set(dd->ga2la,a_gl,a,zone1);
2434 a++;
2440 static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
2441 const char *where)
2443 int ncg,i,ngl,nerr;
2445 nerr = 0;
2446 if (bLocalCG == NULL)
2448 return nerr;
2450 for(i=0; i<dd->ncg_tot; i++)
2452 if (!bLocalCG[dd->index_gl[i]])
2454 fprintf(stderr,
2455 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
2456 nerr++;
2459 ngl = 0;
2460 for(i=0; i<ncg_sys; i++)
2462 if (bLocalCG[i])
2464 ngl++;
2467 if (ngl != dd->ncg_tot)
2469 fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
2470 nerr++;
2473 return nerr;
2476 static void check_index_consistency(gmx_domdec_t *dd,
2477 int natoms_sys,int ncg_sys,
2478 const char *where)
2480 int nerr,ngl,i,a,cell;
2481 int *have;
2483 nerr = 0;
2485 if (dd->comm->DD_debug > 1)
2487 snew(have,natoms_sys);
2488 for(a=0; a<dd->nat_tot; a++)
2490 if (have[dd->gatindex[a]] > 0)
2492 fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
2494 else
2496 have[dd->gatindex[a]] = a + 1;
2499 sfree(have);
2502 snew(have,dd->nat_tot);
2504 ngl = 0;
2505 for(i=0; i<natoms_sys; i++)
2507 if (ga2la_get(dd->ga2la,i,&a,&cell))
2509 if (a >= dd->nat_tot)
2511 fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
2512 nerr++;
2514 else
2516 have[a] = 1;
2517 if (dd->gatindex[a] != i)
2519 fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
2520 nerr++;
2523 ngl++;
2526 if (ngl != dd->nat_tot)
2528 fprintf(stderr,
2529 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2530 dd->rank,where,ngl,dd->nat_tot);
2532 for(a=0; a<dd->nat_tot; a++)
2534 if (have[a] == 0)
2536 fprintf(stderr,
2537 "DD node %d, %s: local atom %d, global %d has no global index\n",
2538 dd->rank,where,a+1,dd->gatindex[a]+1);
2541 sfree(have);
2543 nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
2545 if (nerr > 0) {
2546 gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
2547 dd->rank,where,nerr);
2551 static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
2553 int i;
2554 char *bLocalCG;
2556 if (a_start == 0)
2558 /* Clear the whole list without searching */
2559 ga2la_clear(dd->ga2la);
2561 else
2563 for(i=a_start; i<dd->nat_tot; i++)
2565 ga2la_del(dd->ga2la,dd->gatindex[i]);
2569 bLocalCG = dd->comm->bLocalCG;
2570 if (bLocalCG)
2572 for(i=cg_start; i<dd->ncg_tot; i++)
2574 bLocalCG[dd->index_gl[i]] = FALSE;
2578 dd_clear_local_vsite_indices(dd);
2580 if (dd->constraints)
2582 dd_clear_local_constraint_indices(dd);
2586 static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
2588 real grid_jump_limit;
2590 /* The distance between the boundaries of cells at distance
2591 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2592 * and by the fact that cells should not be shifted by more than
2593 * half their size, such that cg's only shift by one cell
2594 * at redecomposition.
2596 grid_jump_limit = comm->cellsize_limit;
2597 if (!comm->bVacDLBNoLimit)
2599 grid_jump_limit = max(grid_jump_limit,
2600 comm->cutoff/comm->cd[dim_ind].np);
2603 return grid_jump_limit;
2606 static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
2608 gmx_domdec_comm_t *comm;
2609 int d,dim;
2610 real limit,bfac;
2612 comm = dd->comm;
2614 for(d=1; d<dd->ndim; d++)
2616 dim = dd->dim[d];
2617 limit = grid_jump_limit(comm,d);
2618 bfac = ddbox->box_size[dim];
2619 if (ddbox->tric_dir[dim])
2621 bfac *= ddbox->skew_fac[dim];
2623 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
2624 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
2626 char buf[22];
2627 gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
2628 gmx_step_str(step,buf),
2629 dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
2634 static int dd_load_count(gmx_domdec_comm_t *comm)
2636 return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
2639 static float dd_force_load(gmx_domdec_comm_t *comm)
2641 float load;
2643 if (comm->eFlop)
2645 load = comm->flop;
2646 if (comm->eFlop > 1)
2648 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2651 else
2653 load = comm->cycl[ddCyclF];
2654 if (comm->cycl_n[ddCyclF] > 1)
2656 /* Subtract the maximum of the last n cycle counts
2657 * to get rid of possible high counts due to other soures,
2658 * for instance system activity, that would otherwise
2659 * affect the dynamic load balancing.
2661 load -= comm->cycl_max[ddCyclF];
2665 return load;
2668 static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dimind,real **dim_f)
2670 gmx_domdec_comm_t *comm;
2671 int i;
2673 comm = dd->comm;
2675 if (dd->dim[dimind] != dimind)
2677 *dim_f = NULL;
2678 return;
2681 snew(*dim_f,dd->nc[dimind]+1);
2682 (*dim_f)[0] = 0;
2683 for(i=1; i<dd->nc[dimind]; i++)
2685 if (comm->slb_frac[dimind])
2687 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dimind][i-1];
2689 else
2691 (*dim_f)[i] = (real)i/(real)dd->nc[dimind];
2694 (*dim_f)[dd->nc[dimind]] = 1;
2697 static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
2698 int dimind,int nslab)
2700 int pmeindex,slab,nso,i;
2701 ivec xyz;
2703 ddpme->dimind = dimind;
2704 ddpme->nslab = nslab;
2706 if (nslab <= 1)
2708 return;
2711 nso = dd->comm->npmenodes/nslab;
2712 /* Determine for each PME slab the PP location range for dimension dim */
2713 snew(ddpme->pp_min,nslab);
2714 snew(ddpme->pp_max,nslab);
2715 for(slab=0; slab<nslab; slab++) {
2716 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
2717 ddpme->pp_max[slab] = 0;
2719 for(i=0; i<dd->nnodes; i++) {
2720 ddindex2xyz(dd->nc,i,xyz);
2721 /* For y only use our y/z slab.
2722 * This assumes that the PME x grid size matches the DD grid size.
2724 if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
2725 pmeindex = ddindex2pmeindex(dd,i);
2726 if (dimind == 0) {
2727 slab = pmeindex/nso;
2728 } else {
2729 slab = pmeindex % nslab;
2731 ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
2732 ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
2736 set_slb_pme_dim_f(dd,ddpme->dimind,&ddpme->slb_dim_f);
2739 int dd_pme_maxshift0(gmx_domdec_t *dd)
2741 return dd->comm->ddpme[0].maxshift;
2744 int dd_pme_maxshift1(gmx_domdec_t *dd)
2746 return dd->comm->ddpme[1].maxshift;
2749 static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
2750 bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
2752 gmx_domdec_comm_t *comm;
2753 int dim,nc,ns,s;
2754 int *xmin,*xmax;
2755 real range,pme_boundary;
2756 int sh;
2758 comm = dd->comm;
2759 dim = ddpme->dimind;
2760 nc = dd->nc[dim];
2761 ns = ddpme->nslab;
2763 if (dd->dim[dim] != dim)
2765 /* PP decomposition is not along dim: the worst situation */
2766 sh = ns/2;
2768 else if (ns <= 3 || (bUniform && ns == nc))
2770 /* The optimal situation */
2771 sh = 1;
2773 else
2775 /* We need to check for all pme nodes which nodes they
2776 * could possibly need to communicate with.
2778 xmin = ddpme->pp_min;
2779 xmax = ddpme->pp_max;
2780 /* Allow for atoms to be maximally 2/3 times the cut-off
2781 * out of their DD cell. This is a reasonable balance between
2782 * between performance and support for most charge-group/cut-off
2783 * combinations.
2785 range = 2.0/3.0*comm->cutoff/ddbox->box_size[dim];
2786 /* Avoid extra communication when we are exactly at a boundary */
2787 range *= 0.999;
2789 sh = 1;
2790 for(s=0; s<ns; s++)
2792 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2793 pme_boundary = (real)s/ns;
2794 while (sh+1 < ns &&
2795 ((s-(sh+1) >= 0 &&
2796 cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
2797 (s-(sh+1) < 0 &&
2798 cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
2800 sh++;
2802 pme_boundary = (real)(s+1)/ns;
2803 while (sh+1 < ns &&
2804 ((s+(sh+1) < ns &&
2805 cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
2806 (s+(sh+1) >= ns &&
2807 cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
2809 sh++;
2814 ddpme->maxshift = sh;
2816 if (debug)
2818 fprintf(debug,"PME slab communication range for dimind %d is %d\n",
2819 ddpme->dimind,ddpme->maxshift);
2823 static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
2825 int d,dim;
2827 for(d=0; d<dd->ndim; d++)
2829 dim = dd->dim[d];
2830 if (dim < ddbox->nboundeddim &&
2831 ddbox->box_size[dim]*ddbox->skew_fac[dim] <
2832 dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
2834 gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2835 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
2836 dd->nc[dim],dd->comm->cellsize_limit);
2841 static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
2842 bool bMaster,ivec npulse)
2844 gmx_domdec_comm_t *comm;
2845 int d,j;
2846 rvec cellsize_min;
2847 real *cell_x,cell_dx,cellsize;
2849 comm = dd->comm;
2851 for(d=0; d<DIM; d++)
2853 cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
2854 npulse[d] = 1;
2855 if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
2857 /* Uniform grid */
2858 cell_dx = ddbox->box_size[d]/dd->nc[d];
2859 if (bMaster)
2861 for(j=0; j<dd->nc[d]+1; j++)
2863 dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
2866 else
2868 comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
2869 comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
2871 cellsize = cell_dx*ddbox->skew_fac[d];
2872 while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
2874 npulse[d]++;
2876 cellsize_min[d] = cellsize;
2878 else
2880 /* Statically load balanced grid */
2881 /* Also when we are not doing a master distribution we determine
2882 * all cell borders in a loop to obtain identical values
2883 * to the master distribution case and to determine npulse.
2885 if (bMaster)
2887 cell_x = dd->ma->cell_x[d];
2889 else
2891 snew(cell_x,dd->nc[d]+1);
2893 cell_x[0] = ddbox->box0[d];
2894 for(j=0; j<dd->nc[d]; j++)
2896 cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
2897 cell_x[j+1] = cell_x[j] + cell_dx;
2898 cellsize = cell_dx*ddbox->skew_fac[d];
2899 while (cellsize*npulse[d] < comm->cutoff &&
2900 npulse[d] < dd->nc[d]-1)
2902 npulse[d]++;
2904 cellsize_min[d] = min(cellsize_min[d],cellsize);
2906 if (!bMaster)
2908 comm->cell_x0[d] = cell_x[dd->ci[d]];
2909 comm->cell_x1[d] = cell_x[dd->ci[d]+1];
2910 sfree(cell_x);
2913 /* The following limitation is to avoid that a cell would receive
2914 * some of its own home charge groups back over the periodic boundary.
2915 * Double charge groups cause trouble with the global indices.
2917 if (d < ddbox->npbcdim &&
2918 dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
2920 gmx_fatal_collective(FARGS,DDMASTER(dd),
2921 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
2922 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
2923 comm->cutoff,
2924 dd->nc[d],dd->nc[d],
2925 dd->nnodes > dd->nc[d] ? "cells" : "processors");
2929 if (!comm->bDynLoadBal)
2931 copy_rvec(cellsize_min,comm->cellsize_min);
2934 for(d=0; d<comm->npmedecompdim; d++)
2936 set_pme_maxshift(dd,&comm->ddpme[d],
2937 comm->slb_frac[dd->dim[d]]==NULL,ddbox,
2938 comm->ddpme[d].slb_dim_f);
2943 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
2944 int d,int dim,gmx_domdec_root_t *root,
2945 gmx_ddbox_t *ddbox,
2946 bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
2948 gmx_domdec_comm_t *comm;
2949 int ncd,i,j,nmin,nmin_old;
2950 bool bLimLo,bLimHi;
2951 real *cell_size;
2952 real fac,halfway,cellsize_limit_f_i,region_size;
2953 bool bPBC,bLastHi=FALSE;
2954 int nrange[]={range[0],range[1]};
2956 region_size= root->cell_f[range[1]]-root->cell_f[range[0]];
2958 comm = dd->comm;
2960 ncd = dd->nc[dim];
2962 bPBC = (dim < ddbox->npbcdim);
2964 cell_size = root->buf_ncd;
2966 if (debug)
2968 fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
2971 /* First we need to check if the scaling does not make cells
2972 * smaller than the smallest allowed size.
2973 * We need to do this iteratively, since if a cell is too small,
2974 * it needs to be enlarged, which makes all the other cells smaller,
2975 * which could in turn make another cell smaller than allowed.
2977 for(i=range[0]; i<range[1]; i++)
2979 root->bCellMin[i] = FALSE;
2981 nmin = 0;
2984 nmin_old = nmin;
2985 /* We need the total for normalization */
2986 fac = 0;
2987 for(i=range[0]; i<range[1]; i++)
2989 if (root->bCellMin[i] == FALSE)
2991 fac += cell_size[i];
2994 fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
2995 /* Determine the cell boundaries */
2996 for(i=range[0]; i<range[1]; i++)
2998 if (root->bCellMin[i] == FALSE)
3000 cell_size[i] *= fac;
3001 if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
3003 cellsize_limit_f_i = 0;
3005 else
3007 cellsize_limit_f_i = cellsize_limit_f;
3009 if (cell_size[i] < cellsize_limit_f_i)
3011 root->bCellMin[i] = TRUE;
3012 cell_size[i] = cellsize_limit_f_i;
3013 nmin++;
3016 root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
3019 while (nmin > nmin_old);
3021 i=range[1]-1;
3022 cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
3023 /* For this check we should not use DD_CELL_MARGIN,
3024 * but a slightly smaller factor,
3025 * since rounding could get use below the limit.
3027 if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
3029 char buf[22];
3030 gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
3031 gmx_step_str(step,buf),
3032 dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
3033 ncd,comm->cellsize_min[dim]);
3036 root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
3038 if (!bUniform)
3040 /* Check if the boundary did not displace more than halfway
3041 * each of the cells it bounds, as this could cause problems,
3042 * especially when the differences between cell sizes are large.
3043 * If changes are applied, they will not make cells smaller
3044 * than the cut-off, as we check all the boundaries which
3045 * might be affected by a change and if the old state was ok,
3046 * the cells will at most be shrunk back to their old size.
3048 for(i=range[0]+1; i<range[1]; i++)
3050 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
3051 if (root->cell_f[i] < halfway)
3053 root->cell_f[i] = halfway;
3054 /* Check if the change also causes shifts of the next boundaries */
3055 for(j=i+1; j<range[1]; j++)
3057 if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
3058 root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
3061 halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
3062 if (root->cell_f[i] > halfway)
3064 root->cell_f[i] = halfway;
3065 /* Check if the change also causes shifts of the next boundaries */
3066 for(j=i-1; j>=range[0]+1; j--)
3068 if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
3069 root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
3075 /* nrange is defined as [lower, upper) range for new call to enforce_limits */
3076 /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
3077 * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
3078 * for a and b nrange is used */
3079 if (d > 0)
3081 /* Take care of the staggering of the cell boundaries */
3082 if (bUniform)
3084 for(i=range[0]; i<range[1]; i++)
3086 root->cell_f_max0[i] = root->cell_f[i];
3087 root->cell_f_min1[i] = root->cell_f[i+1];
3090 else
3092 for(i=range[0]+1; i<range[1]; i++)
3094 bLimLo = (root->cell_f[i] < root->bound_min[i]);
3095 bLimHi = (root->cell_f[i] > root->bound_max[i]);
3096 if (bLimLo && bLimHi)
3098 /* Both limits violated, try the best we can */
3099 /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
3100 root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
3101 nrange[0]=range[0];
3102 nrange[1]=i;
3103 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3105 nrange[0]=i;
3106 nrange[1]=range[1];
3107 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3109 return;
3111 else if (bLimLo)
3113 /* root->cell_f[i] = root->bound_min[i]; */
3114 nrange[1]=i; /* only store violation location. There could be a LimLo violation following with an higher index */
3115 bLastHi=FALSE;
3117 else if (bLimHi && !bLastHi)
3119 bLastHi=TRUE;
3120 if (nrange[1] < range[1]) /* found a LimLo before */
3122 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3123 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3124 nrange[0]=nrange[1];
3126 root->cell_f[i] = root->bound_max[i];
3127 nrange[1]=i;
3128 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3129 nrange[0]=i;
3130 nrange[1]=range[1];
3133 if (nrange[1] < range[1]) /* found last a LimLo */
3135 root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
3136 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3137 nrange[0]=nrange[1];
3138 nrange[1]=range[1];
3139 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3141 else if (nrange[0] > range[0]) /* found at least one LimHi */
3143 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
3150 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
3151 int d,int dim,gmx_domdec_root_t *root,
3152 gmx_ddbox_t *ddbox,bool bDynamicBox,
3153 bool bUniform,gmx_large_int_t step)
3155 gmx_domdec_comm_t *comm;
3156 int ncd,d1,i,j,pos;
3157 real *cell_size;
3158 real load_aver,load_i,imbalance,change,change_max,sc;
3159 real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
3160 real change_limit = 0.1;
3161 real relax = 0.5;
3162 bool bPBC;
3163 int range[] = { 0, 0 };
3165 comm = dd->comm;
3167 ncd = dd->nc[dim];
3169 bPBC = (dim < ddbox->npbcdim);
3171 cell_size = root->buf_ncd;
3173 /* Store the original boundaries */
3174 for(i=0; i<ncd+1; i++)
3176 root->old_cell_f[i] = root->cell_f[i];
3178 if (bUniform) {
3179 for(i=0; i<ncd; i++)
3181 cell_size[i] = 1.0/ncd;
3184 else if (dd_load_count(comm))
3186 load_aver = comm->load[d].sum_m/ncd;
3187 change_max = 0;
3188 for(i=0; i<ncd; i++)
3190 /* Determine the relative imbalance of cell i */
3191 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3192 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3193 /* Determine the change of the cell size using underrelaxation */
3194 change = -relax*imbalance;
3195 change_max = max(change_max,max(change,-change));
3197 /* Limit the amount of scaling.
3198 * We need to use the same rescaling for all cells in one row,
3199 * otherwise the load balancing might not converge.
3201 sc = relax;
3202 if (change_max > change_limit)
3204 sc *= change_limit/change_max;
3206 for(i=0; i<ncd; i++)
3208 /* Determine the relative imbalance of cell i */
3209 load_i = comm->load[d].load[i*comm->load[d].nload+2];
3210 imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
3211 /* Determine the change of the cell size using underrelaxation */
3212 change = -sc*imbalance;
3213 cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
3217 cellsize_limit_f = comm->cellsize_min[dim]/ddbox->box_size[dim];
3218 cellsize_limit_f *= DD_CELL_MARGIN;
3219 dist_min_f_hard = grid_jump_limit(comm,d)/ddbox->box_size[dim];
3220 dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
3221 if (ddbox->tric_dir[dim])
3223 cellsize_limit_f /= ddbox->skew_fac[dim];
3224 dist_min_f /= ddbox->skew_fac[dim];
3226 if (bDynamicBox && d > 0)
3228 dist_min_f *= DD_PRES_SCALE_MARGIN;
3230 if (d > 0 && !bUniform)
3232 /* Make sure that the grid is not shifted too much */
3233 for(i=1; i<ncd; i++) {
3234 if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
3236 gmx_incons("Inconsistent DD boundary staggering limits!");
3238 root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
3239 space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
3240 if (space > 0) {
3241 root->bound_min[i] += 0.5*space;
3243 root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
3244 space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
3245 if (space < 0) {
3246 root->bound_max[i] += 0.5*space;
3248 if (debug)
3250 fprintf(debug,
3251 "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
3252 d,i,
3253 root->cell_f_max0[i-1] + dist_min_f,
3254 root->bound_min[i],root->cell_f[i],root->bound_max[i],
3255 root->cell_f_min1[i] - dist_min_f);
3259 range[1]=ncd;
3260 root->cell_f[0] = 0;
3261 root->cell_f[ncd] = 1;
3262 dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
3265 /* After the checks above, the cells should obey the cut-off
3266 * restrictions, but it does not hurt to check.
3268 for(i=0; i<ncd; i++)
3270 if (debug)
3272 fprintf(debug,"Relative bounds dim %d cell %d: %f %f\n",
3273 dim,i,root->cell_f[i],root->cell_f[i+1]);
3276 if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
3277 root->cell_f[i+1] - root->cell_f[i] <
3278 cellsize_limit_f/DD_CELL_MARGIN)
3280 char buf[22];
3281 fprintf(stderr,
3282 "\nWARNING step %s: direction %c, cell %d too small: %f\n",
3283 gmx_step_str(step,buf),dim2char(dim),i,
3284 (root->cell_f[i+1] - root->cell_f[i])
3285 *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
3289 pos = ncd + 1;
3290 /* Store the cell boundaries of the lower dimensions at the end */
3291 for(d1=0; d1<d; d1++)
3293 root->cell_f[pos++] = comm->cell_f0[d1];
3294 root->cell_f[pos++] = comm->cell_f1[d1];
3297 if (d < comm->npmedecompdim)
3299 /* The master determines the maximum shift for
3300 * the coordinate communication between separate PME nodes.
3302 set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
3304 root->cell_f[pos++] = comm->ddpme[0].maxshift;
3305 if (d >= 1)
3307 root->cell_f[pos++] = comm->ddpme[1].maxshift;
3311 static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
3312 gmx_ddbox_t *ddbox,int dimind)
3314 gmx_domdec_comm_t *comm;
3315 int dim;
3317 comm = dd->comm;
3319 /* Set the cell dimensions */
3320 dim = dd->dim[dimind];
3321 comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
3322 comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
3323 if (dim >= ddbox->nboundeddim)
3325 comm->cell_x0[dim] += ddbox->box0[dim];
3326 comm->cell_x1[dim] += ddbox->box0[dim];
3330 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3331 int d,int dim,real *cell_f_row,
3332 gmx_ddbox_t *ddbox)
3334 gmx_domdec_comm_t *comm;
3335 int d1,dim1,pos;
3337 comm = dd->comm;
3339 #ifdef GMX_MPI
3340 /* Each node would only need to know two fractions,
3341 * but it is probably cheaper to broadcast the whole array.
3343 MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
3344 0,comm->mpi_comm_load[d]);
3345 #endif
3346 /* Copy the fractions for this dimension from the buffer */
3347 comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
3348 comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
3349 /* The whole array was communicated, so set the buffer position */
3350 pos = dd->nc[dim] + 1;
3351 for(d1=0; d1<=d; d1++)
3353 if (d1 < d)
3355 /* Copy the cell fractions of the lower dimensions */
3356 comm->cell_f0[d1] = cell_f_row[pos++];
3357 comm->cell_f1[d1] = cell_f_row[pos++];
3359 relative_to_absolute_cell_bounds(dd,ddbox,d1);
3361 /* Convert the communicated shift from float to int */
3362 comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
3363 if (d >= 1)
3365 comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
3369 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
3370 gmx_ddbox_t *ddbox,bool bDynamicBox,
3371 bool bUniform,gmx_large_int_t step)
3373 gmx_domdec_comm_t *comm;
3374 int d,dim,d1;
3375 bool bRowMember,bRowRoot;
3376 real *cell_f_row;
3378 comm = dd->comm;
3380 for(d=0; d<dd->ndim; d++)
3382 dim = dd->dim[d];
3383 bRowMember = TRUE;
3384 bRowRoot = TRUE;
3385 for(d1=d; d1<dd->ndim; d1++)
3387 if (dd->ci[dd->dim[d1]] > 0)
3389 if (d1 > d)
3391 bRowMember = FALSE;
3393 bRowRoot = FALSE;
3396 if (bRowMember)
3398 if (bRowRoot)
3400 set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
3401 ddbox,bDynamicBox,bUniform,step);
3402 cell_f_row = comm->root[d]->cell_f;
3404 else
3406 cell_f_row = comm->cell_f_row;
3408 distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
3413 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
3415 int d;
3417 /* This function assumes the box is static and should therefore
3418 * not be called when the box has changed since the last
3419 * call to dd_partition_system.
3421 for(d=0; d<dd->ndim; d++)
3423 relative_to_absolute_cell_bounds(dd,ddbox,d);
3429 static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
3430 gmx_ddbox_t *ddbox,bool bDynamicBox,
3431 bool bUniform,bool bDoDLB,gmx_large_int_t step,
3432 gmx_wallcycle_t wcycle)
3434 gmx_domdec_comm_t *comm;
3435 int dim;
3437 comm = dd->comm;
3439 if (bDoDLB)
3441 wallcycle_start(wcycle,ewcDDCOMMBOUND);
3442 set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
3443 wallcycle_stop(wcycle,ewcDDCOMMBOUND);
3445 else if (bDynamicBox)
3447 set_dd_cell_sizes_dlb_nochange(dd,ddbox);
3450 /* Set the dimensions for which no DD is used */
3451 for(dim=0; dim<DIM; dim++) {
3452 if (dd->nc[dim] == 1) {
3453 comm->cell_x0[dim] = 0;
3454 comm->cell_x1[dim] = ddbox->box_size[dim];
3455 if (dim >= ddbox->nboundeddim)
3457 comm->cell_x0[dim] += ddbox->box0[dim];
3458 comm->cell_x1[dim] += ddbox->box0[dim];
3464 static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
3466 int d,np,i;
3467 gmx_domdec_comm_dim_t *cd;
3469 for(d=0; d<dd->ndim; d++)
3471 cd = &dd->comm->cd[d];
3472 np = npulse[dd->dim[d]];
3473 if (np > cd->np_nalloc)
3475 if (debug)
3477 fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
3478 dim2char(dd->dim[d]),np);
3480 if (DDMASTER(dd) && cd->np_nalloc > 0)
3482 fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
3484 srenew(cd->ind,np);
3485 for(i=cd->np_nalloc; i<np; i++)
3487 cd->ind[i].index = NULL;
3488 cd->ind[i].nalloc = 0;
3490 cd->np_nalloc = np;
3492 cd->np = np;
3497 static void set_dd_cell_sizes(gmx_domdec_t *dd,
3498 gmx_ddbox_t *ddbox,bool bDynamicBox,
3499 bool bUniform,bool bDoDLB,gmx_large_int_t step,
3500 gmx_wallcycle_t wcycle)
3502 gmx_domdec_comm_t *comm;
3503 int d;
3504 ivec npulse;
3506 comm = dd->comm;
3508 /* Copy the old cell boundaries for the cg displacement check */
3509 copy_rvec(comm->cell_x0,comm->old_cell_x0);
3510 copy_rvec(comm->cell_x1,comm->old_cell_x1);
3512 if (comm->bDynLoadBal)
3514 if (DDMASTER(dd))
3516 check_box_size(dd,ddbox);
3518 set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
3520 else
3522 set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
3523 realloc_comm_ind(dd,npulse);
3526 if (debug)
3528 for(d=0; d<DIM; d++)
3530 fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
3531 d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
3536 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
3537 gmx_ddbox_t *ddbox,
3538 rvec cell_ns_x0,rvec cell_ns_x1,
3539 gmx_large_int_t step)
3541 gmx_domdec_comm_t *comm;
3542 int dim_ind,dim;
3544 comm = dd->comm;
3546 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
3548 dim = dd->dim[dim_ind];
3550 /* Without PBC we don't have restrictions on the outer cells */
3551 if (!(dim >= ddbox->npbcdim &&
3552 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
3553 comm->bDynLoadBal &&
3554 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
3555 comm->cellsize_min[dim])
3557 char buf[22];
3558 gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
3559 gmx_step_str(step,buf),dim2char(dim),
3560 comm->cell_x1[dim] - comm->cell_x0[dim],
3561 ddbox->skew_fac[dim],
3562 dd->comm->cellsize_min[dim],
3563 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
3567 if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
3569 /* Communicate the boundaries and update cell_ns_x0/1 */
3570 dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
3571 if (dd->bGridJump && dd->ndim > 1)
3573 check_grid_jump(step,dd,ddbox);
3578 static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
3580 if (YY < npbcdim)
3582 tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
3584 else
3586 tcm[YY][XX] = 0;
3588 if (ZZ < npbcdim)
3590 tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
3591 tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
3593 else
3595 tcm[ZZ][XX] = 0;
3596 tcm[ZZ][YY] = 0;
3600 static void check_screw_box(matrix box)
3602 /* Mathematical limitation */
3603 if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
3605 gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
3608 /* Limitation due to the asymmetry of the eighth shell method */
3609 if (box[ZZ][YY] != 0)
3611 gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
3615 static void distribute_cg(FILE *fplog,gmx_large_int_t step,
3616 matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
3617 gmx_domdec_t *dd)
3619 gmx_domdec_master_t *ma;
3620 int **tmp_ind=NULL,*tmp_nalloc=NULL;
3621 int i,icg,j,k,k0,k1,d,npbcdim;
3622 matrix tcm;
3623 rvec box_size,cg_cm;
3624 ivec ind;
3625 real nrcg,inv_ncg,pos_d;
3626 atom_id *cgindex;
3627 bool bUnbounded,bScrew;
3629 ma = dd->ma;
3631 if (tmp_ind == NULL)
3633 snew(tmp_nalloc,dd->nnodes);
3634 snew(tmp_ind,dd->nnodes);
3635 for(i=0; i<dd->nnodes; i++)
3637 tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
3638 snew(tmp_ind[i],tmp_nalloc[i]);
3642 /* Clear the count */
3643 for(i=0; i<dd->nnodes; i++)
3645 ma->ncg[i] = 0;
3646 ma->nat[i] = 0;
3649 make_tric_corr_matrix(dd->npbcdim,box,tcm);
3651 cgindex = cgs->index;
3653 /* Compute the center of geometry for all charge groups */
3654 for(icg=0; icg<cgs->nr; icg++)
3656 k0 = cgindex[icg];
3657 k1 = cgindex[icg+1];
3658 nrcg = k1 - k0;
3659 if (nrcg == 1)
3661 copy_rvec(pos[k0],cg_cm);
3663 else
3665 inv_ncg = 1.0/nrcg;
3667 clear_rvec(cg_cm);
3668 for(k=k0; (k<k1); k++)
3670 rvec_inc(cg_cm,pos[k]);
3672 for(d=0; (d<DIM); d++)
3674 cg_cm[d] *= inv_ncg;
3677 /* Put the charge group in the box and determine the cell index */
3678 for(d=DIM-1; d>=0; d--) {
3679 pos_d = cg_cm[d];
3680 if (d < dd->npbcdim)
3682 bScrew = (dd->bScrewPBC && d == XX);
3683 if (tric_dir[d] && dd->nc[d] > 1)
3685 /* Use triclinic coordintates for this dimension */
3686 for(j=d+1; j<DIM; j++)
3688 pos_d += cg_cm[j]*tcm[j][d];
3691 while(pos_d >= box[d][d])
3693 pos_d -= box[d][d];
3694 rvec_dec(cg_cm,box[d]);
3695 if (bScrew)
3697 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3698 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3700 for(k=k0; (k<k1); k++)
3702 rvec_dec(pos[k],box[d]);
3703 if (bScrew)
3705 pos[k][YY] = box[YY][YY] - pos[k][YY];
3706 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3710 while(pos_d < 0)
3712 pos_d += box[d][d];
3713 rvec_inc(cg_cm,box[d]);
3714 if (bScrew)
3716 cg_cm[YY] = box[YY][YY] - cg_cm[YY];
3717 cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
3719 for(k=k0; (k<k1); k++)
3721 rvec_inc(pos[k],box[d]);
3722 if (bScrew) {
3723 pos[k][YY] = box[YY][YY] - pos[k][YY];
3724 pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
3729 /* This could be done more efficiently */
3730 ind[d] = 0;
3731 while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
3733 ind[d]++;
3736 i = dd_index(dd->nc,ind);
3737 if (ma->ncg[i] == tmp_nalloc[i])
3739 tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
3740 srenew(tmp_ind[i],tmp_nalloc[i]);
3742 tmp_ind[i][ma->ncg[i]] = icg;
3743 ma->ncg[i]++;
3744 ma->nat[i] += cgindex[icg+1] - cgindex[icg];
3747 k1 = 0;
3748 for(i=0; i<dd->nnodes; i++)
3750 ma->index[i] = k1;
3751 for(k=0; k<ma->ncg[i]; k++)
3753 ma->cg[k1++] = tmp_ind[i][k];
3756 ma->index[dd->nnodes] = k1;
3758 for(i=0; i<dd->nnodes; i++)
3760 sfree(tmp_ind[i]);
3762 sfree(tmp_ind);
3763 sfree(tmp_nalloc);
3765 if (fplog)
3767 char buf[22];
3768 fprintf(fplog,"Charge group distribution at step %s:",
3769 gmx_step_str(step,buf));
3770 for(i=0; i<dd->nnodes; i++)
3772 fprintf(fplog," %d",ma->ncg[i]);
3774 fprintf(fplog,"\n");
3778 static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
3779 t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
3780 rvec pos[])
3782 gmx_domdec_master_t *ma=NULL;
3783 ivec npulse;
3784 int i,cg_gl;
3785 int *ibuf,buf2[2] = { 0, 0 };
3787 if (DDMASTER(dd))
3789 ma = dd->ma;
3791 if (dd->bScrewPBC)
3793 check_screw_box(box);
3796 set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
3798 distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
3799 for(i=0; i<dd->nnodes; i++)
3801 ma->ibuf[2*i] = ma->ncg[i];
3802 ma->ibuf[2*i+1] = ma->nat[i];
3804 ibuf = ma->ibuf;
3806 else
3808 ibuf = NULL;
3810 dd_scatter(dd,2*sizeof(int),ibuf,buf2);
3812 dd->ncg_home = buf2[0];
3813 dd->nat_home = buf2[1];
3814 dd->ncg_tot = dd->ncg_home;
3815 dd->nat_tot = dd->nat_home;
3816 if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
3818 dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
3819 srenew(dd->index_gl,dd->cg_nalloc);
3820 srenew(dd->cgindex,dd->cg_nalloc+1);
3822 if (DDMASTER(dd))
3824 for(i=0; i<dd->nnodes; i++)
3826 ma->ibuf[i] = ma->ncg[i]*sizeof(int);
3827 ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
3831 dd_scatterv(dd,
3832 DDMASTER(dd) ? ma->ibuf : NULL,
3833 DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
3834 DDMASTER(dd) ? ma->cg : NULL,
3835 dd->ncg_home*sizeof(int),dd->index_gl);
3837 /* Determine the home charge group sizes */
3838 dd->cgindex[0] = 0;
3839 for(i=0; i<dd->ncg_home; i++)
3841 cg_gl = dd->index_gl[i];
3842 dd->cgindex[i+1] =
3843 dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
3846 if (debug)
3848 fprintf(debug,"Home charge groups:\n");
3849 for(i=0; i<dd->ncg_home; i++)
3851 fprintf(debug," %d",dd->index_gl[i]);
3852 if (i % 10 == 9)
3853 fprintf(debug,"\n");
3855 fprintf(debug,"\n");
3859 static int compact_and_copy_vec_at(int ncg,int *move,
3860 int *cgindex,
3861 int nvec,int vec,
3862 rvec *src,gmx_domdec_comm_t *comm,
3863 bool bCompact)
3865 int m,icg,i,i0,i1,nrcg;
3866 int home_pos;
3867 int pos_vec[DIM*2];
3869 home_pos = 0;
3871 for(m=0; m<DIM*2; m++)
3873 pos_vec[m] = 0;
3876 i0 = 0;
3877 for(icg=0; icg<ncg; icg++)
3879 i1 = cgindex[icg+1];
3880 m = move[icg];
3881 if (m == -1)
3883 if (bCompact)
3885 /* Compact the home array in place */
3886 for(i=i0; i<i1; i++)
3888 copy_rvec(src[i],src[home_pos++]);
3892 else
3894 /* Copy to the communication buffer */
3895 nrcg = i1 - i0;
3896 pos_vec[m] += 1 + vec*nrcg;
3897 for(i=i0; i<i1; i++)
3899 copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
3901 pos_vec[m] += (nvec - vec - 1)*nrcg;
3903 if (!bCompact)
3905 home_pos += i1 - i0;
3907 i0 = i1;
3910 return home_pos;
3913 static int compact_and_copy_vec_cg(int ncg,int *move,
3914 int *cgindex,
3915 int nvec,rvec *src,gmx_domdec_comm_t *comm,
3916 bool bCompact)
3918 int m,icg,i0,i1,nrcg;
3919 int home_pos;
3920 int pos_vec[DIM*2];
3922 home_pos = 0;
3924 for(m=0; m<DIM*2; m++)
3926 pos_vec[m] = 0;
3929 i0 = 0;
3930 for(icg=0; icg<ncg; icg++)
3932 i1 = cgindex[icg+1];
3933 m = move[icg];
3934 if (m == -1)
3936 if (bCompact)
3938 /* Compact the home array in place */
3939 copy_rvec(src[icg],src[home_pos++]);
3942 else
3944 nrcg = i1 - i0;
3945 /* Copy to the communication buffer */
3946 copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
3947 pos_vec[m] += 1 + nrcg*nvec;
3949 i0 = i1;
3951 if (!bCompact)
3953 home_pos = ncg;
3956 return home_pos;
3959 static int compact_ind(int ncg,int *move,
3960 int *index_gl,int *cgindex,
3961 int *gatindex,
3962 gmx_ga2la_t ga2la,char *bLocalCG,
3963 int *cginfo)
3965 int cg,nat,a0,a1,a,a_gl;
3966 int home_pos;
3968 home_pos = 0;
3969 nat = 0;
3970 for(cg=0; cg<ncg; cg++)
3972 a0 = cgindex[cg];
3973 a1 = cgindex[cg+1];
3974 if (move[cg] == -1)
3976 /* Compact the home arrays in place.
3977 * Anything that can be done here avoids access to global arrays.
3979 cgindex[home_pos] = nat;
3980 for(a=a0; a<a1; a++)
3982 a_gl = gatindex[a];
3983 gatindex[nat] = a_gl;
3984 /* The cell number stays 0, so we don't need to set it */
3985 ga2la_change_la(ga2la,a_gl,nat);
3986 nat++;
3988 index_gl[home_pos] = index_gl[cg];
3989 cginfo[home_pos] = cginfo[cg];
3990 /* The charge group remains local, so bLocalCG does not change */
3991 home_pos++;
3993 else
3995 /* Clear the global indices */
3996 for(a=a0; a<a1; a++)
3998 ga2la_del(ga2la,gatindex[a]);
4000 if (bLocalCG)
4002 bLocalCG[index_gl[cg]] = FALSE;
4006 cgindex[home_pos] = nat;
4008 return home_pos;
4011 static void clear_and_mark_ind(int ncg,int *move,
4012 int *index_gl,int *cgindex,int *gatindex,
4013 gmx_ga2la_t ga2la,char *bLocalCG,
4014 int *cell_index)
4016 int cg,a0,a1,a;
4018 for(cg=0; cg<ncg; cg++)
4020 if (move[cg] >= 0)
4022 a0 = cgindex[cg];
4023 a1 = cgindex[cg+1];
4024 /* Clear the global indices */
4025 for(a=a0; a<a1; a++)
4027 ga2la_del(ga2la,gatindex[a]);
4029 if (bLocalCG)
4031 bLocalCG[index_gl[cg]] = FALSE;
4033 /* Signal that this cg has moved using the ns cell index.
4034 * Here we set it to -1.
4035 * fill_grid will change it from -1 to 4*grid->ncells.
4037 cell_index[cg] = -1;
4042 static void print_cg_move(FILE *fplog,
4043 gmx_domdec_t *dd,
4044 gmx_large_int_t step,int cg,int dim,int dir,
4045 real limitd,
4046 rvec cm_old,rvec cm_new,real pos_d)
4048 gmx_domdec_comm_t *comm;
4049 char buf[22];
4051 comm = dd->comm;
4053 fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
4054 fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
4055 ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
4056 fprintf(fplog,"distance out of cell %f\n",
4057 dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
4058 fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
4059 cm_old[XX],cm_old[YY],cm_old[ZZ]);
4060 fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
4061 cm_new[XX],cm_new[YY],cm_new[ZZ]);
4062 fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
4063 dim2char(dim),
4064 comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
4065 fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
4066 dim2char(dim),
4067 comm->cell_x0[dim],comm->cell_x1[dim]);
4070 static void cg_move_error(FILE *fplog,
4071 gmx_domdec_t *dd,
4072 gmx_large_int_t step,int cg,int dim,int dir,
4073 real limitd,
4074 rvec cm_old,rvec cm_new,real pos_d)
4076 if (fplog)
4078 print_cg_move(fplog, dd,step,cg,dim,dir,limitd,cm_old,cm_new,pos_d);
4080 print_cg_move(stderr,dd,step,cg,dim,dir,limitd,cm_old,cm_new,pos_d);
4081 gmx_fatal(FARGS,
4082 "A charge group moved too far between two domain decomposition steps\n"
4083 "This usually means that your system is not well equilibrated");
4086 static void rotate_state_atom(t_state *state,int a)
4088 int est;
4090 for(est=0; est<estNR; est++)
4092 if (EST_DISTR(est) && state->flags & (1<<est)) {
4093 switch (est) {
4094 case estX:
4095 /* Rotate the complete state; for a rectangular box only */
4096 state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
4097 state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
4098 break;
4099 case estV:
4100 state->v[a][YY] = -state->v[a][YY];
4101 state->v[a][ZZ] = -state->v[a][ZZ];
4102 break;
4103 case estSDX:
4104 state->sd_X[a][YY] = -state->sd_X[a][YY];
4105 state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
4106 break;
4107 case estCGP:
4108 state->cg_p[a][YY] = -state->cg_p[a][YY];
4109 state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
4110 break;
4111 case estDISRE_INITF:
4112 case estDISRE_RM3TAV:
4113 case estORIRE_INITF:
4114 case estORIRE_DTAV:
4115 /* These are distances, so not affected by rotation */
4116 break;
4117 default:
4118 gmx_incons("Unknown state entry encountered in rotate_state_atom");
4124 static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
4125 gmx_domdec_t *dd,ivec tric_dir,
4126 t_state *state,rvec **f,
4127 t_forcerec *fr,t_mdatoms *md,
4128 bool bCompact,
4129 t_nrnb *nrnb)
4131 int *move;
4132 int npbcdim;
4133 int ncg[DIM*2],nat[DIM*2];
4134 int c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
4135 int mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
4136 int sbuf[2],rbuf[2];
4137 int home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
4138 int flag;
4139 bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
4140 bool bScrew;
4141 ivec dev;
4142 real inv_ncg,pos_d;
4143 matrix tcm;
4144 rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
4145 atom_id *cgindex;
4146 cginfo_mb_t *cginfo_mb;
4147 gmx_domdec_comm_t *comm;
4149 if (dd->bScrewPBC)
4151 check_screw_box(state->box);
4154 comm = dd->comm;
4155 cg_cm = fr->cg_cm;
4157 for(i=0; i<estNR; i++)
4159 if (EST_DISTR(i))
4161 switch (i)
4163 case estX: /* Always present */ break;
4164 case estV: bV = (state->flags & (1<<i)); break;
4165 case estSDX: bSDX = (state->flags & (1<<i)); break;
4166 case estCGP: bCGP = (state->flags & (1<<i)); break;
4167 case estLD_RNG:
4168 case estLD_RNGI:
4169 case estDISRE_INITF:
4170 case estDISRE_RM3TAV:
4171 case estORIRE_INITF:
4172 case estORIRE_DTAV:
4173 /* No processing required */
4174 break;
4175 default:
4176 gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
4181 if (dd->ncg_tot > comm->nalloc_int)
4183 comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
4184 srenew(comm->buf_int,comm->nalloc_int);
4186 move = comm->buf_int;
4188 /* Clear the count */
4189 for(c=0; c<dd->ndim*2; c++)
4191 ncg[c] = 0;
4192 nat[c] = 0;
4195 npbcdim = dd->npbcdim;
4197 for(d=0; (d<DIM); d++)
4199 limitd[d] = dd->comm->cellsize_min[d];
4200 if (d >= npbcdim && dd->ci[d] == 0)
4202 cell_x0[d] = -GMX_FLOAT_MAX;
4204 else
4206 cell_x0[d] = comm->cell_x0[d];
4208 if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
4210 cell_x1[d] = GMX_FLOAT_MAX;
4212 else
4214 cell_x1[d] = comm->cell_x1[d];
4216 limit0[d] = comm->old_cell_x0[d] - limitd[d];
4217 limit1[d] = comm->old_cell_x1[d] + limitd[d];
4220 make_tric_corr_matrix(npbcdim,state->box,tcm);
4222 cgindex = dd->cgindex;
4224 /* Compute the center of geometry for all home charge groups
4225 * and put them in the box and determine where they should go.
4227 for(cg=0; cg<dd->ncg_home; cg++)
4229 k0 = cgindex[cg];
4230 k1 = cgindex[cg+1];
4231 nrcg = k1 - k0;
4232 if (nrcg == 1)
4234 copy_rvec(state->x[k0],cm_new);
4236 else
4238 inv_ncg = 1.0/nrcg;
4240 clear_rvec(cm_new);
4241 for(k=k0; (k<k1); k++)
4243 rvec_inc(cm_new,state->x[k]);
4245 for(d=0; (d<DIM); d++)
4247 cm_new[d] = inv_ncg*cm_new[d];
4251 clear_ivec(dev);
4252 /* Do pbc and check DD cell boundary crossings */
4253 for(d=DIM-1; d>=0; d--)
4255 if (dd->nc[d] > 1)
4257 bScrew = (dd->bScrewPBC && d == XX);
4258 /* Determine the location of this cg in lattice coordinates */
4259 pos_d = cm_new[d];
4260 if (tric_dir[d])
4262 for(d2=d+1; d2<DIM; d2++)
4264 pos_d += cm_new[d2]*tcm[d2][d];
4267 /* Put the charge group in the triclinic unit-cell */
4268 if (pos_d >= cell_x1[d])
4270 if (pos_d >= limit1[d])
4272 cg_move_error(fplog,dd,step,cg,d,1,limitd[d],
4273 cg_cm[cg],cm_new,pos_d);
4275 dev[d] = 1;
4276 if (dd->ci[d] == dd->nc[d] - 1)
4278 rvec_dec(cm_new,state->box[d]);
4279 if (bScrew)
4281 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4282 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4284 for(k=k0; (k<k1); k++)
4286 rvec_dec(state->x[k],state->box[d]);
4287 if (bScrew)
4289 rotate_state_atom(state,k);
4294 else if (pos_d < cell_x0[d])
4296 if (pos_d < limit0[d])
4298 cg_move_error(fplog,dd,step,cg,d,-1,limitd[d],
4299 cg_cm[cg],cm_new,pos_d);
4301 dev[d] = -1;
4302 if (dd->ci[d] == 0)
4304 rvec_inc(cm_new,state->box[d]);
4305 if (bScrew)
4307 cm_new[YY] = state->box[YY][YY] - cm_new[YY];
4308 cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
4310 for(k=k0; (k<k1); k++)
4312 rvec_inc(state->x[k],state->box[d]);
4313 if (bScrew)
4315 rotate_state_atom(state,k);
4321 else if (d < npbcdim)
4323 /* Put the charge group in the rectangular unit-cell */
4324 while (cm_new[d] >= state->box[d][d])
4326 rvec_dec(cm_new,state->box[d]);
4327 for(k=k0; (k<k1); k++)
4329 rvec_dec(state->x[k],state->box[d]);
4332 while (cm_new[d] < 0)
4334 rvec_inc(cm_new,state->box[d]);
4335 for(k=k0; (k<k1); k++)
4337 rvec_inc(state->x[k],state->box[d]);
4343 copy_rvec(cm_new,cg_cm[cg]);
4345 /* Determine where this cg should go */
4346 flag = 0;
4347 mc = -1;
4348 for(d=0; d<dd->ndim; d++)
4350 dim = dd->dim[d];
4351 if (dev[dim] == 1)
4353 flag |= DD_FLAG_FW(d);
4354 if (mc == -1)
4356 mc = d*2;
4359 else if (dev[dim] == -1)
4361 flag |= DD_FLAG_BW(d);
4362 if (mc == -1) {
4363 if (dd->nc[dim] > 2)
4365 mc = d*2 + 1;
4367 else
4369 mc = d*2;
4374 move[cg] = mc;
4375 if (mc >= 0)
4377 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4379 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4380 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4382 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
4383 /* We store the cg size in the lower 16 bits
4384 * and the place where the charge group should go
4385 * in the next 6 bits. This saves some communication volume.
4387 comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
4388 ncg[mc] += 1;
4389 nat[mc] += nrcg;
4393 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
4394 inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
4396 nvec = 1;
4397 if (bV)
4399 nvec++;
4401 if (bSDX)
4403 nvec++;
4405 if (bCGP)
4407 nvec++;
4410 /* Make sure the communication buffers are large enough */
4411 for(mc=0; mc<dd->ndim*2; mc++)
4413 nvr = ncg[mc] + nat[mc]*nvec;
4414 if (nvr > comm->cgcm_state_nalloc[mc])
4416 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
4417 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4421 /* Recalculating cg_cm might be cheaper than communicating,
4422 * but that could give rise to rounding issues.
4424 home_pos_cg =
4425 compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
4426 nvec,cg_cm,comm,bCompact);
4428 vec = 0;
4429 home_pos_at =
4430 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4431 nvec,vec++,state->x,comm,bCompact);
4432 if (bV)
4434 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4435 nvec,vec++,state->v,comm,bCompact);
4437 if (bSDX)
4439 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4440 nvec,vec++,state->sd_X,comm,bCompact);
4442 if (bCGP)
4444 compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
4445 nvec,vec++,state->cg_p,comm,bCompact);
4448 if (bCompact)
4450 compact_ind(dd->ncg_home,move,
4451 dd->index_gl,dd->cgindex,dd->gatindex,
4452 dd->ga2la,comm->bLocalCG,
4453 fr->cginfo);
4455 else
4457 clear_and_mark_ind(dd->ncg_home,move,
4458 dd->index_gl,dd->cgindex,dd->gatindex,
4459 dd->ga2la,comm->bLocalCG,
4460 fr->ns.grid->cell_index);
4463 cginfo_mb = fr->cginfo_mb;
4465 ncg_stay_home = home_pos_cg;
4466 for(d=0; d<dd->ndim; d++)
4468 dim = dd->dim[d];
4469 ncg_recv = 0;
4470 nat_recv = 0;
4471 nvr = 0;
4472 for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
4474 cdd = d*2 + dir;
4475 /* Communicate the cg and atom counts */
4476 sbuf[0] = ncg[cdd];
4477 sbuf[1] = nat[cdd];
4478 if (debug)
4480 fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
4481 d,dir,sbuf[0],sbuf[1]);
4483 dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
4485 if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
4487 comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
4488 srenew(comm->buf_int,comm->nalloc_int);
4491 /* Communicate the charge group indices, sizes and flags */
4492 dd_sendrecv_int(dd, d, dir,
4493 comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
4494 comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
4496 nvs = ncg[cdd] + nat[cdd]*nvec;
4497 i = rbuf[0] + rbuf[1] *nvec;
4498 vec_rvec_check_alloc(&comm->vbuf,nvr+i);
4500 /* Communicate cgcm and state */
4501 dd_sendrecv_rvec(dd, d, dir,
4502 comm->cgcm_state[cdd], nvs,
4503 comm->vbuf.v+nvr, i);
4504 ncg_recv += rbuf[0];
4505 nat_recv += rbuf[1];
4506 nvr += i;
4509 /* Process the received charge groups */
4510 buf_pos = 0;
4511 for(cg=0; cg<ncg_recv; cg++)
4513 flag = comm->buf_int[cg*DD_CGIBS+1];
4514 mc = -1;
4515 if (d < dd->ndim-1)
4517 /* Check which direction this cg should go */
4518 for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
4520 if (dd->bGridJump)
4522 /* The cell boundaries for dimension d2 are not equal
4523 * for each cell row of the lower dimension(s),
4524 * therefore we might need to redetermine where
4525 * this cg should go.
4527 dim2 = dd->dim[d2];
4528 /* If this cg crosses the box boundary in dimension d2
4529 * we can use the communicated flag, so we do not
4530 * have to worry about pbc.
4532 if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
4533 (flag & DD_FLAG_FW(d2))) ||
4534 (dd->ci[dim2] == 0 &&
4535 (flag & DD_FLAG_BW(d2)))))
4537 /* Clear the two flags for this dimension */
4538 flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
4539 /* Determine the location of this cg
4540 * in lattice coordinates
4542 pos_d = comm->vbuf.v[buf_pos][dim2];
4543 if (tric_dir[dim2])
4545 for(d3=dim2+1; d3<DIM; d3++)
4547 pos_d +=
4548 comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
4551 /* Check of we are not at the box edge.
4552 * pbc is only handled in the first step above,
4553 * but this check could move over pbc while
4554 * the first step did not due to different rounding.
4556 if (pos_d >= cell_x1[dim2] &&
4557 dd->ci[dim2] != dd->nc[dim2]-1)
4559 flag |= DD_FLAG_FW(d2);
4561 else if (pos_d < cell_x0[dim2] &&
4562 dd->ci[dim2] != 0)
4564 flag |= DD_FLAG_BW(d2);
4566 comm->buf_int[cg*DD_CGIBS+1] = flag;
4569 /* Set to which neighboring cell this cg should go */
4570 if (flag & DD_FLAG_FW(d2))
4572 mc = d2*2;
4574 else if (flag & DD_FLAG_BW(d2))
4576 if (dd->nc[dd->dim[d2]] > 2)
4578 mc = d2*2+1;
4580 else
4582 mc = d2*2;
4588 nrcg = flag & DD_FLAG_NRCG;
4589 if (mc == -1)
4591 if (home_pos_cg+1 > dd->cg_nalloc)
4593 dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
4594 srenew(dd->index_gl,dd->cg_nalloc);
4595 srenew(dd->cgindex,dd->cg_nalloc+1);
4597 /* Set the global charge group index and size */
4598 dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
4599 dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
4600 /* Copy the state from the buffer */
4601 if (home_pos_cg >= fr->cg_nalloc)
4603 dd_realloc_fr_cg(fr,home_pos_cg+1);
4604 cg_cm = fr->cg_cm;
4606 copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
4607 /* Set the cginfo */
4608 fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
4609 dd->index_gl[home_pos_cg]);
4610 if (comm->bLocalCG)
4612 comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
4615 if (home_pos_at+nrcg > state->nalloc)
4617 dd_realloc_state(state,f,home_pos_at+nrcg);
4619 for(i=0; i<nrcg; i++)
4621 copy_rvec(comm->vbuf.v[buf_pos++],
4622 state->x[home_pos_at+i]);
4624 if (bV)
4626 for(i=0; i<nrcg; i++)
4628 copy_rvec(comm->vbuf.v[buf_pos++],
4629 state->v[home_pos_at+i]);
4632 if (bSDX)
4634 for(i=0; i<nrcg; i++)
4636 copy_rvec(comm->vbuf.v[buf_pos++],
4637 state->sd_X[home_pos_at+i]);
4640 if (bCGP)
4642 for(i=0; i<nrcg; i++)
4644 copy_rvec(comm->vbuf.v[buf_pos++],
4645 state->cg_p[home_pos_at+i]);
4648 home_pos_cg += 1;
4649 home_pos_at += nrcg;
4651 else
4653 /* Reallocate the buffers if necessary */
4654 if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
4656 comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
4657 srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
4659 nvr = ncg[mc] + nat[mc]*nvec;
4660 if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
4662 comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
4663 srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
4665 /* Copy from the receive to the send buffers */
4666 memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
4667 comm->buf_int + cg*DD_CGIBS,
4668 DD_CGIBS*sizeof(int));
4669 memcpy(comm->cgcm_state[mc][nvr],
4670 comm->vbuf.v[buf_pos],
4671 (1+nrcg*nvec)*sizeof(rvec));
4672 buf_pos += 1 + nrcg*nvec;
4673 ncg[mc] += 1;
4674 nat[mc] += nrcg;
4679 /* With sorting (!bCompact) the indices are now only partially up to date
4680 * and ncg_home and nat_home are not the real count, since there are
4681 * "holes" in the arrays for the charge groups that moved to neighbors.
4683 dd->ncg_home = home_pos_cg;
4684 dd->nat_home = home_pos_at;
4686 if (debug)
4688 fprintf(debug,"Finished repartitioning\n");
4691 return ncg_stay_home;
4694 void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
4696 dd->comm->cycl[ddCycl] += cycles;
4697 dd->comm->cycl_n[ddCycl]++;
4698 if (cycles > dd->comm->cycl_max[ddCycl])
4700 dd->comm->cycl_max[ddCycl] = cycles;
4704 static double force_flop_count(t_nrnb *nrnb)
4706 int i;
4707 double sum;
4708 const char *name;
4710 sum = 0;
4711 for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
4713 /* To get closer to the real timings, we half the count
4714 * for the normal loops and again half it for water loops.
4716 name = nrnb_str(i);
4717 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4719 sum += nrnb->n[i]*0.25*cost_nrnb(i);
4721 else
4723 sum += nrnb->n[i]*0.50*cost_nrnb(i);
4726 for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
4728 name = nrnb_str(i);
4729 if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
4730 sum += nrnb->n[i]*cost_nrnb(i);
4732 for(i=eNR_BONDS; i<=eNR_WALLS; i++)
4734 sum += nrnb->n[i]*cost_nrnb(i);
4737 return sum;
4740 void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
4742 if (dd->comm->eFlop)
4744 dd->comm->flop -= force_flop_count(nrnb);
4747 void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
4749 if (dd->comm->eFlop)
4751 dd->comm->flop += force_flop_count(nrnb);
4752 dd->comm->flop_n++;
4756 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
4758 int i;
4760 for(i=0; i<ddCyclNr; i++)
4762 dd->comm->cycl[i] = 0;
4763 dd->comm->cycl_n[i] = 0;
4764 dd->comm->cycl_max[i] = 0;
4766 dd->comm->flop = 0;
4767 dd->comm->flop_n = 0;
4770 static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
4772 gmx_domdec_comm_t *comm;
4773 gmx_domdec_load_t *load;
4774 gmx_domdec_root_t *root=NULL;
4775 int d,dim,cid,i,pos;
4776 float cell_frac=0,sbuf[DD_NLOAD_MAX];
4777 bool bSepPME;
4779 if (debug)
4781 fprintf(debug,"get_load_distribution start\n");
4784 wallcycle_start(wcycle,ewcDDCOMMLOAD);
4786 comm = dd->comm;
4788 bSepPME = (dd->pme_nodeid >= 0);
4790 for(d=dd->ndim-1; d>=0; d--)
4792 dim = dd->dim[d];
4793 /* Check if we participate in the communication in this dimension */
4794 if (d == dd->ndim-1 ||
4795 (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
4797 load = &comm->load[d];
4798 if (dd->bGridJump)
4800 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
4802 pos = 0;
4803 if (d == dd->ndim-1)
4805 sbuf[pos++] = dd_force_load(comm);
4806 sbuf[pos++] = sbuf[0];
4807 if (dd->bGridJump)
4809 sbuf[pos++] = sbuf[0];
4810 sbuf[pos++] = cell_frac;
4811 if (d > 0)
4813 sbuf[pos++] = comm->cell_f_max0[d];
4814 sbuf[pos++] = comm->cell_f_min1[d];
4817 if (bSepPME)
4819 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
4820 sbuf[pos++] = comm->cycl[ddCyclPME];
4823 else
4825 sbuf[pos++] = comm->load[d+1].sum;
4826 sbuf[pos++] = comm->load[d+1].max;
4827 if (dd->bGridJump)
4829 sbuf[pos++] = comm->load[d+1].sum_m;
4830 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
4831 sbuf[pos++] = comm->load[d+1].flags;
4832 if (d > 0)
4834 sbuf[pos++] = comm->cell_f_max0[d];
4835 sbuf[pos++] = comm->cell_f_min1[d];
4838 if (bSepPME)
4840 sbuf[pos++] = comm->load[d+1].mdf;
4841 sbuf[pos++] = comm->load[d+1].pme;
4844 load->nload = pos;
4845 /* Communicate a row in DD direction d.
4846 * The communicators are setup such that the root always has rank 0.
4848 #ifdef GMX_MPI
4849 MPI_Gather(sbuf ,load->nload*sizeof(float),MPI_BYTE,
4850 load->load,load->nload*sizeof(float),MPI_BYTE,
4851 0,comm->mpi_comm_load[d]);
4852 #endif
4853 if (dd->ci[dim] == dd->master_ci[dim])
4855 /* We are the root, process this row */
4856 if (comm->bDynLoadBal)
4858 root = comm->root[d];
4860 load->sum = 0;
4861 load->max = 0;
4862 load->sum_m = 0;
4863 load->cvol_min = 1;
4864 load->flags = 0;
4865 load->mdf = 0;
4866 load->pme = 0;
4867 pos = 0;
4868 for(i=0; i<dd->nc[dim]; i++)
4870 load->sum += load->load[pos++];
4871 load->max = max(load->max,load->load[pos]);
4872 pos++;
4873 if (dd->bGridJump)
4875 if (root->bLimited)
4877 /* This direction could not be load balanced properly,
4878 * therefore we need to use the maximum iso the average load.
4880 load->sum_m = max(load->sum_m,load->load[pos]);
4882 else
4884 load->sum_m += load->load[pos];
4886 pos++;
4887 load->cvol_min = min(load->cvol_min,load->load[pos]);
4888 pos++;
4889 if (d < dd->ndim-1)
4891 load->flags = (int)(load->load[pos++] + 0.5);
4893 if (d > 0)
4895 root->cell_f_max0[i] = load->load[pos++];
4896 root->cell_f_min1[i] = load->load[pos++];
4899 if (bSepPME)
4901 load->mdf = max(load->mdf,load->load[pos]);
4902 pos++;
4903 load->pme = max(load->pme,load->load[pos]);
4904 pos++;
4907 if (comm->bDynLoadBal && root->bLimited)
4909 load->sum_m *= dd->nc[dim];
4910 load->flags |= (1<<d);
4916 if (DDMASTER(dd))
4918 comm->nload += dd_load_count(comm);
4919 comm->load_step += comm->cycl[ddCyclStep];
4920 comm->load_sum += comm->load[0].sum;
4921 comm->load_max += comm->load[0].max;
4922 if (comm->bDynLoadBal)
4924 for(d=0; d<dd->ndim; d++)
4926 if (comm->load[0].flags & (1<<d))
4928 comm->load_lim[d]++;
4932 if (bSepPME)
4934 comm->load_mdf += comm->load[0].mdf;
4935 comm->load_pme += comm->load[0].pme;
4939 wallcycle_stop(wcycle,ewcDDCOMMLOAD);
4941 if (debug)
4943 fprintf(debug,"get_load_distribution finished\n");
4947 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
4949 /* Return the relative performance loss on the total run time
4950 * due to the force calculation load imbalance.
4952 if (dd->comm->nload > 0)
4954 return
4955 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
4956 (dd->comm->load_step*dd->nnodes);
4958 else
4960 return 0;
4964 static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
4966 char buf[STRLEN];
4967 int npp,npme,nnodes,d,limp;
4968 float imbal,pme_f_ratio,lossf,lossp=0;
4969 bool bLim;
4970 gmx_domdec_comm_t *comm;
4972 comm = dd->comm;
4973 if (DDMASTER(dd) && comm->nload > 0)
4975 npp = dd->nnodes;
4976 npme = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
4977 nnodes = npp + npme;
4978 imbal = comm->load_max*npp/comm->load_sum - 1;
4979 lossf = dd_force_imb_perf_loss(dd);
4980 sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
4981 fprintf(fplog,"%s",buf);
4982 fprintf(stderr,"\n");
4983 fprintf(stderr,"%s",buf);
4984 sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
4985 fprintf(fplog,"%s",buf);
4986 fprintf(stderr,"%s",buf);
4987 bLim = FALSE;
4988 if (comm->bDynLoadBal)
4990 sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
4991 for(d=0; d<dd->ndim; d++)
4993 limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
4994 sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
4995 if (limp >= 50)
4997 bLim = TRUE;
5000 sprintf(buf+strlen(buf),"\n");
5001 fprintf(fplog,"%s",buf);
5002 fprintf(stderr,"%s",buf);
5004 if (npme > 0)
5006 pme_f_ratio = comm->load_pme/comm->load_mdf;
5007 lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
5008 if (lossp <= 0)
5010 lossp *= (float)npme/(float)nnodes;
5012 else
5014 lossp *= (float)npp/(float)nnodes;
5016 sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
5017 fprintf(fplog,"%s",buf);
5018 fprintf(stderr,"%s",buf);
5019 sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
5020 fprintf(fplog,"%s",buf);
5021 fprintf(stderr,"%s",buf);
5023 fprintf(fplog,"\n");
5024 fprintf(stderr,"\n");
5026 if (lossf >= DD_PERF_LOSS)
5028 sprintf(buf,
5029 "NOTE: %.1f %% performance was lost due to load imbalance\n"
5030 " in the domain decomposition.\n",lossf*100);
5031 if (!comm->bDynLoadBal)
5033 sprintf(buf+strlen(buf)," You might want to use dynamic load balancing (option -dlb.)\n");
5035 else if (bLim)
5037 sprintf(buf+strlen(buf)," You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
5039 fprintf(fplog,"%s\n",buf);
5040 fprintf(stderr,"%s\n",buf);
5042 if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
5044 sprintf(buf,
5045 "NOTE: %.1f %% performance was lost because the PME nodes\n"
5046 " had %s work to do than the PP nodes.\n"
5047 " You might want to %s the number of PME nodes\n"
5048 " or %s the cut-off and the grid spacing.\n",
5049 fabs(lossp*100),
5050 (lossp < 0) ? "less" : "more",
5051 (lossp < 0) ? "decrease" : "increase",
5052 (lossp < 0) ? "decrease" : "increase");
5053 fprintf(fplog,"%s\n",buf);
5054 fprintf(stderr,"%s\n",buf);
5059 static float dd_vol_min(gmx_domdec_t *dd)
5061 return dd->comm->load[0].cvol_min*dd->nnodes;
5064 static bool dd_load_flags(gmx_domdec_t *dd)
5066 return dd->comm->load[0].flags;
5069 static float dd_f_imbal(gmx_domdec_t *dd)
5071 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
5074 static float dd_pme_f_ratio(gmx_domdec_t *dd)
5076 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
5079 static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
5081 int flags,d;
5082 char buf[22];
5084 flags = dd_load_flags(dd);
5085 if (flags)
5087 fprintf(fplog,
5088 "DD load balancing is limited by minimum cell size in dimension");
5089 for(d=0; d<dd->ndim; d++)
5091 if (flags & (1<<d))
5093 fprintf(fplog," %c",dim2char(dd->dim[d]));
5096 fprintf(fplog,"\n");
5098 fprintf(fplog,"DD step %s",gmx_step_str(step,buf));
5099 if (dd->comm->bDynLoadBal)
5101 fprintf(fplog," vol min/aver %5.3f%c",
5102 dd_vol_min(dd),flags ? '!' : ' ');
5104 fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
5105 if (dd->comm->cycl_n[ddCyclPME])
5107 fprintf(fplog," pme mesh/force %5.3f",dd_pme_f_ratio(dd));
5109 fprintf(fplog,"\n\n");
5112 static void dd_print_load_verbose(gmx_domdec_t *dd)
5114 if (dd->comm->bDynLoadBal)
5116 fprintf(stderr,"vol %4.2f%c ",
5117 dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
5119 fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
5120 if (dd->comm->cycl_n[ddCyclPME])
5122 fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
5126 #ifdef GMX_MPI
5127 static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
5128 int dim_ind,ivec loc)
5130 MPI_Group g_row;
5131 MPI_Comm c_row;
5132 int dim,i,*rank;
5133 ivec loc_c;
5134 gmx_domdec_root_t *root;
5136 dim = dd->dim[dim_ind];
5137 copy_ivec(loc,loc_c);
5138 snew(rank,dd->nc[dim]);
5139 for(i=0; i<dd->nc[dim]; i++)
5141 loc_c[dim] = i;
5142 rank[i] = dd_index(dd->nc,loc_c);
5144 /* Here we create a new group, that does not necessarily
5145 * include our process. But MPI_Comm_create needs to be
5146 * called by all the processes in the original communicator.
5147 * Calling MPI_Group_free afterwards gives errors, so I assume
5148 * also the group is needed by all processes. (B. Hess)
5150 MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
5151 MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
5152 if (c_row != MPI_COMM_NULL)
5154 /* This process is part of the group */
5155 dd->comm->mpi_comm_load[dim_ind] = c_row;
5156 if (dd->comm->eDLB != edlbNO)
5158 if (dd->ci[dim] == dd->master_ci[dim])
5160 /* This is the root process of this row */
5161 snew(dd->comm->root[dim_ind],1);
5162 root = dd->comm->root[dim_ind];
5163 snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
5164 snew(root->old_cell_f,dd->nc[dim]+1);
5165 snew(root->bCellMin,dd->nc[dim]);
5166 if (dim_ind > 0)
5168 snew(root->cell_f_max0,dd->nc[dim]);
5169 snew(root->cell_f_min1,dd->nc[dim]);
5170 snew(root->bound_min,dd->nc[dim]);
5171 snew(root->bound_max,dd->nc[dim]);
5173 snew(root->buf_ncd,dd->nc[dim]);
5175 else
5177 /* This is not a root process, we only need to receive cell_f */
5178 snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
5181 if (dd->ci[dim] == dd->master_ci[dim])
5183 snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
5186 sfree(rank);
5188 #endif
5190 static void make_load_communicators(gmx_domdec_t *dd)
5192 #ifdef GMX_MPI
5193 MPI_Group g_all;
5194 int dim0,dim1,i,j;
5195 ivec loc;
5197 if (debug)
5198 fprintf(debug,"Making load communicators\n");
5200 MPI_Comm_group(dd->mpi_comm_all,&g_all);
5202 snew(dd->comm->load,dd->ndim);
5203 snew(dd->comm->mpi_comm_load,dd->ndim);
5205 clear_ivec(loc);
5206 make_load_communicator(dd,g_all,0,loc);
5207 if (dd->ndim > 1) {
5208 dim0 = dd->dim[0];
5209 for(i=0; i<dd->nc[dim0]; i++) {
5210 loc[dim0] = i;
5211 make_load_communicator(dd,g_all,1,loc);
5214 if (dd->ndim > 2) {
5215 dim0 = dd->dim[0];
5216 for(i=0; i<dd->nc[dim0]; i++) {
5217 loc[dim0] = i;
5218 dim1 = dd->dim[1];
5219 for(j=0; j<dd->nc[dim1]; j++) {
5220 loc[dim1] = j;
5221 make_load_communicator(dd,g_all,2,loc);
5226 MPI_Group_free(&g_all);
5228 if (debug)
5229 fprintf(debug,"Finished making load communicators\n");
5230 #endif
5233 void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
5235 bool bZYX;
5236 int d,dim,i,j,m;
5237 ivec tmp,s;
5238 int nzone,nzonep;
5239 ivec dd_zp[DD_MAXIZONE];
5240 gmx_domdec_zones_t *zones;
5241 gmx_domdec_ns_ranges_t *izone;
5243 for(d=0; d<dd->ndim; d++)
5245 dim = dd->dim[d];
5246 copy_ivec(dd->ci,tmp);
5247 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
5248 dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
5249 copy_ivec(dd->ci,tmp);
5250 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
5251 dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
5252 if (debug)
5254 fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
5255 dd->rank,dim,
5256 dd->neighbor[d][0],
5257 dd->neighbor[d][1]);
5261 if (DDMASTER(dd))
5263 fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
5264 dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5266 if (fplog)
5268 fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
5269 dd->ndim,
5270 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
5271 dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5273 switch (dd->ndim)
5275 case 3:
5276 nzone = dd_z3n;
5277 nzonep = dd_zp3n;
5278 for(i=0; i<nzonep; i++)
5280 copy_ivec(dd_zp3[i],dd_zp[i]);
5282 break;
5283 case 2:
5284 nzone = dd_z2n;
5285 nzonep = dd_zp2n;
5286 for(i=0; i<nzonep; i++)
5288 copy_ivec(dd_zp2[i],dd_zp[i]);
5290 break;
5291 case 1:
5292 nzone = dd_z1n;
5293 nzonep = dd_zp1n;
5294 for(i=0; i<nzonep; i++)
5296 copy_ivec(dd_zp1[i],dd_zp[i]);
5298 break;
5299 default:
5300 gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
5301 nzone = 0;
5302 nzonep = 0;
5305 zones = &dd->comm->zones;
5307 for(i=0; i<nzone; i++)
5309 m = 0;
5310 clear_ivec(zones->shift[i]);
5311 for(d=0; d<dd->ndim; d++)
5313 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
5317 zones->n = nzone;
5318 for(i=0; i<nzone; i++)
5320 for(d=0; d<DIM; d++)
5322 s[d] = dd->ci[d] - zones->shift[i][d];
5323 if (s[d] < 0)
5325 s[d] += dd->nc[d];
5327 else if (s[d] >= dd->nc[d])
5329 s[d] -= dd->nc[d];
5333 zones->nizone = nzonep;
5334 for(i=0; i<zones->nizone; i++)
5336 if (dd_zp[i][0] != i)
5338 gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
5340 izone = &zones->izone[i];
5341 izone->j0 = dd_zp[i][1];
5342 izone->j1 = dd_zp[i][2];
5343 for(dim=0; dim<DIM; dim++)
5345 if (dd->nc[dim] == 1)
5347 /* All shifts should be allowed */
5348 izone->shift0[dim] = -1;
5349 izone->shift1[dim] = 1;
5351 else
5354 izone->shift0[d] = 0;
5355 izone->shift1[d] = 0;
5356 for(j=izone->j0; j<izone->j1; j++) {
5357 if (dd->shift[j][d] > dd->shift[i][d])
5358 izone->shift0[d] = -1;
5359 if (dd->shift[j][d] < dd->shift[i][d])
5360 izone->shift1[d] = 1;
5364 int shift_diff;
5366 /* Assume the shift are not more than 1 cell */
5367 izone->shift0[dim] = 1;
5368 izone->shift1[dim] = -1;
5369 for(j=izone->j0; j<izone->j1; j++)
5371 shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
5372 if (shift_diff < izone->shift0[dim])
5374 izone->shift0[dim] = shift_diff;
5376 if (shift_diff > izone->shift1[dim])
5378 izone->shift1[dim] = shift_diff;
5385 if (dd->comm->eDLB != edlbNO)
5387 snew(dd->comm->root,dd->ndim);
5390 if (dd->comm->bRecordLoad)
5392 make_load_communicators(dd);
5396 static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
5398 gmx_domdec_t *dd;
5399 gmx_domdec_comm_t *comm;
5400 int i,rank,*buf;
5401 ivec periods;
5402 #ifdef GMX_MPI
5403 MPI_Comm comm_cart;
5404 #endif
5406 dd = cr->dd;
5407 comm = dd->comm;
5409 #ifdef GMX_MPI
5410 if (comm->bCartesianPP)
5412 /* Set up cartesian communication for the particle-particle part */
5413 if (fplog)
5415 fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
5416 dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
5419 for(i=0; i<DIM; i++)
5421 periods[i] = TRUE;
5423 MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
5424 &comm_cart);
5425 /* We overwrite the old communicator with the new cartesian one */
5426 cr->mpi_comm_mygroup = comm_cart;
5429 dd->mpi_comm_all = cr->mpi_comm_mygroup;
5430 MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
5432 if (comm->bCartesianPP_PME)
5434 /* Since we want to use the original cartesian setup for sim,
5435 * and not the one after split, we need to make an index.
5437 snew(comm->ddindex2ddnodeid,dd->nnodes);
5438 comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
5439 gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
5440 /* Get the rank of the DD master,
5441 * above we made sure that the master node is a PP node.
5443 if (MASTER(cr))
5445 rank = dd->rank;
5447 else
5449 rank = 0;
5451 MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
5453 else if (comm->bCartesianPP)
5455 if (cr->npmenodes == 0)
5457 /* The PP communicator is also
5458 * the communicator for this simulation
5460 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
5462 cr->nodeid = dd->rank;
5464 MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
5466 /* We need to make an index to go from the coordinates
5467 * to the nodeid of this simulation.
5469 snew(comm->ddindex2simnodeid,dd->nnodes);
5470 snew(buf,dd->nnodes);
5471 if (cr->duty & DUTY_PP)
5473 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5475 /* Communicate the ddindex to simulation nodeid index */
5476 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5477 cr->mpi_comm_mysim);
5478 sfree(buf);
5480 /* Determine the master coordinates and rank.
5481 * The DD master should be the same node as the master of this sim.
5483 for(i=0; i<dd->nnodes; i++)
5485 if (comm->ddindex2simnodeid[i] == 0)
5487 ddindex2xyz(dd->nc,i,dd->master_ci);
5488 MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
5491 if (debug)
5493 fprintf(debug,"The master rank is %d\n",dd->masterrank);
5496 else
5498 /* No Cartesian communicators */
5499 /* We use the rank in dd->comm->all as DD index */
5500 ddindex2xyz(dd->nc,dd->rank,dd->ci);
5501 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
5502 dd->masterrank = 0;
5503 clear_ivec(dd->master_ci);
5505 #endif
5507 if (fplog)
5509 fprintf(fplog,
5510 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5511 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5513 if (debug)
5515 fprintf(debug,
5516 "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
5517 dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5521 static void receive_ddindex2simnodeid(t_commrec *cr)
5523 gmx_domdec_t *dd;
5525 gmx_domdec_comm_t *comm;
5526 int *buf;
5528 dd = cr->dd;
5529 comm = dd->comm;
5531 #ifdef GMX_MPI
5532 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
5534 snew(comm->ddindex2simnodeid,dd->nnodes);
5535 snew(buf,dd->nnodes);
5536 if (cr->duty & DUTY_PP)
5538 buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
5540 #ifdef GMX_MPI
5541 /* Communicate the ddindex to simulation nodeid index */
5542 MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
5543 cr->mpi_comm_mysim);
5544 #endif
5545 sfree(buf);
5547 #endif
5550 static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
5551 int ncg,int natoms)
5553 gmx_domdec_master_t *ma;
5554 int i;
5556 snew(ma,1);
5558 snew(ma->ncg,dd->nnodes);
5559 snew(ma->index,dd->nnodes+1);
5560 snew(ma->cg,ncg);
5561 snew(ma->nat,dd->nnodes);
5562 snew(ma->ibuf,dd->nnodes*2);
5563 snew(ma->cell_x,DIM);
5564 for(i=0; i<DIM; i++)
5566 snew(ma->cell_x[i],dd->nc[i]+1);
5569 if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
5571 ma->vbuf = NULL;
5573 else
5575 snew(ma->vbuf,natoms);
5578 return ma;
5581 static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
5582 int reorder)
5584 gmx_domdec_t *dd;
5585 gmx_domdec_comm_t *comm;
5586 int i,rank;
5587 bool bDiv[DIM];
5588 ivec periods;
5589 #ifdef GMX_MPI
5590 MPI_Comm comm_cart;
5591 #endif
5593 dd = cr->dd;
5594 comm = dd->comm;
5596 if (comm->bCartesianPP)
5598 for(i=1; i<DIM; i++)
5600 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
5602 if (bDiv[YY] || bDiv[ZZ])
5604 comm->bCartesianPP_PME = TRUE;
5605 /* If we have 2D PME decomposition, which is always in x+y,
5606 * we stack the PME only nodes in z.
5607 * Otherwise we choose the direction that provides the thinnest slab
5608 * of PME only nodes as this will have the least effect
5609 * on the PP communication.
5610 * But for the PME communication the opposite might be better.
5612 if (bDiv[ZZ] && (comm->npmenodes_minor > 1 ||
5613 !bDiv[YY] ||
5614 dd->nc[YY] > dd->nc[ZZ]))
5616 comm->cartpmedim = ZZ;
5618 else
5620 comm->cartpmedim = YY;
5622 comm->ntot[comm->cartpmedim]
5623 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
5625 else if (fplog)
5627 fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
5628 fprintf(fplog,
5629 "Will not use a Cartesian communicator for PP <-> PME\n\n");
5633 #ifdef GMX_MPI
5634 if (comm->bCartesianPP_PME)
5636 if (fplog)
5638 fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
5641 for(i=0; i<DIM; i++)
5643 periods[i] = TRUE;
5645 MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
5646 &comm_cart);
5648 MPI_Comm_rank(comm_cart,&rank);
5649 if (MASTERNODE(cr) && rank != 0)
5651 gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
5654 /* With this assigment we loose the link to the original communicator
5655 * which will usually be MPI_COMM_WORLD, unless have multisim.
5657 cr->mpi_comm_mysim = comm_cart;
5658 cr->sim_nodeid = rank;
5660 MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
5662 if (fplog)
5664 fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
5665 cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
5668 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
5670 cr->duty = DUTY_PP;
5672 if (cr->npmenodes == 0 ||
5673 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
5675 cr->duty = DUTY_PME;
5678 /* Split the sim communicator into PP and PME only nodes */
5679 MPI_Comm_split(cr->mpi_comm_mysim,
5680 cr->duty,
5681 dd_index(comm->ntot,dd->ci),
5682 &cr->mpi_comm_mygroup);
5684 else
5686 switch (dd_node_order)
5688 case ddnoPP_PME:
5689 if (fplog)
5691 fprintf(fplog,"Order of the nodes: PP first, PME last\n");
5693 break;
5694 case ddnoINTERLEAVE:
5695 /* Interleave the PP-only and PME-only nodes,
5696 * as on clusters with dual-core machines this will double
5697 * the communication bandwidth of the PME processes
5698 * and thus speed up the PP <-> PME and inter PME communication.
5700 if (fplog)
5702 fprintf(fplog,"Interleaving PP and PME nodes\n");
5704 comm->pmenodes = dd_pmenodes(cr);
5705 break;
5706 case ddnoCARTESIAN:
5707 break;
5708 default:
5709 gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
5712 if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
5714 cr->duty = DUTY_PME;
5716 else
5718 cr->duty = DUTY_PP;
5721 /* Split the sim communicator into PP and PME only nodes */
5722 MPI_Comm_split(cr->mpi_comm_mysim,
5723 cr->duty,
5724 cr->nodeid,
5725 &cr->mpi_comm_mygroup);
5726 MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
5728 #endif
5730 if (fplog)
5732 fprintf(fplog,"This is a %s only node\n\n",
5733 (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
5737 void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
5739 gmx_domdec_t *dd;
5740 gmx_domdec_comm_t *comm;
5741 int CartReorder;
5743 dd = cr->dd;
5744 comm = dd->comm;
5746 copy_ivec(dd->nc,comm->ntot);
5748 comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
5749 comm->bCartesianPP_PME = FALSE;
5751 /* Reorder the nodes by default. This might change the MPI ranks.
5752 * Real reordering is only supported on very few architectures,
5753 * Blue Gene is one of them.
5755 CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
5757 if (cr->npmenodes > 0)
5759 /* Split the communicator into a PP and PME part */
5760 split_communicator(fplog,cr,dd_node_order,CartReorder);
5761 if (comm->bCartesianPP_PME)
5763 /* We (possibly) reordered the nodes in split_communicator,
5764 * so it is no longer required in make_pp_communicator.
5766 CartReorder = FALSE;
5769 else
5771 /* All nodes do PP and PME */
5772 #ifdef GMX_MPI
5773 /* We do not require separate communicators */
5774 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
5775 #endif
5778 if (cr->duty & DUTY_PP)
5780 /* Copy or make a new PP communicator */
5781 make_pp_communicator(fplog,cr,CartReorder);
5783 else
5785 receive_ddindex2simnodeid(cr);
5788 if (!(cr->duty & DUTY_PME))
5790 /* Set up the commnuication to our PME node */
5791 dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
5792 dd->pme_receive_vir_ener = receive_vir_ener(cr);
5793 if (debug)
5795 fprintf(debug,"My pme_nodeid %d receive ener %d\n",
5796 dd->pme_nodeid,dd->pme_receive_vir_ener);
5799 else
5801 dd->pme_nodeid = -1;
5804 if (DDMASTER(dd))
5806 dd->ma = init_gmx_domdec_master_t(dd,
5807 comm->cgs_gl.nr,
5808 comm->cgs_gl.index[comm->cgs_gl.nr]);
5812 static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
5814 real *slb_frac,tot;
5815 int i,n;
5816 double dbl;
5818 slb_frac = NULL;
5819 if (nc > 1 && size_string != NULL)
5821 if (fplog)
5823 fprintf(fplog,"Using static load balancing for the %s direction\n",
5824 dir);
5826 snew(slb_frac,nc);
5827 tot = 0;
5828 for (i=0; i<nc; i++)
5830 dbl = 0;
5831 sscanf(size_string,"%lf%n",&dbl,&n);
5832 if (dbl == 0)
5834 gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
5836 slb_frac[i] = dbl;
5837 size_string += n;
5838 tot += slb_frac[i];
5840 /* Normalize */
5841 if (fplog)
5843 fprintf(fplog,"Relative cell sizes:");
5845 for (i=0; i<nc; i++)
5847 slb_frac[i] /= tot;
5848 if (fplog)
5850 fprintf(fplog," %5.3f",slb_frac[i]);
5853 if (fplog)
5855 fprintf(fplog,"\n");
5859 return slb_frac;
5862 static int multi_body_bondeds_count(gmx_mtop_t *mtop)
5864 int n,nmol,ftype;
5865 gmx_mtop_ilistloop_t iloop;
5866 t_ilist *il;
5868 n = 0;
5869 iloop = gmx_mtop_ilistloop_init(mtop);
5870 while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
5872 for(ftype=0; ftype<F_NRE; ftype++)
5874 if ((interaction_function[ftype].flags & IF_BOND) &&
5875 NRAL(ftype) > 2)
5877 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
5882 return n;
5885 static int dd_nst_env(FILE *fplog,const char *env_var,int def)
5887 char *val;
5888 int nst;
5890 nst = def;
5891 val = getenv(env_var);
5892 if (val)
5894 if (sscanf(val,"%d",&nst) <= 0)
5896 nst = 1;
5898 if (fplog)
5900 fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
5901 env_var,val,nst);
5905 return nst;
5908 static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
5910 if (MASTER(cr))
5912 fprintf(stderr,"\n%s\n",warn_string);
5914 if (fplog)
5916 fprintf(fplog,"\n%s\n",warn_string);
5920 static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
5921 t_inputrec *ir,FILE *fplog)
5923 if (ir->ePBC == epbcSCREW &&
5924 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
5926 gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
5929 if (ir->ns_type == ensSIMPLE)
5931 gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
5934 if (ir->nstlist == 0)
5936 gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
5939 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
5941 dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
5945 static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
5947 int di,d;
5948 real r;
5950 r = ddbox->box_size[XX];
5951 for(di=0; di<dd->ndim; di++)
5953 d = dd->dim[di];
5954 /* Check using the initial average cell size */
5955 r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
5958 return r;
5961 static int check_dlb_support(FILE *fplog,t_commrec *cr,
5962 const char *dlb_opt,bool bRecordLoad,
5963 unsigned long Flags,t_inputrec *ir)
5965 gmx_domdec_t *dd;
5966 int eDLB=-1;
5967 char buf[STRLEN];
5969 switch (dlb_opt[0])
5971 case 'a': eDLB = edlbAUTO; break;
5972 case 'n': eDLB = edlbNO; break;
5973 case 'y': eDLB = edlbYES; break;
5974 default: gmx_incons("Unknown dlb_opt");
5977 if (Flags & MD_RERUN)
5979 return edlbNO;
5982 if (!EI_DYNAMICS(ir->eI))
5984 if (eDLB == edlbYES)
5986 sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
5987 dd_warning(cr,fplog,buf);
5990 return edlbNO;
5993 if (!bRecordLoad)
5995 dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
5997 return edlbNO;
6000 if (Flags & MD_REPRODUCIBLE)
6002 switch (eDLB)
6004 case edlbNO:
6005 break;
6006 case edlbAUTO:
6007 dd_warning(cr,fplog,"NOTE: reproducability requested, will not use dynamic load balancing\n");
6008 eDLB = edlbNO;
6009 break;
6010 case edlbYES:
6011 dd_warning(cr,fplog,"WARNING: reproducability requested with dynamic load balancing, the simulation will NOT be binary reproducable\n");
6012 break;
6013 default:
6014 gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
6015 break;
6019 return eDLB;
6022 static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
6024 int dim;
6026 dd->ndim = 0;
6027 if (getenv("GMX_DD_ORDER_ZYX"))
6029 /* Decomposition order z,y,x */
6030 if (fplog)
6032 fprintf(fplog,"Using domain decomposition order z, y, x\n");
6034 for(dim=DIM-1; dim>=0; dim--)
6036 if (dd->nc[dim] > 1)
6038 dd->dim[dd->ndim++] = dim;
6042 else
6044 /* Decomposition order x,y,z */
6045 for(dim=0; dim<DIM; dim++)
6047 if (dd->nc[dim] > 1)
6049 dd->dim[dd->ndim++] = dim;
6055 static gmx_domdec_comm_t *init_dd_comm()
6057 gmx_domdec_comm_t *comm;
6058 int i;
6060 snew(comm,1);
6061 snew(comm->cggl_flag,DIM*2);
6062 snew(comm->cgcm_state,DIM*2);
6063 for(i=0; i<DIM*2; i++)
6065 comm->cggl_flag_nalloc[i] = 0;
6066 comm->cgcm_state_nalloc[i] = 0;
6069 comm->nalloc_int = 0;
6070 comm->buf_int = NULL;
6072 vec_rvec_init(&comm->vbuf);
6074 comm->n_load_have = 0;
6075 comm->n_load_collect = 0;
6077 for(i=0; i<ddnatNR-ddnatZONE; i++)
6079 comm->sum_nat[i] = 0;
6081 comm->ndecomp = 0;
6082 comm->nload = 0;
6083 comm->load_step = 0;
6084 comm->load_sum = 0;
6085 comm->load_max = 0;
6086 clear_ivec(comm->load_lim);
6087 comm->load_mdf = 0;
6088 comm->load_pme = 0;
6090 return comm;
6093 gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
6094 unsigned long Flags,
6095 ivec nc,
6096 real comm_distance_min,real rconstr,
6097 const char *dlb_opt,real dlb_scale,
6098 const char *sizex,const char *sizey,const char *sizez,
6099 gmx_mtop_t *mtop,t_inputrec *ir,
6100 matrix box,rvec *x,
6101 gmx_ddbox_t *ddbox,
6102 int *npme_major,int *npme_minor)
6104 gmx_domdec_t *dd;
6105 gmx_domdec_comm_t *comm;
6106 int recload;
6107 int d,i,j;
6108 real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
6109 bool bC;
6110 char buf[STRLEN];
6112 if (fplog)
6114 fprintf(fplog,
6115 "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
6118 snew(dd,1);
6119 dd->comm = init_dd_comm();
6120 comm = dd->comm;
6121 snew(comm->cggl_flag,DIM*2);
6122 snew(comm->cgcm_state,DIM*2);
6124 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
6125 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
6127 dd->bSendRecv2 = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
6128 comm->eFlop = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
6129 recload = dd_nst_env(fplog,"GMX_DD_LOAD",1);
6130 comm->nstSortCG = dd_nst_env(fplog,"GMX_DD_SORT",1);
6131 comm->nstDDDump = dd_nst_env(fplog,"GMX_DD_DUMP",0);
6132 comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
6133 comm->DD_debug = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
6135 dd->pme_recv_f_alloc = 0;
6136 dd->pme_recv_f_buf = NULL;
6138 if (dd->bSendRecv2 && fplog)
6140 fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
6142 if (comm->eFlop)
6144 if (fplog)
6146 fprintf(fplog,"Will load balance based on FLOP count\n");
6148 if (comm->eFlop > 1)
6150 srand(1+cr->nodeid);
6152 comm->bRecordLoad = TRUE;
6154 else
6156 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
6160 comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
6162 comm->bDynLoadBal = (comm->eDLB == edlbYES);
6163 if (fplog)
6165 fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
6167 dd->bGridJump = comm->bDynLoadBal;
6169 if (comm->nstSortCG)
6171 if (fplog)
6173 if (comm->nstSortCG == 1)
6175 fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
6177 else
6179 fprintf(fplog,"Will sort the charge groups every %d steps\n",
6180 comm->nstSortCG);
6183 snew(comm->sort,1);
6185 else
6187 if (fplog)
6189 fprintf(fplog,"Will not sort the charge groups\n");
6193 comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
6194 if (comm->bInterCGBondeds)
6196 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
6198 else
6200 comm->bInterCGMultiBody = FALSE;
6203 dd->bInterCGcons = inter_charge_group_constraints(mtop);
6205 if (ir->rlistlong == 0)
6207 /* Set the cut-off to some very large value,
6208 * so we don't need if statements everywhere in the code.
6209 * We use sqrt, since the cut-off is squared in some places.
6211 comm->cutoff = GMX_CUTOFF_INF;
6213 else
6215 comm->cutoff = ir->rlistlong;
6217 comm->cutoff_mbody = 0;
6219 comm->cellsize_limit = 0;
6220 comm->bBondComm = FALSE;
6222 if (comm->bInterCGBondeds)
6224 if (comm_distance_min > 0)
6226 comm->cutoff_mbody = comm_distance_min;
6227 if (Flags & MD_DDBONDCOMM)
6229 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
6231 else
6233 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6235 r_bonded_limit = comm->cutoff_mbody;
6237 else if (ir->bPeriodicMols)
6239 /* Can not easily determine the required cut-off */
6240 dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
6241 comm->cutoff_mbody = comm->cutoff/2;
6242 r_bonded_limit = comm->cutoff_mbody;
6244 else
6246 if (MASTER(cr))
6248 dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
6249 Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
6251 gmx_bcast(sizeof(r_2b),&r_2b,cr);
6252 gmx_bcast(sizeof(r_mb),&r_mb,cr);
6254 /* We use an initial margin of 10% for the minimum cell size,
6255 * except when we are just below the non-bonded cut-off.
6257 if (Flags & MD_DDBONDCOMM)
6259 if (max(r_2b,r_mb) > comm->cutoff)
6261 r_bonded = max(r_2b,r_mb);
6262 r_bonded_limit = 1.1*r_bonded;
6263 comm->bBondComm = TRUE;
6265 else
6267 r_bonded = r_mb;
6268 r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
6270 /* We determine cutoff_mbody later */
6272 else
6274 /* No special bonded communication,
6275 * simply increase the DD cut-off.
6277 r_bonded_limit = 1.1*max(r_2b,r_mb);
6278 comm->cutoff_mbody = r_bonded_limit;
6279 comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
6282 comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
6283 if (fplog)
6285 fprintf(fplog,
6286 "Minimum cell size due to bonded interactions: %.3f nm\n",
6287 comm->cellsize_limit);
6291 if (dd->bInterCGcons && rconstr <= 0)
6293 /* There is a cell size limit due to the constraints (P-LINCS) */
6294 rconstr = constr_r_max(fplog,mtop,ir);
6295 if (fplog)
6297 fprintf(fplog,
6298 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
6299 rconstr);
6300 if (rconstr > comm->cellsize_limit)
6302 fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
6306 else if (rconstr > 0 && fplog)
6308 /* Here we do not check for dd->bInterCGcons,
6309 * because one can also set a cell size limit for virtual sites only
6310 * and at this point we don't know yet if there are intercg v-sites.
6312 fprintf(fplog,
6313 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
6314 rconstr);
6316 comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
6318 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
6320 if (nc[XX] > 0)
6322 copy_ivec(nc,dd->nc);
6323 set_dd_dim(fplog,dd);
6324 set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
6326 if (cr->npmenodes == -1)
6328 cr->npmenodes = 0;
6330 acs = average_cellsize_min(dd,ddbox);
6331 if (acs < comm->cellsize_limit)
6333 if (fplog)
6335 fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
6337 gmx_fatal_collective(FARGS,MASTER(cr),
6338 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
6339 acs,comm->cellsize_limit);
6342 else
6344 set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
6346 /* We need to choose the optimal DD grid and possibly PME nodes */
6347 limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
6348 comm->eDLB!=edlbNO,dlb_scale,
6349 comm->cellsize_limit,comm->cutoff,
6350 comm->bInterCGBondeds,comm->bInterCGMultiBody);
6352 if (dd->nc[XX] == 0)
6354 bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
6355 sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
6356 !bC ? "-rdd" : "-rcon",
6357 comm->eDLB!=edlbNO ? " or -dds" : "",
6358 bC ? " or your LINCS settings" : "");
6360 gmx_fatal_collective(FARGS,MASTER(cr),
6361 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
6362 "%s\n"
6363 "Look in the log file for details on the domain decomposition",
6364 cr->nnodes-cr->npmenodes,limit,buf);
6366 set_dd_dim(fplog,dd);
6369 if (fplog)
6371 fprintf(fplog,
6372 "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
6373 dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
6376 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
6377 if (cr->nnodes - dd->nnodes != cr->npmenodes)
6379 gmx_fatal(FARGS,"The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
6380 dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
6382 if (cr->npmenodes > dd->nnodes)
6384 gmx_fatal(FARGS,"The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
6386 if (cr->npmenodes > 0)
6388 comm->npmenodes = cr->npmenodes;
6390 else
6392 comm->npmenodes = dd->nnodes;
6395 if (EEL_PME(ir->coulombtype))
6397 if (comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
6398 getenv("GMX_PMEONEDD") == NULL)
6400 comm->npmedecompdim = 2;
6401 comm->npmenodes_major = dd->nc[XX];
6402 comm->npmenodes_minor = comm->npmenodes/comm->npmenodes_major;
6404 else
6406 comm->npmedecompdim = 1;
6407 comm->npmenodes_major = comm->npmenodes;
6408 comm->npmenodes_minor = comm->npmenodes/comm->npmenodes_major;
6410 if (fplog)
6412 fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
6413 comm->npmenodes_major,comm->npmenodes_minor,1);
6416 else
6418 comm->npmedecompdim = 0;
6419 comm->npmenodes_major = 0;
6420 comm->npmenodes_minor = 0;
6423 /* Technically we don't need both of these, but it simplifies code not having to recalculate it */
6424 *npme_major = comm->npmenodes_major;
6425 *npme_minor = comm->npmenodes_minor;
6427 snew(comm->slb_frac,DIM);
6428 if (comm->eDLB == edlbNO)
6430 comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
6431 comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
6432 comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
6435 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
6437 if (comm->bBondComm || comm->eDLB != edlbNO)
6439 /* Set the bonded communication distance to halfway
6440 * the minimum and the maximum,
6441 * since the extra communication cost is nearly zero.
6443 acs = average_cellsize_min(dd,ddbox);
6444 comm->cutoff_mbody = 0.5*(r_bonded + acs);
6445 if (comm->eDLB != edlbNO)
6447 /* Check if this does not limit the scaling */
6448 comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
6450 if (!comm->bBondComm)
6452 /* Without bBondComm do not go beyond the n.b. cut-off */
6453 comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
6454 if (comm->cellsize_limit >= comm->cutoff)
6456 /* We don't loose a lot of efficieny
6457 * when increasing it to the n.b. cut-off.
6458 * It can even be slightly faster, because we need
6459 * less checks for the communication setup.
6461 comm->cutoff_mbody = comm->cutoff;
6464 /* Check if we did not end up below our original limit */
6465 comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
6467 if (comm->cutoff_mbody > comm->cellsize_limit)
6469 comm->cellsize_limit = comm->cutoff_mbody;
6472 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
6475 if (debug)
6477 fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
6478 "cellsize limit %f\n",
6479 comm->bBondComm,comm->cellsize_limit);
6482 if (MASTER(cr))
6484 check_dd_restrictions(cr,dd,ir,fplog);
6487 comm->partition_step = INT_MIN;
6488 dd->ddp_count = 0;
6490 clear_dd_cycle_counts(dd);
6492 return dd;
6495 static void set_dlb_limits(gmx_domdec_t *dd)
6498 int d;
6500 for(d=0; d<dd->ndim; d++)
6502 dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
6503 dd->comm->cellsize_min[dd->dim[d]] =
6504 dd->comm->cellsize_min_dlb[dd->dim[d]];
6509 static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
6511 gmx_domdec_t *dd;
6512 gmx_domdec_comm_t *comm;
6513 real cellsize_min;
6514 int d,nc,i;
6515 char buf[STRLEN];
6517 dd = cr->dd;
6518 comm = dd->comm;
6520 if (fplog)
6522 fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
6525 cellsize_min = comm->cellsize_min[dd->dim[0]];
6526 for(d=1; d<dd->ndim; d++)
6528 cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
6531 if (cellsize_min < comm->cellsize_limit*1.05)
6533 dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
6535 /* Change DLB from "auto" to "no". */
6536 comm->eDLB = edlbNO;
6538 return;
6541 dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
6542 comm->bDynLoadBal = TRUE;
6543 dd->bGridJump = TRUE;
6545 set_dlb_limits(dd);
6547 /* We can set the required cell size info here,
6548 * so we do not need to communicate this.
6549 * The grid is completely uniform.
6551 for(d=0; d<dd->ndim; d++)
6553 if (comm->root[d])
6555 comm->load[d].sum_m = comm->load[d].sum;
6557 nc = dd->nc[dd->dim[d]];
6558 for(i=0; i<nc; i++)
6560 comm->root[d]->cell_f[i] = i/(real)nc;
6561 if (d > 0)
6563 comm->root[d]->cell_f_max0[i] = i /(real)nc;
6564 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
6567 comm->root[d]->cell_f[nc] = 1.0;
6572 static char *init_bLocalCG(gmx_mtop_t *mtop)
6574 int ncg,cg;
6575 char *bLocalCG;
6577 ncg = ncg_mtop(mtop);
6578 snew(bLocalCG,ncg);
6579 for(cg=0; cg<ncg; cg++)
6581 bLocalCG[cg] = FALSE;
6584 return bLocalCG;
6587 void dd_init_bondeds(FILE *fplog,
6588 gmx_domdec_t *dd,gmx_mtop_t *mtop,
6589 gmx_vsite_t *vsite,gmx_constr_t constr,
6590 t_inputrec *ir,bool bBCheck,cginfo_mb_t *cginfo_mb)
6592 gmx_domdec_comm_t *comm;
6593 bool bBondComm;
6594 int d;
6596 dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
6598 comm = dd->comm;
6600 if (comm->bBondComm)
6602 /* Communicate atoms beyond the cut-off for bonded interactions */
6603 comm = dd->comm;
6605 comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
6607 comm->bLocalCG = init_bLocalCG(mtop);
6609 else
6611 /* Only communicate atoms based on cut-off */
6612 comm->cglink = NULL;
6613 comm->bLocalCG = NULL;
6617 static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
6618 t_inputrec *ir,
6619 bool bDynLoadBal,real dlb_scale,
6620 gmx_ddbox_t *ddbox)
6622 gmx_domdec_comm_t *comm;
6623 int d;
6624 ivec np;
6625 real limit,shrink;
6626 char buf[64];
6628 if (fplog == NULL)
6630 return;
6633 comm = dd->comm;
6635 if (bDynLoadBal)
6637 fprintf(fplog,"The maximum number of communication pulses is:");
6638 for(d=0; d<dd->ndim; d++)
6640 fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
6642 fprintf(fplog,"\n");
6643 fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
6644 fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
6645 fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
6646 for(d=0; d<DIM; d++)
6648 if (dd->nc[d] > 1)
6650 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
6652 shrink = 0;
6654 else
6656 shrink =
6657 comm->cellsize_min_dlb[d]/
6658 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
6660 fprintf(fplog," %c %.2f",dim2char(d),shrink);
6663 fprintf(fplog,"\n");
6665 else
6667 set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
6668 fprintf(fplog,"The initial number of communication pulses is:");
6669 for(d=0; d<dd->ndim; d++)
6671 fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
6673 fprintf(fplog,"\n");
6674 fprintf(fplog,"The initial domain decomposition cell size is:");
6675 for(d=0; d<DIM; d++) {
6676 if (dd->nc[d] > 1)
6678 fprintf(fplog," %c %.2f nm",
6679 dim2char(d),dd->comm->cellsize_min[d]);
6682 fprintf(fplog,"\n\n");
6685 if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
6687 fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
6688 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6689 "non-bonded interactions","",comm->cutoff);
6691 if (bDynLoadBal)
6693 limit = dd->comm->cellsize_limit;
6695 else
6697 if (dynamic_dd_box(ddbox,ir))
6699 fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
6701 limit = dd->comm->cellsize_min[XX];
6702 for(d=1; d<DIM; d++)
6704 limit = min(limit,dd->comm->cellsize_min[d]);
6708 if (comm->bInterCGBondeds)
6710 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6711 "two-body bonded interactions","(-rdd)",
6712 max(comm->cutoff,comm->cutoff_mbody));
6713 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6714 "multi-body bonded interactions","(-rdd)",
6715 (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
6717 if (dd->vsite_comm)
6719 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6720 "virtual site constructions","(-rcon)",limit);
6722 if (dd->constraint_comm)
6724 sprintf(buf,"atoms separated by up to %d constraints",
6725 1+ir->nProjOrder);
6726 fprintf(fplog,"%40s %-7s %6.3f nm\n",
6727 buf,"(-rcon)",limit);
6729 fprintf(fplog,"\n");
6732 fflush(fplog);
6735 void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
6736 t_inputrec *ir,t_forcerec *fr,
6737 gmx_ddbox_t *ddbox)
6739 gmx_domdec_comm_t *comm;
6740 int d,dim,npulse,npulse_d_max,npulse_d;
6741 bool bNoCutOff;
6742 int natoms_tot;
6743 real vol_frac;
6745 comm = dd->comm;
6747 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
6749 if (EEL_PME(ir->coulombtype))
6751 init_ddpme(dd,&comm->ddpme[0],0,comm->npmenodes_major);
6752 if (comm->npmedecompdim >= 2)
6754 init_ddpme(dd,&comm->ddpme[1],1,
6755 comm->npmenodes/comm->npmenodes_major);
6758 else
6760 comm->npmenodes = 0;
6761 if (dd->pme_nodeid >= 0)
6763 gmx_fatal(FARGS,"Can not have separate PME nodes without PME electrostatics");
6767 /* If each molecule is a single charge group
6768 * or we use domain decomposition for each periodic dimension,
6769 * we do not need to take pbc into account for the bonded interactions.
6771 if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
6772 (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
6774 fr->bMolPBC = FALSE;
6776 else
6778 fr->bMolPBC = TRUE;
6781 if (debug)
6783 fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
6785 if (comm->eDLB != edlbNO)
6787 /* Determine the maximum number of comm. pulses in one dimension */
6789 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6791 /* Determine the maximum required number of grid pulses */
6792 if (comm->cellsize_limit >= comm->cutoff)
6794 /* Only a single pulse is required */
6795 npulse = 1;
6797 else if (!bNoCutOff && comm->cellsize_limit > 0)
6799 /* We round down slightly here to avoid overhead due to the latency
6800 * of extra communication calls when the cut-off
6801 * would be only slightly longer than the cell size.
6802 * Later cellsize_limit is redetermined,
6803 * so we can not miss interactions due to this rounding.
6805 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
6807 else
6809 /* There is no cell size limit */
6810 npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
6813 if (!bNoCutOff && npulse > 1)
6815 /* See if we can do with less pulses, based on dlb_scale */
6816 npulse_d_max = 0;
6817 for(d=0; d<dd->ndim; d++)
6819 dim = dd->dim[d];
6820 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
6821 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
6822 npulse_d_max = max(npulse_d_max,npulse_d);
6824 npulse = min(npulse,npulse_d_max);
6827 /* This env var can override npulse */
6828 d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
6829 if (d > 0)
6831 npulse = d;
6834 comm->maxpulse = 1;
6835 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
6836 for(d=0; d<dd->ndim; d++)
6838 comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
6839 comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
6840 snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
6841 comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
6842 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
6844 comm->bVacDLBNoLimit = FALSE;
6848 /* cellsize_limit is set for LINCS in init_domain_decomposition */
6849 if (!comm->bVacDLBNoLimit)
6851 comm->cellsize_limit = max(comm->cellsize_limit,
6852 comm->cutoff/comm->maxpulse);
6854 comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
6855 /* Set the minimum cell size for each DD dimension */
6856 for(d=0; d<dd->ndim; d++)
6858 if (comm->bVacDLBNoLimit ||
6859 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
6861 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
6863 else
6865 comm->cellsize_min_dlb[dd->dim[d]] =
6866 comm->cutoff/comm->cd[d].np_dlb;
6869 if (comm->cutoff_mbody <= 0)
6871 comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
6873 if (comm->bDynLoadBal)
6875 set_dlb_limits(dd);
6879 print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
6880 if (comm->eDLB == edlbAUTO)
6882 if (fplog)
6884 fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
6886 print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
6889 vol_frac = 1/(real)dd->nnodes + comm_box_frac(dd->nc,comm->cutoff,ddbox);
6890 if (debug)
6892 fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
6894 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
6896 dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
6899 static void merge_cg_buffers(int ncell,
6900 gmx_domdec_comm_dim_t *cd, int pulse,
6901 int *ncg_cell,
6902 int *index_gl, int *recv_i,
6903 rvec *cg_cm, rvec *recv_vr,
6904 int *cgindex,
6905 cginfo_mb_t *cginfo_mb,int *cginfo)
6907 gmx_domdec_ind_t *ind,*ind_p;
6908 int p,cell,c,cg,cg0,cg1,cg_gl,nat;
6909 int shift,shift_at;
6911 ind = &cd->ind[pulse];
6913 /* First correct the already stored data */
6914 shift = ind->nrecv[ncell];
6915 for(cell=ncell-1; cell>=0; cell--)
6917 shift -= ind->nrecv[cell];
6918 if (shift > 0)
6920 /* Move the cg's present from previous grid pulses */
6921 cg0 = ncg_cell[ncell+cell];
6922 cg1 = ncg_cell[ncell+cell+1];
6923 cgindex[cg1+shift] = cgindex[cg1];
6924 for(cg=cg1-1; cg>=cg0; cg--)
6926 index_gl[cg+shift] = index_gl[cg];
6927 copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
6928 cgindex[cg+shift] = cgindex[cg];
6929 cginfo[cg+shift] = cginfo[cg];
6931 /* Correct the already stored send indices for the shift */
6932 for(p=1; p<=pulse; p++)
6934 ind_p = &cd->ind[p];
6935 cg0 = 0;
6936 for(c=0; c<cell; c++)
6938 cg0 += ind_p->nsend[c];
6940 cg1 = cg0 + ind_p->nsend[cell];
6941 for(cg=cg0; cg<cg1; cg++)
6943 ind_p->index[cg] += shift;
6949 /* Merge in the communicated buffers */
6950 shift = 0;
6951 shift_at = 0;
6952 cg0 = 0;
6953 for(cell=0; cell<ncell; cell++)
6955 cg1 = ncg_cell[ncell+cell+1] + shift;
6956 if (shift_at > 0)
6958 /* Correct the old cg indices */
6959 for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
6961 cgindex[cg+1] += shift_at;
6964 for(cg=0; cg<ind->nrecv[cell]; cg++)
6966 /* Copy this charge group from the buffer */
6967 index_gl[cg1] = recv_i[cg0];
6968 copy_rvec(recv_vr[cg0],cg_cm[cg1]);
6969 /* Add it to the cgindex */
6970 cg_gl = index_gl[cg1];
6971 cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
6972 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
6973 cgindex[cg1+1] = cgindex[cg1] + nat;
6974 cg0++;
6975 cg1++;
6976 shift_at += nat;
6978 shift += ind->nrecv[cell];
6979 ncg_cell[ncell+cell+1] = cg1;
6983 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
6984 int nzone,int cg0,const int *cgindex)
6986 int cg,zone,p;
6988 /* Store the atom block boundaries for easy copying of communication buffers
6990 cg = cg0;
6991 for(zone=0; zone<nzone; zone++)
6993 for(p=0; p<cd->np; p++) {
6994 cd->ind[p].cell2at0[zone] = cgindex[cg];
6995 cg += cd->ind[p].nrecv[zone];
6996 cd->ind[p].cell2at1[zone] = cgindex[cg];
7001 static bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
7003 int i;
7004 bool bMiss;
7006 bMiss = FALSE;
7007 for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
7009 if (!bLocalCG[link->a[i]])
7011 bMiss = TRUE;
7015 return bMiss;
7018 static void setup_dd_communication(gmx_domdec_t *dd,
7019 matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
7021 int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
7022 int nzone,nzone_send,zone,zonei,cg0,cg1;
7023 int c,i,j,cg,cg_gl,nrcg;
7024 int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
7025 gmx_domdec_comm_t *comm;
7026 gmx_domdec_zones_t *zones;
7027 gmx_domdec_comm_dim_t *cd;
7028 gmx_domdec_ind_t *ind;
7029 cginfo_mb_t *cginfo_mb;
7030 bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
7031 real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
7032 rvec rb,rn;
7033 real corner[DIM][4],corner_round_0=0,corner_round_1[4];
7034 real bcorner[DIM],bcorner_round_1=0;
7035 ivec tric_dist;
7036 rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
7037 real skew_fac2_d,skew_fac_01;
7038 rvec sf2_round;
7039 int nsend,nat;
7041 if (debug)
7043 fprintf(debug,"Setting up DD communication\n");
7046 comm = dd->comm;
7047 cg_cm = fr->cg_cm;
7049 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
7051 dim = dd->dim[dim_ind];
7053 /* Check if we need to use triclinic distances */
7054 tric_dist[dim_ind] = 0;
7055 for(i=0; i<=dim_ind; i++)
7057 if (ddbox->tric_dir[dd->dim[i]])
7059 tric_dist[dim_ind] = 1;
7064 bBondComm = comm->bBondComm;
7066 /* Do we need to determine extra distances for multi-body bondeds? */
7067 bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
7069 /* Do we need to determine extra distances for only two-body bondeds? */
7070 bDist2B = (bBondComm && !bDistMB);
7072 r_comm2 = sqr(comm->cutoff);
7073 r_bcomm2 = sqr(comm->cutoff_mbody);
7075 if (debug)
7077 fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
7080 zones = &comm->zones;
7082 dim0 = dd->dim[0];
7083 /* The first dimension is equal for all cells */
7084 corner[0][0] = comm->cell_x0[dim0];
7085 if (bDistMB)
7087 bcorner[0] = corner[0][0];
7089 if (dd->ndim >= 2)
7091 dim1 = dd->dim[1];
7092 /* This cell row is only seen from the first row */
7093 corner[1][0] = comm->cell_x0[dim1];
7094 /* All rows can see this row */
7095 corner[1][1] = comm->cell_x0[dim1];
7096 if (dd->bGridJump)
7098 corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
7099 if (bDistMB)
7101 /* For the multi-body distance we need the maximum */
7102 bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
7105 /* Set the upper-right corner for rounding */
7106 corner_round_0 = comm->cell_x1[dim0];
7108 if (dd->ndim >= 3)
7110 dim2 = dd->dim[2];
7111 for(j=0; j<4; j++)
7113 corner[2][j] = comm->cell_x0[dim2];
7115 if (dd->bGridJump)
7117 /* Use the maximum of the i-cells that see a j-cell */
7118 for(i=0; i<zones->nizone; i++)
7120 for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
7122 if (j >= 4)
7124 corner[2][j-4] =
7125 max(corner[2][j-4],
7126 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
7130 if (bDistMB)
7132 /* For the multi-body distance we need the maximum */
7133 bcorner[2] = comm->cell_x0[dim2];
7134 for(i=0; i<2; i++)
7136 for(j=0; j<2; j++)
7138 bcorner[2] = max(bcorner[2],
7139 comm->zone_d2[i][j].p1_0);
7145 /* Set the upper-right corner for rounding */
7146 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
7147 * Only cell (0,0,0) can see cell 7 (1,1,1)
7149 corner_round_1[0] = comm->cell_x1[dim1];
7150 corner_round_1[3] = comm->cell_x1[dim1];
7151 if (dd->bGridJump)
7153 corner_round_1[0] = max(comm->cell_x1[dim1],
7154 comm->zone_d1[1].mch1);
7155 if (bDistMB)
7157 /* For the multi-body distance we need the maximum */
7158 bcorner_round_1 = max(comm->cell_x1[dim1],
7159 comm->zone_d1[1].p1_1);
7165 /* Triclinic stuff */
7166 normal = ddbox->normal;
7167 skew_fac_01 = 0;
7168 if (dd->ndim >= 2)
7170 v_0 = ddbox->v[dim0];
7171 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
7173 /* Determine the coupling coefficient for the distances
7174 * to the cell planes along dim0 and dim1 through dim2.
7175 * This is required for correct rounding.
7177 skew_fac_01 =
7178 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
7179 if (debug)
7181 fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
7185 if (dd->ndim >= 3)
7187 v_1 = ddbox->v[dim1];
7190 zone_cg_range = zones->cg_range;
7191 index_gl = dd->index_gl;
7192 cgindex = dd->cgindex;
7193 cginfo_mb = fr->cginfo_mb;
7195 zone_cg_range[0] = 0;
7196 zone_cg_range[1] = dd->ncg_home;
7197 comm->zone_ncg1[0] = dd->ncg_home;
7198 pos_cg = dd->ncg_home;
7200 nat_tot = dd->nat_home;
7201 nzone = 1;
7202 for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
7204 dim = dd->dim[dim_ind];
7205 cd = &comm->cd[dim_ind];
7207 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
7209 /* No pbc in this dimension, the first node should not comm. */
7210 nzone_send = 0;
7212 else
7214 nzone_send = nzone;
7217 bScrew = (dd->bScrewPBC && dim == XX);
7219 v_d = ddbox->v[dim];
7220 skew_fac2_d = sqr(ddbox->skew_fac[dim]);
7222 cd->bInPlace = TRUE;
7223 for(p=0; p<cd->np; p++)
7225 /* Only atoms communicated in the first pulse are used
7226 * for multi-body bonded interactions or for bBondComm.
7228 bDistBonded = ((bDistMB || bDist2B) && p == 0);
7229 bDistMB_pulse = (bDistMB && bDistBonded);
7231 ind = &cd->ind[p];
7232 nsend = 0;
7233 nat = 0;
7234 for(zone=0; zone<nzone_send; zone++)
7236 if (tric_dist[dim_ind] && dim_ind > 0)
7238 /* Determine slightly more optimized skew_fac's
7239 * for rounding.
7240 * This reduces the number of communicated atoms
7241 * by about 10% for 3D DD of rhombic dodecahedra.
7243 for(dimd=0; dimd<dim; dimd++)
7245 sf2_round[dimd] = 1;
7246 if (ddbox->tric_dir[dimd])
7248 for(i=dd->dim[dimd]+1; i<DIM; i++)
7250 /* If we are shifted in dimension i
7251 * and the cell plane is tilted forward
7252 * in dimension i, skip this coupling.
7254 if (!(zones->shift[nzone+zone][i] &&
7255 ddbox->v[dimd][i][dimd] >= 0))
7257 sf2_round[dimd] +=
7258 sqr(ddbox->v[dimd][i][dimd]);
7261 sf2_round[dimd] = 1/sf2_round[dimd];
7266 zonei = zone_perm[dim_ind][zone];
7267 if (p == 0)
7269 /* Here we permutate the zones to obtain a convenient order
7270 * for neighbor searching
7272 cg0 = zone_cg_range[zonei];
7273 cg1 = zone_cg_range[zonei+1];
7275 else
7277 /* Look only at the cg's received in the previous grid pulse
7279 cg1 = zone_cg_range[nzone+zone+1];
7280 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
7282 ind->nsend[zone] = 0;
7283 for(cg=cg0; cg<cg1; cg++)
7285 r2 = 0;
7286 rb2 = 0;
7287 if (tric_dist[dim_ind] == 0)
7289 /* Rectangular direction, easy */
7290 r = cg_cm[cg][dim] - corner[dim_ind][zone];
7291 if (r > 0)
7293 r2 += r*r;
7295 if (bDistMB_pulse)
7297 r = cg_cm[cg][dim] - bcorner[dim_ind];
7298 if (r > 0)
7300 rb2 += r*r;
7303 /* Rounding gives at most a 16% reduction
7304 * in communicated atoms
7306 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7308 r = cg_cm[cg][dim0] - corner_round_0;
7309 /* This is the first dimension, so always r >= 0 */
7310 r2 += r*r;
7311 if (bDistMB_pulse)
7313 rb2 += r*r;
7316 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7318 r = cg_cm[cg][dim1] - corner_round_1[zone];
7319 if (r > 0)
7321 r2 += r*r;
7323 if (bDistMB_pulse)
7325 r = cg_cm[cg][dim1] - bcorner_round_1;
7326 if (r > 0)
7328 rb2 += r*r;
7333 else
7335 /* Triclinic direction, more complicated */
7336 clear_rvec(rn);
7337 clear_rvec(rb);
7338 /* Rounding, conservative as the skew_fac multiplication
7339 * will slightly underestimate the distance.
7341 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
7343 rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
7344 for(i=dim0+1; i<DIM; i++)
7346 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
7348 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
7349 if (bDistMB_pulse)
7351 rb[dim0] = rn[dim0];
7352 rb2 = r2;
7354 /* Take care that the cell planes along dim0 might not
7355 * be orthogonal to those along dim1 and dim2.
7357 for(i=1; i<=dim_ind; i++)
7359 dimd = dd->dim[i];
7360 if (normal[dim0][dimd] > 0)
7362 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
7363 if (bDistMB_pulse)
7365 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
7370 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
7372 rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
7373 tric_sh = 0;
7374 for(i=dim1+1; i<DIM; i++)
7376 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
7378 rn[dim1] += tric_sh;
7379 if (rn[dim1] > 0)
7381 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
7382 /* Take care of coupling of the distances
7383 * to the planes along dim0 and dim1 through dim2.
7385 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
7386 /* Take care that the cell planes along dim1
7387 * might not be orthogonal to that along dim2.
7389 if (normal[dim1][dim2] > 0)
7391 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
7394 if (bDistMB_pulse)
7396 rb[dim1] +=
7397 cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
7398 if (rb[dim1] > 0)
7400 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
7401 /* Take care of coupling of the distances
7402 * to the planes along dim0 and dim1 through dim2.
7404 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
7405 /* Take care that the cell planes along dim1
7406 * might not be orthogonal to that along dim2.
7408 if (normal[dim1][dim2] > 0)
7410 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
7415 /* The distance along the communication direction */
7416 rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
7417 tric_sh = 0;
7418 for(i=dim+1; i<DIM; i++)
7420 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
7422 rn[dim] += tric_sh;
7423 if (rn[dim] > 0)
7425 r2 += rn[dim]*rn[dim]*skew_fac2_d;
7426 /* Take care of coupling of the distances
7427 * to the planes along dim0 and dim1 through dim2.
7429 if (dim_ind == 1 && zonei == 1)
7431 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
7434 if (bDistMB_pulse)
7436 clear_rvec(rb);
7437 rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
7438 if (rb[dim] > 0)
7440 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
7441 /* Take care of coupling of the distances
7442 * to the planes along dim0 and dim1 through dim2.
7444 if (dim_ind == 1 && zonei == 1)
7446 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
7452 if (r2 < r_comm2 ||
7453 (bDistBonded &&
7454 ((bDistMB && rb2 < r_bcomm2) ||
7455 (bDist2B && r2 < r_bcomm2)) &&
7456 (!bBondComm ||
7457 (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
7458 missing_link(comm->cglink,index_gl[cg],
7459 comm->bLocalCG)))))
7461 /* Make an index to the local charge groups */
7462 if (nsend+1 > ind->nalloc)
7464 ind->nalloc = over_alloc_large(nsend+1);
7465 srenew(ind->index,ind->nalloc);
7467 if (nsend+1 > comm->nalloc_int)
7469 comm->nalloc_int = over_alloc_large(nsend+1);
7470 srenew(comm->buf_int,comm->nalloc_int);
7472 ind->index[nsend] = cg;
7473 comm->buf_int[nsend] = index_gl[cg];
7474 ind->nsend[zone]++;
7475 vec_rvec_check_alloc(&comm->vbuf,nsend+1);
7477 if (dd->ci[dim] == 0)
7479 /* Correct cg_cm for pbc */
7480 rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
7481 if (bScrew)
7483 comm->vbuf.v[nsend][YY] =
7484 box[YY][YY]-comm->vbuf.v[nsend][YY];
7485 comm->vbuf.v[nsend][ZZ] =
7486 box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
7489 else
7491 copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
7493 nsend++;
7494 nat += cgindex[cg+1] - cgindex[cg];
7498 /* Clear the counts in case we do not have pbc */
7499 for(zone=nzone_send; zone<nzone; zone++)
7501 ind->nsend[zone] = 0;
7503 ind->nsend[nzone] = nsend;
7504 ind->nsend[nzone+1] = nat;
7505 /* Communicate the number of cg's and atoms to receive */
7506 dd_sendrecv_int(dd, dim_ind, dddirBackward,
7507 ind->nsend, nzone+2,
7508 ind->nrecv, nzone+2);
7510 /* The rvec buffer is also required for atom buffers of size nsend
7511 * in dd_move_x and dd_move_f.
7513 vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
7515 if (p > 0)
7517 /* We can receive in place if only the last zone is not empty */
7518 for(zone=0; zone<nzone-1; zone++)
7520 if (ind->nrecv[zone] > 0)
7522 cd->bInPlace = FALSE;
7525 if (!cd->bInPlace)
7527 /* The int buffer is only required here for the cg indices */
7528 if (ind->nrecv[nzone] > comm->nalloc_int2)
7530 comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
7531 srenew(comm->buf_int2,comm->nalloc_int2);
7533 /* The rvec buffer is also required for atom buffers
7534 * of size nrecv in dd_move_x and dd_move_f.
7536 i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
7537 vec_rvec_check_alloc(&comm->vbuf2,i);
7541 /* Make space for the global cg indices */
7542 if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
7543 || dd->cg_nalloc == 0)
7545 dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
7546 srenew(index_gl,dd->cg_nalloc);
7547 srenew(cgindex,dd->cg_nalloc+1);
7549 /* Communicate the global cg indices */
7550 if (cd->bInPlace)
7552 recv_i = index_gl + pos_cg;
7554 else
7556 recv_i = comm->buf_int2;
7558 dd_sendrecv_int(dd, dim_ind, dddirBackward,
7559 comm->buf_int, nsend,
7560 recv_i, ind->nrecv[nzone]);
7562 /* Make space for cg_cm */
7563 if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
7565 dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
7566 cg_cm = fr->cg_cm;
7568 /* Communicate cg_cm */
7569 if (cd->bInPlace)
7571 recv_vr = cg_cm + pos_cg;
7573 else
7575 recv_vr = comm->vbuf2.v;
7577 dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
7578 comm->vbuf.v, nsend,
7579 recv_vr, ind->nrecv[nzone]);
7581 /* Make the charge group index */
7582 if (cd->bInPlace)
7584 zone = (p == 0 ? 0 : nzone - 1);
7585 while (zone < nzone)
7587 for(cg=0; cg<ind->nrecv[zone]; cg++)
7589 cg_gl = index_gl[pos_cg];
7590 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
7591 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
7592 cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
7593 if (bBondComm)
7595 /* Update the charge group presence,
7596 * so we can use it in the next pass of the loop.
7598 comm->bLocalCG[cg_gl] = TRUE;
7600 pos_cg++;
7602 if (p == 0)
7604 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
7606 zone++;
7607 zone_cg_range[nzone+zone] = pos_cg;
7610 else
7612 /* This part of the code is never executed with bBondComm. */
7613 merge_cg_buffers(nzone,cd,p,zone_cg_range,
7614 index_gl,recv_i,cg_cm,recv_vr,
7615 cgindex,fr->cginfo_mb,fr->cginfo);
7616 pos_cg += ind->nrecv[nzone];
7618 nat_tot += ind->nrecv[nzone+1];
7620 if (!cd->bInPlace)
7622 /* Store the atom block for easy copying of communication buffers */
7623 make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
7625 nzone += nzone;
7627 dd->index_gl = index_gl;
7628 dd->cgindex = cgindex;
7630 dd->ncg_tot = zone_cg_range[zones->n];
7631 dd->nat_tot = nat_tot;
7632 comm->nat[ddnatHOME] = dd->nat_home;
7633 for(i=ddnatZONE; i<ddnatNR; i++)
7635 comm->nat[i] = dd->nat_tot;
7638 if (!bBondComm)
7640 /* We don't need to update cginfo, since that was alrady done above.
7641 * So we pass NULL for the forcerec.
7643 dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
7644 NULL,comm->bLocalCG);
7647 if (debug)
7649 fprintf(debug,"Finished setting up DD communication, zones:");
7650 for(c=0; c<zones->n; c++)
7652 fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
7654 fprintf(debug,"\n");
7658 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
7660 int c;
7662 for(c=0; c<zones->nizone; c++)
7664 zones->izone[c].cg1 = zones->cg_range[c+1];
7665 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
7666 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
7670 static int comp_cgsort(const void *a,const void *b)
7672 int comp;
7674 gmx_cgsort_t *cga,*cgb;
7675 cga = (gmx_cgsort_t *)a;
7676 cgb = (gmx_cgsort_t *)b;
7678 comp = cga->nsc - cgb->nsc;
7679 if (comp == 0)
7681 comp = cga->ind_gl - cgb->ind_gl;
7684 return comp;
7687 static void order_int_cg(int n,gmx_cgsort_t *sort,
7688 int *a,int *buf)
7690 int i;
7692 /* Order the data */
7693 for(i=0; i<n; i++)
7695 buf[i] = a[sort[i].ind];
7698 /* Copy back to the original array */
7699 for(i=0; i<n; i++)
7701 a[i] = buf[i];
7705 static void order_vec_cg(int n,gmx_cgsort_t *sort,
7706 rvec *v,rvec *buf)
7708 int i;
7710 /* Order the data */
7711 for(i=0; i<n; i++)
7713 copy_rvec(v[sort[i].ind],buf[i]);
7716 /* Copy back to the original array */
7717 for(i=0; i<n; i++)
7719 copy_rvec(buf[i],v[i]);
7723 static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
7724 rvec *v,rvec *buf)
7726 int a,atot,cg,cg0,cg1,i;
7728 /* Order the data */
7729 a = 0;
7730 for(cg=0; cg<ncg; cg++)
7732 cg0 = cgindex[sort[cg].ind];
7733 cg1 = cgindex[sort[cg].ind+1];
7734 for(i=cg0; i<cg1; i++)
7736 copy_rvec(v[i],buf[a]);
7737 a++;
7740 atot = a;
7742 /* Copy back to the original array */
7743 for(a=0; a<atot; a++)
7745 copy_rvec(buf[a],v[a]);
7749 static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
7750 int nsort_new,gmx_cgsort_t *sort_new,
7751 gmx_cgsort_t *sort1)
7753 int i1,i2,i_new;
7755 /* The new indices are not very ordered, so we qsort them */
7756 qsort(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
7758 /* sort2 is already ordered, so now we can merge the two arrays */
7759 i1 = 0;
7760 i2 = 0;
7761 i_new = 0;
7762 while(i2 < nsort2 || i_new < nsort_new)
7764 if (i2 == nsort2)
7766 sort1[i1++] = sort_new[i_new++];
7768 else if (i_new == nsort_new)
7770 sort1[i1++] = sort2[i2++];
7772 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
7773 (sort2[i2].nsc == sort_new[i_new].nsc &&
7774 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
7776 sort1[i1++] = sort2[i2++];
7778 else
7780 sort1[i1++] = sort_new[i_new++];
7785 static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
7786 rvec *cgcm,t_forcerec *fr,t_state *state,
7787 int ncg_home_old)
7789 gmx_domdec_sort_t *sort;
7790 gmx_cgsort_t *cgsort,*sort_i;
7791 int ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
7792 rvec *vbuf;
7794 sort = dd->comm->sort;
7796 if (dd->ncg_home > sort->sort_nalloc)
7798 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
7799 srenew(sort->sort1,sort->sort_nalloc);
7800 srenew(sort->sort2,sort->sort_nalloc);
7803 if (ncg_home_old >= 0)
7805 /* The charge groups that remained in the same ns grid cell
7806 * are completely ordered. So we can sort efficiently by sorting
7807 * the charge groups that did move into the stationary list.
7809 ncg_new = 0;
7810 nsort2 = 0;
7811 nsort_new = 0;
7812 for(i=0; i<dd->ncg_home; i++)
7814 /* Check if this cg did not move to another node */
7815 cell_index = fr->ns.grid->cell_index[i];
7816 if (cell_index != 4*fr->ns.grid->ncells)
7818 if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
7820 /* This cg is new on this node or moved ns grid cell */
7821 if (nsort_new >= sort->sort_new_nalloc)
7823 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
7824 srenew(sort->sort_new,sort->sort_new_nalloc);
7826 sort_i = &(sort->sort_new[nsort_new++]);
7828 else
7830 /* This cg did not move */
7831 sort_i = &(sort->sort2[nsort2++]);
7833 /* Sort on the ns grid cell indices
7834 * and the global topology index
7836 sort_i->nsc = cell_index;
7837 sort_i->ind_gl = dd->index_gl[i];
7838 sort_i->ind = i;
7839 ncg_new++;
7842 if (debug)
7844 fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
7845 nsort2,nsort_new);
7847 /* Sort efficiently */
7848 ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
7850 else
7852 cgsort = sort->sort1;
7853 ncg_new = 0;
7854 for(i=0; i<dd->ncg_home; i++)
7856 /* Sort on the ns grid cell indices
7857 * and the global topology index
7859 cgsort[i].nsc = fr->ns.grid->cell_index[i];
7860 cgsort[i].ind_gl = dd->index_gl[i];
7861 cgsort[i].ind = i;
7862 if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
7864 ncg_new++;
7867 if (debug)
7869 fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
7871 /* Determine the order of the charge groups using qsort */
7872 qsort(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
7874 cgsort = sort->sort1;
7876 /* We alloc with the old size, since cgindex is still old */
7877 vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
7878 vbuf = dd->comm->vbuf.v;
7880 /* Remove the charge groups which are no longer at home here */
7881 dd->ncg_home = ncg_new;
7883 /* Reorder the state */
7884 for(i=0; i<estNR; i++)
7886 if (EST_DISTR(i) && state->flags & (1<<i))
7888 switch (i)
7890 case estX:
7891 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
7892 break;
7893 case estV:
7894 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
7895 break;
7896 case estSDX:
7897 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
7898 break;
7899 case estCGP:
7900 order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
7901 break;
7902 case estLD_RNG:
7903 case estLD_RNGI:
7904 case estDISRE_INITF:
7905 case estDISRE_RM3TAV:
7906 case estORIRE_INITF:
7907 case estORIRE_DTAV:
7908 /* No ordering required */
7909 break;
7910 default:
7911 gmx_incons("Unknown state entry encountered in dd_sort_state");
7912 break;
7916 /* Reorder cgcm */
7917 order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
7919 if (dd->ncg_home+1 > sort->ibuf_nalloc)
7921 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
7922 srenew(sort->ibuf,sort->ibuf_nalloc);
7924 ibuf = sort->ibuf;
7925 /* Reorder the global cg index */
7926 order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
7927 /* Reorder the cginfo */
7928 order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
7929 /* Rebuild the local cg index */
7930 ibuf[0] = 0;
7931 for(i=0; i<dd->ncg_home; i++)
7933 cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
7934 ibuf[i+1] = ibuf[i] + cgsize;
7936 for(i=0; i<dd->ncg_home+1; i++)
7938 dd->cgindex[i] = ibuf[i];
7940 /* Set the home atom number */
7941 dd->nat_home = dd->cgindex[dd->ncg_home];
7943 /* Copy the sorted ns cell indices back to the ns grid struct */
7944 for(i=0; i<dd->ncg_home; i++)
7946 fr->ns.grid->cell_index[i] = cgsort[i].nsc;
7948 fr->ns.grid->nr = dd->ncg_home;
7951 static void add_dd_statistics(gmx_domdec_t *dd)
7953 gmx_domdec_comm_t *comm;
7954 int ddnat;
7956 comm = dd->comm;
7958 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
7960 comm->sum_nat[ddnat-ddnatZONE] +=
7961 comm->nat[ddnat] - comm->nat[ddnat-1];
7963 comm->ndecomp++;
7966 void reset_dd_statistics_counters(gmx_domdec_t *dd)
7968 gmx_domdec_comm_t *comm;
7969 int ddnat;
7971 comm = dd->comm;
7973 /* Reset all the statistics and counters for total run counting */
7974 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
7976 comm->sum_nat[ddnat-ddnatZONE] = 0;
7978 comm->ndecomp = 0;
7979 comm->nload = 0;
7980 comm->load_step = 0;
7981 comm->load_sum = 0;
7982 comm->load_max = 0;
7983 clear_ivec(comm->load_lim);
7984 comm->load_mdf = 0;
7985 comm->load_pme = 0;
7988 void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
7990 gmx_domdec_comm_t *comm;
7991 int ddnat;
7992 double av;
7994 comm = cr->dd->comm;
7996 gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
7998 if (fplog == NULL)
8000 return;
8003 fprintf(fplog,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
8005 for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
8007 av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
8008 switch(ddnat)
8010 case ddnatZONE:
8011 fprintf(fplog,
8012 " av. #atoms communicated per step for force: %d x %.1f\n",
8013 2,av);
8014 break;
8015 case ddnatVSITE:
8016 if (cr->dd->vsite_comm)
8018 fprintf(fplog,
8019 " av. #atoms communicated per step for vsites: %d x %.1f\n",
8020 (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
8021 av);
8023 break;
8024 case ddnatCON:
8025 if (cr->dd->constraint_comm)
8027 fprintf(fplog,
8028 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
8029 1 + ir->nLincsIter,av);
8031 break;
8032 default:
8033 gmx_incons(" Unknown type for DD statistics");
8036 fprintf(fplog,"\n");
8038 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
8040 print_dd_load_av(fplog,cr->dd);
8044 void dd_partition_system(FILE *fplog,
8045 gmx_large_int_t step,
8046 t_commrec *cr,
8047 bool bMasterState,
8048 int nstglobalcomm,
8049 t_state *state_global,
8050 gmx_mtop_t *top_global,
8051 t_inputrec *ir,
8052 t_state *state_local,
8053 rvec **f,
8054 t_mdatoms *mdatoms,
8055 gmx_localtop_t *top_local,
8056 t_forcerec *fr,
8057 gmx_vsite_t *vsite,
8058 gmx_shellfc_t shellfc,
8059 gmx_constr_t constr,
8060 t_nrnb *nrnb,
8061 gmx_wallcycle_t wcycle,
8062 bool bVerbose)
8064 gmx_domdec_t *dd;
8065 gmx_domdec_comm_t *comm;
8066 gmx_ddbox_t ddbox;
8067 t_block *cgs_gl;
8068 gmx_large_int_t step_pcoupl;
8069 rvec cell_ns_x0,cell_ns_x1;
8070 int i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
8071 bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
8072 bool bRedist,bSortCG,bResortAll;
8073 ivec ncells_old,np;
8074 real grid_density;
8075 char sbuf[22];
8077 dd = cr->dd;
8078 comm = dd->comm;
8080 bBoxChanged = (bMasterState || DEFORM(*ir));
8081 if (ir->epc != epcNO)
8083 /* With nstcalcenery > 1 pressure coupling happens.
8084 * one step after calculating the energies.
8085 * Box scaling happens at the end of the MD step,
8086 * after the DD partitioning.
8087 * We therefore have to do DLB in the first partitioning
8088 * after an MD step where P-coupling occured.
8089 * We need to determine the last step in which p-coupling occurred.
8090 * MRS -- need to validate this for vv?
8092 n = ir->nstcalcenergy;
8093 if (n == 1)
8095 step_pcoupl = step - 1;
8097 else
8099 step_pcoupl = ((step - 1)/n)*n + 1;
8101 if (step_pcoupl >= comm->partition_step)
8103 bBoxChanged = TRUE;
8107 bNStGlobalComm = (step >= comm->partition_step + nstglobalcomm);
8109 if (!comm->bDynLoadBal)
8111 bDoDLB = FALSE;
8113 else
8115 /* Should we do dynamic load balacing this step?
8116 * Since it requires (possibly expensive) global communication,
8117 * we might want to do DLB less frequently.
8119 if (bBoxChanged || ir->epc != epcNO)
8121 bDoDLB = bBoxChanged;
8123 else
8125 bDoDLB = bNStGlobalComm;
8129 /* Check if we have recorded loads on the nodes */
8130 if (comm->bRecordLoad && dd_load_count(comm))
8132 if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
8134 /* Check if we should use DLB at the second partitioning
8135 * and every 100 partitionings,
8136 * so the extra communication cost is negligible.
8138 n = max(100,nstglobalcomm);
8139 bCheckDLB = (comm->n_load_collect == 0 ||
8140 comm->n_load_have % n == n-1);
8142 else
8144 bCheckDLB = FALSE;
8147 /* Print load every nstlog, first and last step to the log file */
8148 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
8149 comm->n_load_collect == 0 ||
8150 (step + ir->nstlist > ir->init_step + ir->nsteps));
8152 /* Avoid extra communication due to verbose screen output
8153 * when nstglobalcomm is set.
8155 if (bDoDLB || bLogLoad || bCheckDLB ||
8156 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
8158 get_load_distribution(dd,wcycle);
8159 if (DDMASTER(dd))
8161 if (bLogLoad)
8163 dd_print_load(fplog,dd,step-1);
8165 if (bVerbose)
8167 dd_print_load_verbose(dd);
8170 comm->n_load_collect++;
8172 if (bCheckDLB) {
8173 /* Since the timings are node dependent, the master decides */
8174 if (DDMASTER(dd))
8176 bTurnOnDLB =
8177 (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
8178 if (debug)
8180 fprintf(debug,"step %s, imb loss %f\n",
8181 gmx_step_str(step,sbuf),
8182 dd_force_imb_perf_loss(dd));
8185 dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
8186 if (bTurnOnDLB)
8188 turn_on_dlb(fplog,cr,step);
8189 bDoDLB = TRUE;
8193 comm->n_load_have++;
8196 cgs_gl = &comm->cgs_gl;
8198 bRedist = FALSE;
8199 if (bMasterState)
8201 /* Clear the old state */
8202 clear_dd_indices(dd,0,0);
8204 set_ddbox(dd,bMasterState,cr,ir,state_global->box,
8205 TRUE,cgs_gl,state_global->x,&ddbox);
8207 get_cg_distribution(fplog,step,dd,cgs_gl,
8208 state_global->box,&ddbox,state_global->x);
8210 dd_distribute_state(dd,cgs_gl,
8211 state_global,state_local,f);
8213 dd_make_local_cgs(dd,&top_local->cgs);
8215 if (dd->ncg_home > fr->cg_nalloc)
8217 dd_realloc_fr_cg(fr,dd->ncg_home);
8219 calc_cgcm(fplog,0,dd->ncg_home,
8220 &top_local->cgs,state_local->x,fr->cg_cm);
8222 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
8224 dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
8226 cg0 = 0;
8228 else if (state_local->ddp_count != dd->ddp_count)
8230 if (state_local->ddp_count > dd->ddp_count)
8232 gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
8235 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
8237 gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
8240 /* Clear the old state */
8241 clear_dd_indices(dd,0,0);
8243 /* Build the new indices */
8244 rebuild_cgindex(dd,cgs_gl->index,state_local);
8245 make_dd_indices(dd,cgs_gl->index,0);
8247 /* Redetermine the cg COMs */
8248 calc_cgcm(fplog,0,dd->ncg_home,
8249 &top_local->cgs,state_local->x,fr->cg_cm);
8251 inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
8253 dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
8255 set_ddbox(dd,bMasterState,cr,ir,state_local->box,
8256 TRUE,&top_local->cgs,state_local->x,&ddbox);
8258 bRedist = comm->bDynLoadBal;
8260 else
8262 /* We have the full state, only redistribute the cgs */
8264 /* Clear the non-home indices */
8265 clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
8267 /* Avoid global communication for dim's without pbc and -gcom */
8268 if (!bNStGlobalComm)
8270 copy_rvec(comm->box0 ,ddbox.box0 );
8271 copy_rvec(comm->box_size,ddbox.box_size);
8273 set_ddbox(dd,bMasterState,cr,ir,state_local->box,
8274 bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
8276 bBoxChanged = TRUE;
8277 bRedist = TRUE;
8279 /* For dim's without pbc and -gcom */
8280 copy_rvec(ddbox.box0 ,comm->box0 );
8281 copy_rvec(ddbox.box_size,comm->box_size);
8283 set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
8284 step,wcycle);
8286 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
8288 write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
8291 /* Check if we should sort the charge groups */
8292 if (comm->nstSortCG > 0)
8294 bSortCG = (bMasterState ||
8295 (bRedist && (step % comm->nstSortCG == 0)));
8297 else
8299 bSortCG = FALSE;
8302 ncg_home_old = dd->ncg_home;
8304 if (bRedist)
8306 cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
8307 state_local,f,fr,mdatoms,
8308 !bSortCG,nrnb);
8311 get_nsgrid_boundaries(fr->ns.grid,dd,
8312 state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
8313 dd->ncg_home,fr->cg_cm,
8314 cell_ns_x0,cell_ns_x1,&grid_density);
8316 if (bBoxChanged)
8318 comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
8321 copy_ivec(fr->ns.grid->n,ncells_old);
8322 grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
8323 state_local->box,cell_ns_x0,cell_ns_x1,
8324 fr->rlistlong,grid_density);
8325 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
8326 copy_ivec(ddbox.tric_dir,comm->tric_dir);
8328 if (bSortCG)
8330 /* Sort the state on charge group position.
8331 * This enables exact restarts from this step.
8332 * It also improves performance by about 15% with larger numbers
8333 * of atoms per node.
8336 /* Fill the ns grid with the home cell,
8337 * so we can sort with the indices.
8339 set_zones_ncg_home(dd);
8340 fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
8341 0,dd->ncg_home,fr->cg_cm);
8343 /* Check if we can user the old order and ns grid cell indices
8344 * of the charge groups to sort the charge groups efficiently.
8346 bResortAll = (bMasterState ||
8347 fr->ns.grid->n[XX] != ncells_old[XX] ||
8348 fr->ns.grid->n[YY] != ncells_old[YY] ||
8349 fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
8351 if (debug)
8353 fprintf(debug,"Step %s, sorting the %d home charge groups\n",
8354 gmx_step_str(step,sbuf),dd->ncg_home);
8356 dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
8357 bResortAll ? -1 : ncg_home_old);
8358 /* Rebuild all the indices */
8359 cg0 = 0;
8360 ga2la_clear(dd->ga2la);
8363 /* Setup up the communication and communicate the coordinates */
8364 setup_dd_communication(dd,state_local->box,&ddbox,fr);
8366 /* Set the indices */
8367 make_dd_indices(dd,cgs_gl->index,cg0);
8369 /* Set the charge group boundaries for neighbor searching */
8370 set_cg_boundaries(&comm->zones);
8373 write_dd_pdb("dd_home",step,"dump",top_global,cr,
8374 -1,state_local->x,state_local->box);
8377 /* Extract a local topology from the global topology */
8378 for(i=0; i<dd->ndim; i++)
8380 np[dd->dim[i]] = comm->cd[i].np;
8382 dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
8383 comm->cellsize_min,np,
8384 fr,vsite,top_global,top_local);
8386 /* Set up the special atom communication */
8387 n = comm->nat[ddnatZONE];
8388 for(i=ddnatZONE+1; i<ddnatNR; i++)
8390 switch(i)
8392 case ddnatVSITE:
8393 if (vsite && vsite->n_intercg_vsite)
8395 n = dd_make_local_vsites(dd,n,top_local->idef.il);
8397 break;
8398 case ddnatCON:
8399 if (dd->bInterCGcons)
8401 /* Only for inter-cg constraints we need special code */
8402 n = dd_make_local_constraints(dd,n,top_global,
8403 constr,ir->nProjOrder,
8404 &top_local->idef.il[F_CONSTR]);
8406 break;
8407 default:
8408 gmx_incons("Unknown special atom type setup");
8410 comm->nat[i] = n;
8413 /* Make space for the extra coordinates for virtual site
8414 * or constraint communication.
8416 state_local->natoms = comm->nat[ddnatNR-1];
8417 if (state_local->natoms > state_local->nalloc)
8419 dd_realloc_state(state_local,f,state_local->natoms);
8422 if (fr->bF_NoVirSum)
8424 if (vsite && vsite->n_intercg_vsite)
8426 nat_f_novirsum = comm->nat[ddnatVSITE];
8428 else
8430 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
8432 nat_f_novirsum = dd->nat_tot;
8434 else
8436 nat_f_novirsum = dd->nat_home;
8440 else
8442 nat_f_novirsum = 0;
8445 /* Set the number of atoms required for the force calculation.
8446 * Forces need to be constrained when using a twin-range setup
8447 * or with energy minimization. For simple simulations we could
8448 * avoid some allocation, zeroing and copying, but this is
8449 * probably not worth the complications ande checking.
8451 forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
8452 comm->nat[ddnatCON],nat_f_novirsum);
8454 /* We make the all mdatoms up to nat_tot_con.
8455 * We could save some work by only setting invmass
8456 * between nat_tot and nat_tot_con.
8458 /* This call also sets the new number of home particles to dd->nat_home */
8459 atoms2md(top_global,ir,
8460 comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
8462 /* Now we have the charges we can sort the FE interactions */
8463 dd_sort_local_top(dd,mdatoms,top_local);
8465 if (shellfc)
8467 /* Make the local shell stuff, currently no communication is done */
8468 make_local_shells(cr,mdatoms,shellfc);
8471 if (ir->implicit_solvent)
8473 make_local_gb(cr,fr->born,ir->gb_algorithm);
8476 if (!(cr->duty & DUTY_PME))
8478 /* Send the charges to our PME only node */
8479 gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
8480 mdatoms->chargeA,mdatoms->chargeB,
8481 comm->ddpme[0].maxshift,comm->ddpme[1].maxshift);
8484 if (constr)
8486 set_constraints(constr,top_local,ir,mdatoms,cr);
8489 if (ir->ePull != epullNO)
8491 /* Update the local pull groups */
8492 dd_make_local_pull_groups(dd,ir->pull,mdatoms);
8495 add_dd_statistics(dd);
8497 /* Make sure we only count the cycles for this DD partitioning */
8498 clear_dd_cycle_counts(dd);
8500 /* Because the order of the atoms might have changed since
8501 * the last vsite construction, we need to communicate the constructing
8502 * atom coordinates again (for spreading the forces this MD step).
8504 dd_move_x_vsites(dd,state_local->box,state_local->x);
8506 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
8508 dd_move_x(dd,state_local->box,state_local->x);
8509 write_dd_pdb("dd_dump",step,"dump",top_global,cr,
8510 -1,state_local->x,state_local->box);
8513 /* Store the partitioning step */
8514 comm->partition_step = step;
8516 /* Increase the DD partitioning counter */
8517 dd->ddp_count++;
8518 /* The state currently matches this DD partitioning count, store it */
8519 state_local->ddp_count = dd->ddp_count;
8520 if (bMasterState)
8522 /* The DD master node knows the complete cg distribution,
8523 * store the count so we can possibly skip the cg info communication.
8525 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
8528 if (comm->DD_debug > 0)
8530 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
8531 check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
8532 "after partitioning");