1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of Gromacs Copyright (c) 1991-2008
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
32 #include "domdec_network.h"
35 #include "chargegroup.h"
44 #include "pull_rotation.h"
45 #include "gmx_wallcycle.h"
49 #include "mtop_util.h"
51 #include "gmx_ga2la.h"
61 #define DDRANK(dd,rank) (rank)
62 #define DDMASTERRANK(dd) (dd->masterrank)
64 typedef struct gmx_domdec_master
66 /* The cell boundaries */
68 /* The global charge group division */
69 int *ncg
; /* Number of home charge groups for each node */
70 int *index
; /* Index of nnodes+1 into cg */
71 int *cg
; /* Global charge group index */
72 int *nat
; /* Number of home atoms for each node. */
73 int *ibuf
; /* Buffer for communication */
74 rvec
*vbuf
; /* Buffer for state scattering and gathering */
75 } gmx_domdec_master_t
;
79 /* The numbers of charge groups to send and receive for each cell
80 * that requires communication, the last entry contains the total
81 * number of atoms that needs to be communicated.
83 int nsend
[DD_MAXIZONE
+2];
84 int nrecv
[DD_MAXIZONE
+2];
85 /* The charge groups to send */
88 /* The atom range for non-in-place communication */
89 int cell2at0
[DD_MAXIZONE
];
90 int cell2at1
[DD_MAXIZONE
];
95 int np
; /* Number of grid pulses in this dimension */
96 int np_dlb
; /* For dlb, for use with edlbAUTO */
97 gmx_domdec_ind_t
*ind
; /* The indices to communicate, size np */
99 gmx_bool bInPlace
; /* Can we communicate in place? */
100 } gmx_domdec_comm_dim_t
;
104 gmx_bool
*bCellMin
; /* Temp. var.: is this cell size at the limit */
105 real
*cell_f
; /* State var.: cell boundaries, box relative */
106 real
*old_cell_f
; /* Temp. var.: old cell size */
107 real
*cell_f_max0
; /* State var.: max lower boundary, incl neighbors */
108 real
*cell_f_min1
; /* State var.: min upper boundary, incl neighbors */
109 real
*bound_min
; /* Temp. var.: lower limit for cell boundary */
110 real
*bound_max
; /* Temp. var.: upper limit for cell boundary */
111 gmx_bool bLimited
; /* State var.: is DLB limited in this dim and row */
112 real
*buf_ncd
; /* Temp. var. */
115 #define DD_NLOAD_MAX 9
117 /* Here floats are accurate enough, since these variables
118 * only influence the load balancing, not the actual MD results.
142 gmx_cgsort_t
*sort1
,*sort2
;
144 gmx_cgsort_t
*sort_new
;
156 /* This enum determines the order of the coordinates.
157 * ddnatHOME and ddnatZONE should be first and second,
158 * the others can be ordered as wanted.
160 enum { ddnatHOME
, ddnatZONE
, ddnatVSITE
, ddnatCON
, ddnatNR
};
162 enum { edlbAUTO
, edlbNO
, edlbYES
, edlbNR
};
163 const char *edlb_names
[edlbNR
] = { "auto", "no", "yes" };
167 int dim
; /* The dimension */
168 gmx_bool dim_match
;/* Tells if DD and PME dims match */
169 int nslab
; /* The number of PME slabs in this dimension */
170 real
*slb_dim_f
; /* Cell sizes for determining the PME comm. with SLB */
171 int *pp_min
; /* The minimum pp node location, size nslab */
172 int *pp_max
; /* The maximum pp node location,size nslab */
173 int maxshift
; /* The maximum shift for coordinate redistribution in PME */
178 real min0
; /* The minimum bottom of this zone */
179 real max1
; /* The maximum top of this zone */
180 real mch0
; /* The maximum bottom communicaton height for this zone */
181 real mch1
; /* The maximum top communicaton height for this zone */
182 real p1_0
; /* The bottom value of the first cell in this zone */
183 real p1_1
; /* The top value of the first cell in this zone */
186 typedef struct gmx_domdec_comm
188 /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
189 * unless stated otherwise.
192 /* The number of decomposition dimensions for PME, 0: no PME */
194 /* The number of nodes doing PME (PP/PME or only PME) */
198 /* The communication setup including the PME only nodes */
199 gmx_bool bCartesianPP_PME
;
202 int *pmenodes
; /* size npmenodes */
203 int *ddindex2simnodeid
; /* size npmenodes, only with bCartesianPP
204 * but with bCartesianPP_PME */
205 gmx_ddpme_t ddpme
[2];
207 /* The DD particle-particle nodes only */
208 gmx_bool bCartesianPP
;
209 int *ddindex2ddnodeid
; /* size npmenode, only with bCartesianPP_PME */
211 /* The global charge groups */
214 /* Should we sort the cgs */
216 gmx_domdec_sort_t
*sort
;
218 /* Are there bonded and multi-body interactions between charge groups? */
219 gmx_bool bInterCGBondeds
;
220 gmx_bool bInterCGMultiBody
;
222 /* Data for the optional bonded interaction atom communication range */
229 /* Are we actually using DLB? */
230 gmx_bool bDynLoadBal
;
232 /* Cell sizes for static load balancing, first index cartesian */
235 /* The width of the communicated boundaries */
238 /* The minimum cell size (including triclinic correction) */
240 /* For dlb, for use with edlbAUTO */
241 rvec cellsize_min_dlb
;
242 /* The lower limit for the DD cell size with DLB */
244 /* Effectively no NB cut-off limit with DLB for systems without PBC? */
245 gmx_bool bVacDLBNoLimit
;
247 /* tric_dir is only stored here because dd_get_ns_ranges needs it */
249 /* box0 and box_size are required with dim's without pbc and -gcom */
253 /* The cell boundaries */
257 /* The old location of the cell boundaries, to check cg displacements */
261 /* The communication setup and charge group boundaries for the zones */
262 gmx_domdec_zones_t zones
;
264 /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
265 * cell boundaries of neighboring cells for dynamic load balancing.
267 gmx_ddzone_t zone_d1
[2];
268 gmx_ddzone_t zone_d2
[2][2];
270 /* The coordinate/force communication setup and indices */
271 gmx_domdec_comm_dim_t cd
[DIM
];
272 /* The maximum number of cells to communicate with in one dimension */
275 /* Which cg distribution is stored on the master node */
276 int master_cg_ddp_count
;
278 /* The number of cg's received from the direct neighbors */
279 int zone_ncg1
[DD_MAXZONE
];
281 /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
284 /* Communication buffer for general use */
288 /* Communication buffer for general use */
291 /* Communication buffers only used with multiple grid pulses */
296 /* Communication buffers for local redistribution */
298 int cggl_flag_nalloc
[DIM
*2];
300 int cgcm_state_nalloc
[DIM
*2];
302 /* Cell sizes for dynamic load balancing */
303 gmx_domdec_root_t
**root
;
307 real cell_f_max0
[DIM
];
308 real cell_f_min1
[DIM
];
310 /* Stuff for load communication */
311 gmx_bool bRecordLoad
;
312 gmx_domdec_load_t
*load
;
314 MPI_Comm
*mpi_comm_load
;
317 float cycl
[ddCyclNr
];
318 int cycl_n
[ddCyclNr
];
319 float cycl_max
[ddCyclNr
];
320 /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
324 /* Have often have did we have load measurements */
326 /* Have often have we collected the load measurements */
330 double sum_nat
[ddnatNR
-ddnatZONE
];
340 /* The last partition step */
341 gmx_large_int_t partition_step
;
349 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
352 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
353 #define DD_FLAG_NRCG 65535
354 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
355 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
357 /* Zone permutation required to obtain consecutive charge groups
358 * for neighbor searching.
360 static const int zone_perm
[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
362 /* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
363 * components see only j zones with that component 0.
366 /* The DD zone order */
367 static const ivec dd_zo
[DD_MAXZONE
] =
368 {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
373 static const ivec dd_zp3
[dd_zp3n
] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
378 static const ivec dd_zp2
[dd_zp2n
] = {{0,0,4},{1,3,4}};
383 static const ivec dd_zp1
[dd_zp1n
] = {{0,0,2}};
385 /* Factors used to avoid problems due to rounding issues */
386 #define DD_CELL_MARGIN 1.0001
387 #define DD_CELL_MARGIN2 1.00005
388 /* Factor to account for pressure scaling during nstlist steps */
389 #define DD_PRES_SCALE_MARGIN 1.02
391 /* Allowed performance loss before we DLB or warn */
392 #define DD_PERF_LOSS 0.05
394 #define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
396 /* Use separate MPI send and receive commands
397 * when nnodes <= GMX_DD_NNODES_SENDRECV.
398 * This saves memory (and some copying for small nnodes).
399 * For high parallelization scatter and gather calls are used.
401 #define GMX_DD_NNODES_SENDRECV 4
405 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
407 static void index2xyz(ivec nc,int ind,ivec xyz)
409 xyz[XX] = ind % nc[XX];
410 xyz[YY] = (ind / nc[XX]) % nc[YY];
411 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
415 /* This order is required to minimize the coordinate communication in PME
416 * which uses decomposition in the x direction.
418 #define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
420 static void ddindex2xyz(ivec nc
,int ind
,ivec xyz
)
422 xyz
[XX
] = ind
/ (nc
[YY
]*nc
[ZZ
]);
423 xyz
[YY
] = (ind
/ nc
[ZZ
]) % nc
[YY
];
424 xyz
[ZZ
] = ind
% nc
[ZZ
];
427 static int ddcoord2ddnodeid(gmx_domdec_t
*dd
,ivec c
)
432 ddindex
= dd_index(dd
->nc
,c
);
433 if (dd
->comm
->bCartesianPP_PME
)
435 ddnodeid
= dd
->comm
->ddindex2ddnodeid
[ddindex
];
437 else if (dd
->comm
->bCartesianPP
)
440 MPI_Cart_rank(dd
->mpi_comm_all
,c
,&ddnodeid
);
451 static gmx_bool
dynamic_dd_box(gmx_ddbox_t
*ddbox
,t_inputrec
*ir
)
453 return (ddbox
->nboundeddim
< DIM
|| DYNAMIC_BOX(*ir
));
456 int ddglatnr(gmx_domdec_t
*dd
,int i
)
466 if (i
>= dd
->comm
->nat
[ddnatNR
-1])
468 gmx_fatal(FARGS
,"glatnr called with %d, which is larger than the local number of atoms (%d)",i
,dd
->comm
->nat
[ddnatNR
-1]);
470 atnr
= dd
->gatindex
[i
] + 1;
476 t_block
*dd_charge_groups_global(gmx_domdec_t
*dd
)
478 return &dd
->comm
->cgs_gl
;
481 static void vec_rvec_init(vec_rvec_t
*v
)
487 static void vec_rvec_check_alloc(vec_rvec_t
*v
,int n
)
491 v
->nalloc
= over_alloc_dd(n
);
492 srenew(v
->v
,v
->nalloc
);
496 void dd_store_state(gmx_domdec_t
*dd
,t_state
*state
)
500 if (state
->ddp_count
!= dd
->ddp_count
)
502 gmx_incons("The state does not the domain decomposition state");
505 state
->ncg_gl
= dd
->ncg_home
;
506 if (state
->ncg_gl
> state
->cg_gl_nalloc
)
508 state
->cg_gl_nalloc
= over_alloc_dd(state
->ncg_gl
);
509 srenew(state
->cg_gl
,state
->cg_gl_nalloc
);
511 for(i
=0; i
<state
->ncg_gl
; i
++)
513 state
->cg_gl
[i
] = dd
->index_gl
[i
];
516 state
->ddp_count_cg_gl
= dd
->ddp_count
;
519 gmx_domdec_zones_t
*domdec_zones(gmx_domdec_t
*dd
)
521 return &dd
->comm
->zones
;
524 void dd_get_ns_ranges(gmx_domdec_t
*dd
,int icg
,
525 int *jcg0
,int *jcg1
,ivec shift0
,ivec shift1
)
527 gmx_domdec_zones_t
*zones
;
530 zones
= &dd
->comm
->zones
;
533 while (icg
>= zones
->izone
[izone
].cg1
)
542 else if (izone
< zones
->nizone
)
544 *jcg0
= zones
->izone
[izone
].jcg0
;
548 gmx_fatal(FARGS
,"DD icg %d out of range: izone (%d) >= nizone (%d)",
549 icg
,izone
,zones
->nizone
);
552 *jcg1
= zones
->izone
[izone
].jcg1
;
554 for(d
=0; d
<dd
->ndim
; d
++)
557 shift0
[dim
] = zones
->izone
[izone
].shift0
[dim
];
558 shift1
[dim
] = zones
->izone
[izone
].shift1
[dim
];
559 if (dd
->comm
->tric_dir
[dim
] || (dd
->bGridJump
&& d
> 0))
561 /* A conservative approach, this can be optimized */
568 int dd_natoms_vsite(gmx_domdec_t
*dd
)
570 return dd
->comm
->nat
[ddnatVSITE
];
573 void dd_get_constraint_range(gmx_domdec_t
*dd
,int *at_start
,int *at_end
)
575 *at_start
= dd
->comm
->nat
[ddnatCON
-1];
576 *at_end
= dd
->comm
->nat
[ddnatCON
];
579 void dd_move_x(gmx_domdec_t
*dd
,matrix box
,rvec x
[])
581 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
583 gmx_domdec_comm_t
*comm
;
584 gmx_domdec_comm_dim_t
*cd
;
585 gmx_domdec_ind_t
*ind
;
586 rvec shift
={0,0,0},*buf
,*rbuf
;
587 gmx_bool bPBC
,bScrew
;
591 cgindex
= dd
->cgindex
;
596 nat_tot
= dd
->nat_home
;
597 for(d
=0; d
<dd
->ndim
; d
++)
599 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
600 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
603 copy_rvec(box
[dd
->dim
[d
]],shift
);
606 for(p
=0; p
<cd
->np
; p
++)
613 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
615 at0
= cgindex
[index
[i
]];
616 at1
= cgindex
[index
[i
]+1];
617 for(j
=at0
; j
<at1
; j
++)
619 copy_rvec(x
[j
],buf
[n
]);
626 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
628 at0
= cgindex
[index
[i
]];
629 at1
= cgindex
[index
[i
]+1];
630 for(j
=at0
; j
<at1
; j
++)
632 /* We need to shift the coordinates */
633 rvec_add(x
[j
],shift
,buf
[n
]);
640 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
642 at0
= cgindex
[index
[i
]];
643 at1
= cgindex
[index
[i
]+1];
644 for(j
=at0
; j
<at1
; j
++)
647 buf
[n
][XX
] = x
[j
][XX
] + shift
[XX
];
649 * This operation requires a special shift force
650 * treatment, which is performed in calc_vir.
652 buf
[n
][YY
] = box
[YY
][YY
] - x
[j
][YY
];
653 buf
[n
][ZZ
] = box
[ZZ
][ZZ
] - x
[j
][ZZ
];
665 rbuf
= comm
->vbuf2
.v
;
667 /* Send and receive the coordinates */
668 dd_sendrecv_rvec(dd
, d
, dddirBackward
,
669 buf
, ind
->nsend
[nzone
+1],
670 rbuf
, ind
->nrecv
[nzone
+1]);
674 for(zone
=0; zone
<nzone
; zone
++)
676 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
678 copy_rvec(rbuf
[j
],x
[i
]);
683 nat_tot
+= ind
->nrecv
[nzone
+1];
689 void dd_move_f(gmx_domdec_t
*dd
,rvec f
[],rvec
*fshift
)
691 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
693 gmx_domdec_comm_t
*comm
;
694 gmx_domdec_comm_dim_t
*cd
;
695 gmx_domdec_ind_t
*ind
;
699 gmx_bool bPBC
,bScrew
;
703 cgindex
= dd
->cgindex
;
708 nzone
= comm
->zones
.n
/2;
709 nat_tot
= dd
->nat_tot
;
710 for(d
=dd
->ndim
-1; d
>=0; d
--)
712 bPBC
= (dd
->ci
[dd
->dim
[d
]] == 0);
713 bScrew
= (bPBC
&& dd
->bScrewPBC
&& dd
->dim
[d
] == XX
);
714 if (fshift
== NULL
&& !bScrew
)
718 /* Determine which shift vector we need */
724 for(p
=cd
->np
-1; p
>=0; p
--) {
726 nat_tot
-= ind
->nrecv
[nzone
+1];
733 sbuf
= comm
->vbuf2
.v
;
735 for(zone
=0; zone
<nzone
; zone
++)
737 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
739 copy_rvec(f
[i
],sbuf
[j
]);
744 /* Communicate the forces */
745 dd_sendrecv_rvec(dd
, d
, dddirForward
,
746 sbuf
, ind
->nrecv
[nzone
+1],
747 buf
, ind
->nsend
[nzone
+1]);
749 /* Add the received forces */
753 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
755 at0
= cgindex
[index
[i
]];
756 at1
= cgindex
[index
[i
]+1];
757 for(j
=at0
; j
<at1
; j
++)
759 rvec_inc(f
[j
],buf
[n
]);
766 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
768 at0
= cgindex
[index
[i
]];
769 at1
= cgindex
[index
[i
]+1];
770 for(j
=at0
; j
<at1
; j
++)
772 rvec_inc(f
[j
],buf
[n
]);
773 /* Add this force to the shift force */
774 rvec_inc(fshift
[is
],buf
[n
]);
781 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
783 at0
= cgindex
[index
[i
]];
784 at1
= cgindex
[index
[i
]+1];
785 for(j
=at0
; j
<at1
; j
++)
787 /* Rotate the force */
788 f
[j
][XX
] += buf
[n
][XX
];
789 f
[j
][YY
] -= buf
[n
][YY
];
790 f
[j
][ZZ
] -= buf
[n
][ZZ
];
793 /* Add this force to the shift force */
794 rvec_inc(fshift
[is
],buf
[n
]);
805 void dd_atom_spread_real(gmx_domdec_t
*dd
,real v
[])
807 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
809 gmx_domdec_comm_t
*comm
;
810 gmx_domdec_comm_dim_t
*cd
;
811 gmx_domdec_ind_t
*ind
;
816 cgindex
= dd
->cgindex
;
818 buf
= &comm
->vbuf
.v
[0][0];
821 nat_tot
= dd
->nat_home
;
822 for(d
=0; d
<dd
->ndim
; d
++)
825 for(p
=0; p
<cd
->np
; p
++)
830 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
832 at0
= cgindex
[index
[i
]];
833 at1
= cgindex
[index
[i
]+1];
834 for(j
=at0
; j
<at1
; j
++)
847 rbuf
= &comm
->vbuf2
.v
[0][0];
849 /* Send and receive the coordinates */
850 dd_sendrecv_real(dd
, d
, dddirBackward
,
851 buf
, ind
->nsend
[nzone
+1],
852 rbuf
, ind
->nrecv
[nzone
+1]);
856 for(zone
=0; zone
<nzone
; zone
++)
858 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
865 nat_tot
+= ind
->nrecv
[nzone
+1];
871 void dd_atom_sum_real(gmx_domdec_t
*dd
,real v
[])
873 int nzone
,nat_tot
,n
,d
,p
,i
,j
,at0
,at1
,zone
;
875 gmx_domdec_comm_t
*comm
;
876 gmx_domdec_comm_dim_t
*cd
;
877 gmx_domdec_ind_t
*ind
;
882 cgindex
= dd
->cgindex
;
884 buf
= &comm
->vbuf
.v
[0][0];
887 nzone
= comm
->zones
.n
/2;
888 nat_tot
= dd
->nat_tot
;
889 for(d
=dd
->ndim
-1; d
>=0; d
--)
892 for(p
=cd
->np
-1; p
>=0; p
--) {
894 nat_tot
-= ind
->nrecv
[nzone
+1];
901 sbuf
= &comm
->vbuf2
.v
[0][0];
903 for(zone
=0; zone
<nzone
; zone
++)
905 for(i
=ind
->cell2at0
[zone
]; i
<ind
->cell2at1
[zone
]; i
++)
912 /* Communicate the forces */
913 dd_sendrecv_real(dd
, d
, dddirForward
,
914 sbuf
, ind
->nrecv
[nzone
+1],
915 buf
, ind
->nsend
[nzone
+1]);
917 /* Add the received forces */
919 for(i
=0; i
<ind
->nsend
[nzone
]; i
++)
921 at0
= cgindex
[index
[i
]];
922 at1
= cgindex
[index
[i
]+1];
923 for(j
=at0
; j
<at1
; j
++)
934 static void print_ddzone(FILE *fp
,int d
,int i
,int j
,gmx_ddzone_t
*zone
)
936 fprintf(fp
,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
938 zone
->min0
,zone
->max1
,
939 zone
->mch0
,zone
->mch0
,
940 zone
->p1_0
,zone
->p1_1
);
943 static void dd_sendrecv_ddzone(const gmx_domdec_t
*dd
,
944 int ddimind
,int direction
,
945 gmx_ddzone_t
*buf_s
,int n_s
,
946 gmx_ddzone_t
*buf_r
,int n_r
)
948 rvec vbuf_s
[5*2],vbuf_r
[5*2];
953 vbuf_s
[i
*2 ][0] = buf_s
[i
].min0
;
954 vbuf_s
[i
*2 ][1] = buf_s
[i
].max1
;
955 vbuf_s
[i
*2 ][2] = buf_s
[i
].mch0
;
956 vbuf_s
[i
*2+1][0] = buf_s
[i
].mch1
;
957 vbuf_s
[i
*2+1][1] = buf_s
[i
].p1_0
;
958 vbuf_s
[i
*2+1][2] = buf_s
[i
].p1_1
;
961 dd_sendrecv_rvec(dd
, ddimind
, direction
,
967 buf_r
[i
].min0
= vbuf_r
[i
*2 ][0];
968 buf_r
[i
].max1
= vbuf_r
[i
*2 ][1];
969 buf_r
[i
].mch0
= vbuf_r
[i
*2 ][2];
970 buf_r
[i
].mch1
= vbuf_r
[i
*2+1][0];
971 buf_r
[i
].p1_0
= vbuf_r
[i
*2+1][1];
972 buf_r
[i
].p1_1
= vbuf_r
[i
*2+1][2];
976 static void dd_move_cellx(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
,
977 rvec cell_ns_x0
,rvec cell_ns_x1
)
979 int d
,d1
,dim
,dim1
,pos
,buf_size
,i
,j
,k
,p
,npulse
,npulse_min
;
980 gmx_ddzone_t
*zp
,buf_s
[5],buf_r
[5],buf_e
[5];
981 rvec extr_s
[2],extr_r
[2];
984 gmx_domdec_comm_t
*comm
;
989 for(d
=1; d
<dd
->ndim
; d
++)
992 zp
= (d
== 1) ? &comm
->zone_d1
[0] : &comm
->zone_d2
[0][0];
993 zp
->min0
= cell_ns_x0
[dim
];
994 zp
->max1
= cell_ns_x1
[dim
];
995 zp
->mch0
= cell_ns_x0
[dim
];
996 zp
->mch1
= cell_ns_x1
[dim
];
997 zp
->p1_0
= cell_ns_x0
[dim
];
998 zp
->p1_1
= cell_ns_x1
[dim
];
1001 for(d
=dd
->ndim
-2; d
>=0; d
--)
1004 bPBC
= (dim
< ddbox
->npbcdim
);
1006 /* Use an rvec to store two reals */
1007 extr_s
[d
][0] = comm
->cell_f0
[d
+1];
1008 extr_s
[d
][1] = comm
->cell_f1
[d
+1];
1012 /* Store the extremes in the backward sending buffer,
1013 * so the get updated separately from the forward communication.
1015 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1017 /* We invert the order to be able to use the same loop for buf_e */
1018 buf_s
[pos
].min0
= extr_s
[d1
][1];
1019 buf_s
[pos
].max1
= extr_s
[d1
][0];
1020 buf_s
[pos
].mch0
= 0;
1021 buf_s
[pos
].mch1
= 0;
1022 /* Store the cell corner of the dimension we communicate along */
1023 buf_s
[pos
].p1_0
= comm
->cell_x0
[dim
];
1024 buf_s
[pos
].p1_1
= 0;
1028 buf_s
[pos
] = (dd
->ndim
== 2) ? comm
->zone_d1
[0] : comm
->zone_d2
[0][0];
1031 if (dd
->ndim
== 3 && d
== 0)
1033 buf_s
[pos
] = comm
->zone_d2
[0][1];
1035 buf_s
[pos
] = comm
->zone_d1
[0];
1039 /* We only need to communicate the extremes
1040 * in the forward direction
1042 npulse
= comm
->cd
[d
].np
;
1045 /* Take the minimum to avoid double communication */
1046 npulse_min
= min(npulse
,dd
->nc
[dim
]-1-npulse
);
1050 /* Without PBC we should really not communicate over
1051 * the boundaries, but implementing that complicates
1052 * the communication setup and therefore we simply
1053 * do all communication, but ignore some data.
1055 npulse_min
= npulse
;
1057 for(p
=0; p
<npulse_min
; p
++)
1059 /* Communicate the extremes forward */
1060 bUse
= (bPBC
|| dd
->ci
[dim
] > 0);
1062 dd_sendrecv_rvec(dd
, d
, dddirForward
,
1063 extr_s
+d
, dd
->ndim
-d
-1,
1064 extr_r
+d
, dd
->ndim
-d
-1);
1068 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1070 extr_s
[d1
][0] = max(extr_s
[d1
][0],extr_r
[d1
][0]);
1071 extr_s
[d1
][1] = min(extr_s
[d1
][1],extr_r
[d1
][1]);
1077 for(p
=0; p
<npulse
; p
++)
1079 /* Communicate all the zone information backward */
1080 bUse
= (bPBC
|| dd
->ci
[dim
] < dd
->nc
[dim
] - 1);
1082 dd_sendrecv_ddzone(dd
, d
, dddirBackward
,
1089 for(d1
=d
+1; d1
<dd
->ndim
; d1
++)
1091 /* Determine the decrease of maximum required
1092 * communication height along d1 due to the distance along d,
1093 * this avoids a lot of useless atom communication.
1095 dist_d
= comm
->cell_x1
[dim
] - buf_r
[0].p1_0
;
1097 if (ddbox
->tric_dir
[dim
])
1099 /* c is the off-diagonal coupling between the cell planes
1100 * along directions d and d1.
1102 c
= ddbox
->v
[dim
][dd
->dim
[d1
]][dim
];
1108 det
= (1 + c
*c
)*comm
->cutoff
*comm
->cutoff
- dist_d
*dist_d
;
1111 dh
[d1
] = comm
->cutoff
- (c
*dist_d
+ sqrt(det
))/(1 + c
*c
);
1115 /* A negative value signals out of range */
1121 /* Accumulate the extremes over all pulses */
1122 for(i
=0; i
<buf_size
; i
++)
1126 buf_e
[i
] = buf_r
[i
];
1132 buf_e
[i
].min0
= min(buf_e
[i
].min0
,buf_r
[i
].min0
);
1133 buf_e
[i
].max1
= max(buf_e
[i
].max1
,buf_r
[i
].max1
);
1136 if (dd
->ndim
== 3 && d
== 0 && i
== buf_size
- 1)
1144 if (bUse
&& dh
[d1
] >= 0)
1146 buf_e
[i
].mch0
= max(buf_e
[i
].mch0
,buf_r
[i
].mch0
-dh
[d1
]);
1147 buf_e
[i
].mch1
= max(buf_e
[i
].mch1
,buf_r
[i
].mch1
-dh
[d1
]);
1150 /* Copy the received buffer to the send buffer,
1151 * to pass the data through with the next pulse.
1153 buf_s
[i
] = buf_r
[i
];
1155 if (((bPBC
|| dd
->ci
[dim
]+npulse
< dd
->nc
[dim
]) && p
== npulse
-1) ||
1156 (!bPBC
&& dd
->ci
[dim
]+1+p
== dd
->nc
[dim
]-1))
1158 /* Store the extremes */
1161 for(d1
=d
; d1
<dd
->ndim
-1; d1
++)
1163 extr_s
[d1
][1] = min(extr_s
[d1
][1],buf_e
[pos
].min0
);
1164 extr_s
[d1
][0] = max(extr_s
[d1
][0],buf_e
[pos
].max1
);
1168 if (d
== 1 || (d
== 0 && dd
->ndim
== 3))
1172 comm
->zone_d2
[1-d
][i
] = buf_e
[pos
];
1178 comm
->zone_d1
[1] = buf_e
[pos
];
1192 print_ddzone(debug
,1,i
,0,&comm
->zone_d1
[i
]);
1194 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d1
[i
].min0
);
1195 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d1
[i
].max1
);
1207 print_ddzone(debug
,2,i
,j
,&comm
->zone_d2
[i
][j
]);
1209 cell_ns_x0
[dim
] = min(cell_ns_x0
[dim
],comm
->zone_d2
[i
][j
].min0
);
1210 cell_ns_x1
[dim
] = max(cell_ns_x1
[dim
],comm
->zone_d2
[i
][j
].max1
);
1214 for(d
=1; d
<dd
->ndim
; d
++)
1216 comm
->cell_f_max0
[d
] = extr_s
[d
-1][0];
1217 comm
->cell_f_min1
[d
] = extr_s
[d
-1][1];
1220 fprintf(debug
,"Cell fraction d %d, max0 %f, min1 %f\n",
1221 d
,comm
->cell_f_max0
[d
],comm
->cell_f_min1
[d
]);
1226 static void dd_collect_cg(gmx_domdec_t
*dd
,
1227 t_state
*state_local
)
1229 gmx_domdec_master_t
*ma
=NULL
;
1230 int buf2
[2],*ibuf
,i
,ncg_home
=0,*cg
=NULL
,nat_home
=0;
1233 if (state_local
->ddp_count
== dd
->comm
->master_cg_ddp_count
)
1235 /* The master has the correct distribution */
1239 if (state_local
->ddp_count
== dd
->ddp_count
)
1241 ncg_home
= dd
->ncg_home
;
1243 nat_home
= dd
->nat_home
;
1245 else if (state_local
->ddp_count_cg_gl
== state_local
->ddp_count
)
1247 cgs_gl
= &dd
->comm
->cgs_gl
;
1249 ncg_home
= state_local
->ncg_gl
;
1250 cg
= state_local
->cg_gl
;
1252 for(i
=0; i
<ncg_home
; i
++)
1254 nat_home
+= cgs_gl
->index
[cg
[i
]+1] - cgs_gl
->index
[cg
[i
]];
1259 gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
1262 buf2
[0] = dd
->ncg_home
;
1263 buf2
[1] = dd
->nat_home
;
1273 /* Collect the charge group and atom counts on the master */
1274 dd_gather(dd
,2*sizeof(int),buf2
,ibuf
);
1279 for(i
=0; i
<dd
->nnodes
; i
++)
1281 ma
->ncg
[i
] = ma
->ibuf
[2*i
];
1282 ma
->nat
[i
] = ma
->ibuf
[2*i
+1];
1283 ma
->index
[i
+1] = ma
->index
[i
] + ma
->ncg
[i
];
1286 /* Make byte counts and indices */
1287 for(i
=0; i
<dd
->nnodes
; i
++)
1289 ma
->ibuf
[i
] = ma
->ncg
[i
]*sizeof(int);
1290 ma
->ibuf
[dd
->nnodes
+i
] = ma
->index
[i
]*sizeof(int);
1294 fprintf(debug
,"Initial charge group distribution: ");
1295 for(i
=0; i
<dd
->nnodes
; i
++)
1296 fprintf(debug
," %d",ma
->ncg
[i
]);
1297 fprintf(debug
,"\n");
1301 /* Collect the charge group indices on the master */
1303 dd
->ncg_home
*sizeof(int),dd
->index_gl
,
1304 DDMASTER(dd
) ? ma
->ibuf
: NULL
,
1305 DDMASTER(dd
) ? ma
->ibuf
+dd
->nnodes
: NULL
,
1306 DDMASTER(dd
) ? ma
->cg
: NULL
);
1308 dd
->comm
->master_cg_ddp_count
= state_local
->ddp_count
;
1311 static void dd_collect_vec_sendrecv(gmx_domdec_t
*dd
,
1314 gmx_domdec_master_t
*ma
;
1315 int n
,i
,c
,a
,nalloc
=0;
1324 MPI_Send(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1325 dd
->rank
,dd
->mpi_comm_all
);
1328 /* Copy the master coordinates to the global array */
1329 cgs_gl
= &dd
->comm
->cgs_gl
;
1331 n
= DDMASTERRANK(dd
);
1333 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1335 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1337 copy_rvec(lv
[a
++],v
[c
]);
1341 for(n
=0; n
<dd
->nnodes
; n
++)
1345 if (ma
->nat
[n
] > nalloc
)
1347 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1351 MPI_Recv(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,DDRANK(dd
,n
),
1352 n
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1355 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1357 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1359 copy_rvec(buf
[a
++],v
[c
]);
1368 static void get_commbuffer_counts(gmx_domdec_t
*dd
,
1369 int **counts
,int **disps
)
1371 gmx_domdec_master_t
*ma
;
1376 /* Make the rvec count and displacment arrays */
1378 *disps
= ma
->ibuf
+ dd
->nnodes
;
1379 for(n
=0; n
<dd
->nnodes
; n
++)
1381 (*counts
)[n
] = ma
->nat
[n
]*sizeof(rvec
);
1382 (*disps
)[n
] = (n
== 0 ? 0 : (*disps
)[n
-1] + (*counts
)[n
-1]);
1386 static void dd_collect_vec_gatherv(gmx_domdec_t
*dd
,
1389 gmx_domdec_master_t
*ma
;
1390 int *rcounts
=NULL
,*disps
=NULL
;
1399 get_commbuffer_counts(dd
,&rcounts
,&disps
);
1404 dd_gatherv(dd
,dd
->nat_home
*sizeof(rvec
),lv
,rcounts
,disps
,buf
);
1408 cgs_gl
= &dd
->comm
->cgs_gl
;
1411 for(n
=0; n
<dd
->nnodes
; n
++)
1413 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1415 for(c
=cgs_gl
->index
[ma
->cg
[i
]]; c
<cgs_gl
->index
[ma
->cg
[i
]+1]; c
++)
1417 copy_rvec(buf
[a
++],v
[c
]);
1424 void dd_collect_vec(gmx_domdec_t
*dd
,
1425 t_state
*state_local
,rvec
*lv
,rvec
*v
)
1427 gmx_domdec_master_t
*ma
;
1428 int n
,i
,c
,a
,nalloc
=0;
1431 dd_collect_cg(dd
,state_local
);
1433 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1435 dd_collect_vec_sendrecv(dd
,lv
,v
);
1439 dd_collect_vec_gatherv(dd
,lv
,v
);
1444 void dd_collect_state(gmx_domdec_t
*dd
,
1445 t_state
*state_local
,t_state
*state
)
1449 nh
= state
->nhchainlength
;
1453 state
->lambda
= state_local
->lambda
;
1454 state
->veta
= state_local
->veta
;
1455 state
->vol0
= state_local
->vol0
;
1456 copy_mat(state_local
->box
,state
->box
);
1457 copy_mat(state_local
->boxv
,state
->boxv
);
1458 copy_mat(state_local
->svir_prev
,state
->svir_prev
);
1459 copy_mat(state_local
->fvir_prev
,state
->fvir_prev
);
1460 copy_mat(state_local
->pres_prev
,state
->pres_prev
);
1463 for(i
=0; i
<state_local
->ngtc
; i
++)
1465 for(j
=0; j
<nh
; j
++) {
1466 state
->nosehoover_xi
[i
*nh
+j
] = state_local
->nosehoover_xi
[i
*nh
+j
];
1467 state
->nosehoover_vxi
[i
*nh
+j
] = state_local
->nosehoover_vxi
[i
*nh
+j
];
1469 state
->therm_integral
[i
] = state_local
->therm_integral
[i
];
1471 for(i
=0; i
<state_local
->nnhpres
; i
++)
1473 for(j
=0; j
<nh
; j
++) {
1474 state
->nhpres_xi
[i
*nh
+j
] = state_local
->nhpres_xi
[i
*nh
+j
];
1475 state
->nhpres_vxi
[i
*nh
+j
] = state_local
->nhpres_vxi
[i
*nh
+j
];
1479 for(est
=0; est
<estNR
; est
++)
1481 if (EST_DISTR(est
) && state_local
->flags
& (1<<est
))
1485 dd_collect_vec(dd
,state_local
,state_local
->x
,state
->x
);
1488 dd_collect_vec(dd
,state_local
,state_local
->v
,state
->v
);
1491 dd_collect_vec(dd
,state_local
,state_local
->sd_X
,state
->sd_X
);
1494 dd_collect_vec(dd
,state_local
,state_local
->cg_p
,state
->cg_p
);
1497 if (state
->nrngi
== 1)
1501 for(i
=0; i
<state_local
->nrng
; i
++)
1503 state
->ld_rng
[i
] = state_local
->ld_rng
[i
];
1509 dd_gather(dd
,state_local
->nrng
*sizeof(state
->ld_rng
[0]),
1510 state_local
->ld_rng
,state
->ld_rng
);
1514 if (state
->nrngi
== 1)
1518 state
->ld_rngi
[0] = state_local
->ld_rngi
[0];
1523 dd_gather(dd
,sizeof(state
->ld_rngi
[0]),
1524 state_local
->ld_rngi
,state
->ld_rngi
);
1527 case estDISRE_INITF
:
1528 case estDISRE_RM3TAV
:
1529 case estORIRE_INITF
:
1533 gmx_incons("Unknown state entry encountered in dd_collect_state");
1539 static void dd_realloc_fr_cg(t_forcerec
*fr
,int nalloc
)
1543 fprintf(debug
,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr
->cg_nalloc
,nalloc
,over_alloc_dd(nalloc
));
1545 fr
->cg_nalloc
= over_alloc_dd(nalloc
);
1546 srenew(fr
->cg_cm
,fr
->cg_nalloc
);
1547 srenew(fr
->cginfo
,fr
->cg_nalloc
);
1550 static void dd_realloc_state(t_state
*state
,rvec
**f
,int nalloc
)
1556 fprintf(debug
,"Reallocating state: currently %d, required %d, allocating %d\n",state
->nalloc
,nalloc
,over_alloc_dd(nalloc
));
1559 state
->nalloc
= over_alloc_dd(nalloc
);
1561 for(est
=0; est
<estNR
; est
++)
1563 if (EST_DISTR(est
) && state
->flags
& (1<<est
))
1567 srenew(state
->x
,state
->nalloc
);
1570 srenew(state
->v
,state
->nalloc
);
1573 srenew(state
->sd_X
,state
->nalloc
);
1576 srenew(state
->cg_p
,state
->nalloc
);
1580 case estDISRE_INITF
:
1581 case estDISRE_RM3TAV
:
1582 case estORIRE_INITF
:
1584 /* No reallocation required */
1587 gmx_incons("Unknown state entry encountered in dd_realloc_state");
1594 srenew(*f
,state
->nalloc
);
1598 static void dd_distribute_vec_sendrecv(gmx_domdec_t
*dd
,t_block
*cgs
,
1601 gmx_domdec_master_t
*ma
;
1602 int n
,i
,c
,a
,nalloc
=0;
1609 for(n
=0; n
<dd
->nnodes
; n
++)
1613 if (ma
->nat
[n
] > nalloc
)
1615 nalloc
= over_alloc_dd(ma
->nat
[n
]);
1618 /* Use lv as a temporary buffer */
1620 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1622 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1624 copy_rvec(v
[c
],buf
[a
++]);
1627 if (a
!= ma
->nat
[n
])
1629 gmx_fatal(FARGS
,"Internal error a (%d) != nat (%d)",
1634 MPI_Send(buf
,ma
->nat
[n
]*sizeof(rvec
),MPI_BYTE
,
1635 DDRANK(dd
,n
),n
,dd
->mpi_comm_all
);
1640 n
= DDMASTERRANK(dd
);
1642 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1644 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1646 copy_rvec(v
[c
],lv
[a
++]);
1653 MPI_Recv(lv
,dd
->nat_home
*sizeof(rvec
),MPI_BYTE
,DDMASTERRANK(dd
),
1654 MPI_ANY_TAG
,dd
->mpi_comm_all
,MPI_STATUS_IGNORE
);
1659 static void dd_distribute_vec_scatterv(gmx_domdec_t
*dd
,t_block
*cgs
,
1662 gmx_domdec_master_t
*ma
;
1663 int *scounts
=NULL
,*disps
=NULL
;
1664 int n
,i
,c
,a
,nalloc
=0;
1671 get_commbuffer_counts(dd
,&scounts
,&disps
);
1675 for(n
=0; n
<dd
->nnodes
; n
++)
1677 for(i
=ma
->index
[n
]; i
<ma
->index
[n
+1]; i
++)
1679 for(c
=cgs
->index
[ma
->cg
[i
]]; c
<cgs
->index
[ma
->cg
[i
]+1]; c
++)
1681 copy_rvec(v
[c
],buf
[a
++]);
1687 dd_scatterv(dd
,scounts
,disps
,buf
,dd
->nat_home
*sizeof(rvec
),lv
);
1690 static void dd_distribute_vec(gmx_domdec_t
*dd
,t_block
*cgs
,rvec
*v
,rvec
*lv
)
1692 if (dd
->nnodes
<= GMX_DD_NNODES_SENDRECV
)
1694 dd_distribute_vec_sendrecv(dd
,cgs
,v
,lv
);
1698 dd_distribute_vec_scatterv(dd
,cgs
,v
,lv
);
1702 static void dd_distribute_state(gmx_domdec_t
*dd
,t_block
*cgs
,
1703 t_state
*state
,t_state
*state_local
,
1706 int i
,j
,ngtch
,ngtcp
,nh
;
1708 nh
= state
->nhchainlength
;
1712 state_local
->lambda
= state
->lambda
;
1713 state_local
->veta
= state
->veta
;
1714 state_local
->vol0
= state
->vol0
;
1715 copy_mat(state
->box
,state_local
->box
);
1716 copy_mat(state
->box_rel
,state_local
->box_rel
);
1717 copy_mat(state
->boxv
,state_local
->boxv
);
1718 copy_mat(state
->svir_prev
,state_local
->svir_prev
);
1719 copy_mat(state
->fvir_prev
,state_local
->fvir_prev
);
1720 for(i
=0; i
<state_local
->ngtc
; i
++)
1722 for(j
=0; j
<nh
; j
++) {
1723 state_local
->nosehoover_xi
[i
*nh
+j
] = state
->nosehoover_xi
[i
*nh
+j
];
1724 state_local
->nosehoover_vxi
[i
*nh
+j
] = state
->nosehoover_vxi
[i
*nh
+j
];
1726 state_local
->therm_integral
[i
] = state
->therm_integral
[i
];
1728 for(i
=0; i
<state_local
->nnhpres
; i
++)
1730 for(j
=0; j
<nh
; j
++) {
1731 state_local
->nhpres_xi
[i
*nh
+j
] = state
->nhpres_xi
[i
*nh
+j
];
1732 state_local
->nhpres_vxi
[i
*nh
+j
] = state
->nhpres_vxi
[i
*nh
+j
];
1736 dd_bcast(dd
,sizeof(real
),&state_local
->lambda
);
1737 dd_bcast(dd
,sizeof(real
),&state_local
->veta
);
1738 dd_bcast(dd
,sizeof(real
),&state_local
->vol0
);
1739 dd_bcast(dd
,sizeof(state_local
->box
),state_local
->box
);
1740 dd_bcast(dd
,sizeof(state_local
->box_rel
),state_local
->box_rel
);
1741 dd_bcast(dd
,sizeof(state_local
->boxv
),state_local
->boxv
);
1742 dd_bcast(dd
,sizeof(state_local
->svir_prev
),state_local
->svir_prev
);
1743 dd_bcast(dd
,sizeof(state_local
->fvir_prev
),state_local
->fvir_prev
);
1744 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_xi
);
1745 dd_bcast(dd
,((state_local
->ngtc
*nh
)*sizeof(double)),state_local
->nosehoover_vxi
);
1746 dd_bcast(dd
,state_local
->ngtc
*sizeof(double),state_local
->therm_integral
);
1747 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_xi
);
1748 dd_bcast(dd
,((state_local
->nnhpres
*nh
)*sizeof(double)),state_local
->nhpres_vxi
);
1750 if (dd
->nat_home
> state_local
->nalloc
)
1752 dd_realloc_state(state_local
,f
,dd
->nat_home
);
1754 for(i
=0; i
<estNR
; i
++)
1756 if (EST_DISTR(i
) && state_local
->flags
& (1<<i
))
1760 dd_distribute_vec(dd
,cgs
,state
->x
,state_local
->x
);
1763 dd_distribute_vec(dd
,cgs
,state
->v
,state_local
->v
);
1766 dd_distribute_vec(dd
,cgs
,state
->sd_X
,state_local
->sd_X
);
1769 dd_distribute_vec(dd
,cgs
,state
->cg_p
,state_local
->cg_p
);
1772 if (state
->nrngi
== 1)
1775 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1776 state
->ld_rng
,state_local
->ld_rng
);
1781 state_local
->nrng
*sizeof(state_local
->ld_rng
[0]),
1782 state
->ld_rng
,state_local
->ld_rng
);
1786 if (state
->nrngi
== 1)
1788 dd_bcastc(dd
,sizeof(state_local
->ld_rngi
[0]),
1789 state
->ld_rngi
,state_local
->ld_rngi
);
1793 dd_scatter(dd
,sizeof(state_local
->ld_rngi
[0]),
1794 state
->ld_rngi
,state_local
->ld_rngi
);
1797 case estDISRE_INITF
:
1798 case estDISRE_RM3TAV
:
1799 case estORIRE_INITF
:
1801 /* Not implemented yet */
1804 gmx_incons("Unknown state entry encountered in dd_distribute_state");
1810 static char dim2char(int dim
)
1816 case XX
: c
= 'X'; break;
1817 case YY
: c
= 'Y'; break;
1818 case ZZ
: c
= 'Z'; break;
1819 default: gmx_fatal(FARGS
,"Unknown dim %d",dim
);
1825 static void write_dd_grid_pdb(const char *fn
,gmx_large_int_t step
,
1826 gmx_domdec_t
*dd
,matrix box
,gmx_ddbox_t
*ddbox
)
1828 rvec grid_s
[2],*grid_r
=NULL
,cx
,r
;
1829 char fname
[STRLEN
],format
[STRLEN
],buf
[22];
1835 copy_rvec(dd
->comm
->cell_x0
,grid_s
[0]);
1836 copy_rvec(dd
->comm
->cell_x1
,grid_s
[1]);
1840 snew(grid_r
,2*dd
->nnodes
);
1843 dd_gather(dd
,2*sizeof(rvec
),grid_s
[0],DDMASTER(dd
) ? grid_r
[0] : NULL
);
1847 for(d
=0; d
<DIM
; d
++)
1849 for(i
=0; i
<DIM
; i
++)
1857 if (dd
->nc
[d
] > 1 && d
< ddbox
->npbcdim
)
1859 tric
[d
][i
] = box
[i
][d
]/box
[i
][i
];
1868 sprintf(fname
,"%s_%s.pdb",fn
,gmx_step_str(step
,buf
));
1869 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1870 out
= gmx_fio_fopen(fname
,"w");
1871 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1873 for(i
=0; i
<dd
->nnodes
; i
++)
1875 vol
= dd
->nnodes
/(box
[XX
][XX
]*box
[YY
][YY
]*box
[ZZ
][ZZ
]);
1876 for(d
=0; d
<DIM
; d
++)
1878 vol
*= grid_r
[i
*2+1][d
] - grid_r
[i
*2][d
];
1886 cx
[XX
] = grid_r
[i
*2+x
][XX
];
1887 cx
[YY
] = grid_r
[i
*2+y
][YY
];
1888 cx
[ZZ
] = grid_r
[i
*2+z
][ZZ
];
1890 fprintf(out
,format
,"ATOM",a
++,"CA","GLY",' ',1+i
,
1891 10*r
[XX
],10*r
[YY
],10*r
[ZZ
],1.0,vol
);
1895 for(d
=0; d
<DIM
; d
++)
1901 case 0: y
= 1 + i
*8 + 2*x
; break;
1902 case 1: y
= 1 + i
*8 + 2*x
- (x
% 2); break;
1903 case 2: y
= 1 + i
*8 + x
; break;
1905 fprintf(out
,"%6s%5d%5d\n","CONECT",y
,y
+(1<<d
));
1909 gmx_fio_fclose(out
);
1914 void write_dd_pdb(const char *fn
,gmx_large_int_t step
,const char *title
,
1915 gmx_mtop_t
*mtop
,t_commrec
*cr
,
1916 int natoms
,rvec x
[],matrix box
)
1918 char fname
[STRLEN
],format
[STRLEN
],format4
[STRLEN
],buf
[22];
1921 char *atomname
,*resname
;
1928 natoms
= dd
->comm
->nat
[ddnatVSITE
];
1931 sprintf(fname
,"%s_%s_n%d.pdb",fn
,gmx_step_str(step
,buf
),cr
->sim_nodeid
);
1933 sprintf(format
,"%s%s\n",pdbformat
,"%6.2f%6.2f");
1934 sprintf(format4
,"%s%s\n",pdbformat4
,"%6.2f%6.2f");
1936 out
= gmx_fio_fopen(fname
,"w");
1938 fprintf(out
,"TITLE %s\n",title
);
1939 gmx_write_pdb_box(out
,dd
->bScrewPBC
? epbcSCREW
: epbcXYZ
,box
);
1940 for(i
=0; i
<natoms
; i
++)
1942 ii
= dd
->gatindex
[i
];
1943 gmx_mtop_atominfo_global(mtop
,ii
,&atomname
,&resnr
,&resname
);
1944 if (i
< dd
->comm
->nat
[ddnatZONE
])
1947 while (i
>= dd
->cgindex
[dd
->comm
->zones
.cg_range
[c
+1]])
1953 else if (i
< dd
->comm
->nat
[ddnatVSITE
])
1955 b
= dd
->comm
->zones
.n
;
1959 b
= dd
->comm
->zones
.n
+ 1;
1961 fprintf(out
,strlen(atomname
)<4 ? format
: format4
,
1962 "ATOM",(ii
+1)%100000,
1963 atomname
,resname
,' ',resnr
%10000,' ',
1964 10*x
[i
][XX
],10*x
[i
][YY
],10*x
[i
][ZZ
],1.0,b
);
1966 fprintf(out
,"TER\n");
1968 gmx_fio_fclose(out
);
1971 real
dd_cutoff_mbody(gmx_domdec_t
*dd
)
1973 gmx_domdec_comm_t
*comm
;
1980 if (comm
->bInterCGBondeds
)
1982 if (comm
->cutoff_mbody
> 0)
1984 r
= comm
->cutoff_mbody
;
1988 /* cutoff_mbody=0 means we do not have DLB */
1989 r
= comm
->cellsize_min
[dd
->dim
[0]];
1990 for(di
=1; di
<dd
->ndim
; di
++)
1992 r
= min(r
,comm
->cellsize_min
[dd
->dim
[di
]]);
1994 if (comm
->bBondComm
)
1996 r
= max(r
,comm
->cutoff_mbody
);
2000 r
= min(r
,comm
->cutoff
);
2008 real
dd_cutoff_twobody(gmx_domdec_t
*dd
)
2012 r_mb
= dd_cutoff_mbody(dd
);
2014 return max(dd
->comm
->cutoff
,r_mb
);
2018 static void dd_cart_coord2pmecoord(gmx_domdec_t
*dd
,ivec coord
,ivec coord_pme
)
2022 nc
= dd
->nc
[dd
->comm
->cartpmedim
];
2023 ntot
= dd
->comm
->ntot
[dd
->comm
->cartpmedim
];
2024 copy_ivec(coord
,coord_pme
);
2025 coord_pme
[dd
->comm
->cartpmedim
] =
2026 nc
+ (coord
[dd
->comm
->cartpmedim
]*(ntot
- nc
) + (ntot
- nc
)/2)/nc
;
2029 static int low_ddindex2pmeindex(int ndd
,int npme
,int ddindex
)
2031 /* Here we assign a PME node to communicate with this DD node
2032 * by assuming that the major index of both is x.
2033 * We add cr->npmenodes/2 to obtain an even distribution.
2035 return (ddindex
*npme
+ npme
/2)/ndd
;
2038 static int ddindex2pmeindex(const gmx_domdec_t
*dd
,int ddindex
)
2040 return low_ddindex2pmeindex(dd
->nnodes
,dd
->comm
->npmenodes
,ddindex
);
2043 static int cr_ddindex2pmeindex(const t_commrec
*cr
,int ddindex
)
2045 return low_ddindex2pmeindex(cr
->dd
->nnodes
,cr
->npmenodes
,ddindex
);
2048 static int *dd_pmenodes(t_commrec
*cr
)
2053 snew(pmenodes
,cr
->npmenodes
);
2055 for(i
=0; i
<cr
->dd
->nnodes
; i
++) {
2056 p0
= cr_ddindex2pmeindex(cr
,i
);
2057 p1
= cr_ddindex2pmeindex(cr
,i
+1);
2058 if (i
+1 == cr
->dd
->nnodes
|| p1
> p0
) {
2060 fprintf(debug
,"pmenode[%d] = %d\n",n
,i
+1+n
);
2061 pmenodes
[n
] = i
+ 1 + n
;
2069 static int gmx_ddcoord2pmeindex(t_commrec
*cr
,int x
,int y
,int z
)
2072 ivec coords
,coords_pme
,nc
;
2077 if (dd->comm->bCartesian) {
2078 gmx_ddindex2xyz(dd->nc,ddindex,coords);
2079 dd_coords2pmecoords(dd,coords,coords_pme);
2080 copy_ivec(dd->ntot,nc);
2081 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2082 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
2084 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
2086 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
2092 slab
= ddindex2pmeindex(dd
,dd_index(dd
->nc
,coords
));
2097 static int ddcoord2simnodeid(t_commrec
*cr
,int x
,int y
,int z
)
2099 gmx_domdec_comm_t
*comm
;
2101 int ddindex
,nodeid
=-1;
2103 comm
= cr
->dd
->comm
;
2108 if (comm
->bCartesianPP_PME
)
2111 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&nodeid
);
2116 ddindex
= dd_index(cr
->dd
->nc
,coords
);
2117 if (comm
->bCartesianPP
)
2119 nodeid
= comm
->ddindex2simnodeid
[ddindex
];
2125 nodeid
= ddindex
+ gmx_ddcoord2pmeindex(cr
,x
,y
,z
);
2137 static int dd_simnode2pmenode(t_commrec
*cr
,int sim_nodeid
)
2140 gmx_domdec_comm_t
*comm
;
2141 ivec coord
,coord_pme
;
2148 /* This assumes a uniform x domain decomposition grid cell size */
2149 if (comm
->bCartesianPP_PME
)
2152 MPI_Cart_coords(cr
->mpi_comm_mysim
,sim_nodeid
,DIM
,coord
);
2153 if (coord
[comm
->cartpmedim
] < dd
->nc
[comm
->cartpmedim
])
2155 /* This is a PP node */
2156 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2157 MPI_Cart_rank(cr
->mpi_comm_mysim
,coord_pme
,&pmenode
);
2161 else if (comm
->bCartesianPP
)
2163 if (sim_nodeid
< dd
->nnodes
)
2165 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2170 /* This assumes DD cells with identical x coordinates
2171 * are numbered sequentially.
2173 if (dd
->comm
->pmenodes
== NULL
)
2175 if (sim_nodeid
< dd
->nnodes
)
2177 /* The DD index equals the nodeid */
2178 pmenode
= dd
->nnodes
+ ddindex2pmeindex(dd
,sim_nodeid
);
2184 while (sim_nodeid
> dd
->comm
->pmenodes
[i
])
2188 if (sim_nodeid
< dd
->comm
->pmenodes
[i
])
2190 pmenode
= dd
->comm
->pmenodes
[i
];
2198 gmx_bool
gmx_pmeonlynode(t_commrec
*cr
,int sim_nodeid
)
2200 gmx_bool bPMEOnlyNode
;
2202 if (DOMAINDECOMP(cr
))
2204 bPMEOnlyNode
= (dd_simnode2pmenode(cr
,sim_nodeid
) == -1);
2208 bPMEOnlyNode
= FALSE
;
2211 return bPMEOnlyNode
;
2214 void get_pme_ddnodes(t_commrec
*cr
,int pmenodeid
,
2215 int *nmy_ddnodes
,int **my_ddnodes
,int *node_peer
)
2219 ivec coord
,coord_pme
;
2223 snew(*my_ddnodes
,(dd
->nnodes
+cr
->npmenodes
-1)/cr
->npmenodes
);
2226 for(x
=0; x
<dd
->nc
[XX
]; x
++)
2228 for(y
=0; y
<dd
->nc
[YY
]; y
++)
2230 for(z
=0; z
<dd
->nc
[ZZ
]; z
++)
2232 if (dd
->comm
->bCartesianPP_PME
)
2237 dd_cart_coord2pmecoord(dd
,coord
,coord_pme
);
2238 if (dd
->ci
[XX
] == coord_pme
[XX
] &&
2239 dd
->ci
[YY
] == coord_pme
[YY
] &&
2240 dd
->ci
[ZZ
] == coord_pme
[ZZ
])
2241 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2245 /* The slab corresponds to the nodeid in the PME group */
2246 if (gmx_ddcoord2pmeindex(cr
,x
,y
,z
) == pmenodeid
)
2248 (*my_ddnodes
)[(*nmy_ddnodes
)++] = ddcoord2simnodeid(cr
,x
,y
,z
);
2255 /* The last PP-only node is the peer node */
2256 *node_peer
= (*my_ddnodes
)[*nmy_ddnodes
-1];
2260 fprintf(debug
,"Receive coordinates from PP nodes:");
2261 for(x
=0; x
<*nmy_ddnodes
; x
++)
2263 fprintf(debug
," %d",(*my_ddnodes
)[x
]);
2265 fprintf(debug
,"\n");
2269 static gmx_bool
receive_vir_ener(t_commrec
*cr
)
2271 gmx_domdec_comm_t
*comm
;
2272 int pmenode
,coords
[DIM
],rank
;
2276 if (cr
->npmenodes
< cr
->dd
->nnodes
)
2278 comm
= cr
->dd
->comm
;
2279 if (comm
->bCartesianPP_PME
)
2281 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2283 MPI_Cart_coords(cr
->mpi_comm_mysim
,cr
->sim_nodeid
,DIM
,coords
);
2284 coords
[comm
->cartpmedim
]++;
2285 if (coords
[comm
->cartpmedim
] < cr
->dd
->nc
[comm
->cartpmedim
])
2287 MPI_Cart_rank(cr
->mpi_comm_mysim
,coords
,&rank
);
2288 if (dd_simnode2pmenode(cr
,rank
) == pmenode
)
2290 /* This is not the last PP node for pmenode */
2298 pmenode
= dd_simnode2pmenode(cr
,cr
->sim_nodeid
);
2299 if (cr
->sim_nodeid
+1 < cr
->nnodes
&&
2300 dd_simnode2pmenode(cr
,cr
->sim_nodeid
+1) == pmenode
)
2302 /* This is not the last PP node for pmenode */
2311 static void set_zones_ncg_home(gmx_domdec_t
*dd
)
2313 gmx_domdec_zones_t
*zones
;
2316 zones
= &dd
->comm
->zones
;
2318 zones
->cg_range
[0] = 0;
2319 for(i
=1; i
<zones
->n
+1; i
++)
2321 zones
->cg_range
[i
] = dd
->ncg_home
;
2325 static void rebuild_cgindex(gmx_domdec_t
*dd
,int *gcgs_index
,t_state
*state
)
2327 int nat
,i
,*ind
,*dd_cg_gl
,*cgindex
,cg_gl
;
2330 dd_cg_gl
= dd
->index_gl
;
2331 cgindex
= dd
->cgindex
;
2334 for(i
=0; i
<state
->ncg_gl
; i
++)
2338 dd_cg_gl
[i
] = cg_gl
;
2339 nat
+= gcgs_index
[cg_gl
+1] - gcgs_index
[cg_gl
];
2343 dd
->ncg_home
= state
->ncg_gl
;
2346 set_zones_ncg_home(dd
);
2349 static int ddcginfo(const cginfo_mb_t
*cginfo_mb
,int cg
)
2351 while (cg
>= cginfo_mb
->cg_end
)
2356 return cginfo_mb
->cginfo
[(cg
- cginfo_mb
->cg_start
) % cginfo_mb
->cg_mod
];
2359 static void dd_set_cginfo(int *index_gl
,int cg0
,int cg1
,
2360 t_forcerec
*fr
,char *bLocalCG
)
2362 cginfo_mb_t
*cginfo_mb
;
2368 cginfo_mb
= fr
->cginfo_mb
;
2369 cginfo
= fr
->cginfo
;
2371 for(cg
=cg0
; cg
<cg1
; cg
++)
2373 cginfo
[cg
] = ddcginfo(cginfo_mb
,index_gl
[cg
]);
2377 if (bLocalCG
!= NULL
)
2379 for(cg
=cg0
; cg
<cg1
; cg
++)
2381 bLocalCG
[index_gl
[cg
]] = TRUE
;
2386 static void make_dd_indices(gmx_domdec_t
*dd
,int *gcgs_index
,int cg_start
)
2388 int nzone
,zone
,zone1
,cg0
,cg
,cg_gl
,a
,a_gl
;
2389 int *zone2cg
,*zone_ncg1
,*index_gl
,*gatindex
;
2393 bLocalCG
= dd
->comm
->bLocalCG
;
2395 if (dd
->nat_tot
> dd
->gatindex_nalloc
)
2397 dd
->gatindex_nalloc
= over_alloc_dd(dd
->nat_tot
);
2398 srenew(dd
->gatindex
,dd
->gatindex_nalloc
);
2401 nzone
= dd
->comm
->zones
.n
;
2402 zone2cg
= dd
->comm
->zones
.cg_range
;
2403 zone_ncg1
= dd
->comm
->zone_ncg1
;
2404 index_gl
= dd
->index_gl
;
2405 gatindex
= dd
->gatindex
;
2407 if (zone2cg
[1] != dd
->ncg_home
)
2409 gmx_incons("dd->ncg_zone is not up to date");
2412 /* Make the local to global and global to local atom index */
2413 a
= dd
->cgindex
[cg_start
];
2414 for(zone
=0; zone
<nzone
; zone
++)
2422 cg0
= zone2cg
[zone
];
2424 for(cg
=cg0
; cg
<zone2cg
[zone
+1]; cg
++)
2427 if (cg
- cg0
>= zone_ncg1
[zone
])
2429 /* Signal that this cg is from more than one zone away */
2432 cg_gl
= index_gl
[cg
];
2433 for(a_gl
=gcgs_index
[cg_gl
]; a_gl
<gcgs_index
[cg_gl
+1]; a_gl
++)
2436 ga2la_set(dd
->ga2la
,a_gl
,a
,zone1
);
2443 static int check_bLocalCG(gmx_domdec_t
*dd
,int ncg_sys
,const char *bLocalCG
,
2449 if (bLocalCG
== NULL
)
2453 for(i
=0; i
<dd
->ncg_tot
; i
++)
2455 if (!bLocalCG
[dd
->index_gl
[i
]])
2458 "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd
->rank
,where
,i
+1,dd
->index_gl
[i
]+1,dd
->ncg_home
);
2463 for(i
=0; i
<ncg_sys
; i
++)
2470 if (ngl
!= dd
->ncg_tot
)
2472 fprintf(stderr
,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd
->rank
,where
,ngl
,dd
->ncg_tot
);
2479 static void check_index_consistency(gmx_domdec_t
*dd
,
2480 int natoms_sys
,int ncg_sys
,
2483 int nerr
,ngl
,i
,a
,cell
;
2488 if (dd
->comm
->DD_debug
> 1)
2490 snew(have
,natoms_sys
);
2491 for(a
=0; a
<dd
->nat_tot
; a
++)
2493 if (have
[dd
->gatindex
[a
]] > 0)
2495 fprintf(stderr
,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd
->rank
,dd
->gatindex
[a
]+1,have
[dd
->gatindex
[a
]],a
+1);
2499 have
[dd
->gatindex
[a
]] = a
+ 1;
2505 snew(have
,dd
->nat_tot
);
2508 for(i
=0; i
<natoms_sys
; i
++)
2510 if (ga2la_get(dd
->ga2la
,i
,&a
,&cell
))
2512 if (a
>= dd
->nat_tot
)
2514 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd
->rank
,i
+1,a
+1,dd
->nat_tot
);
2520 if (dd
->gatindex
[a
] != i
)
2522 fprintf(stderr
,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd
->rank
,i
+1,a
+1,dd
->gatindex
[a
]+1);
2529 if (ngl
!= dd
->nat_tot
)
2532 "DD node %d, %s: %d global atom indices, %d local atoms\n",
2533 dd
->rank
,where
,ngl
,dd
->nat_tot
);
2535 for(a
=0; a
<dd
->nat_tot
; a
++)
2540 "DD node %d, %s: local atom %d, global %d has no global index\n",
2541 dd
->rank
,where
,a
+1,dd
->gatindex
[a
]+1);
2546 nerr
+= check_bLocalCG(dd
,ncg_sys
,dd
->comm
->bLocalCG
,where
);
2549 gmx_fatal(FARGS
,"DD node %d, %s: %d atom/cg index inconsistencies",
2550 dd
->rank
,where
,nerr
);
2554 static void clear_dd_indices(gmx_domdec_t
*dd
,int cg_start
,int a_start
)
2561 /* Clear the whole list without searching */
2562 ga2la_clear(dd
->ga2la
);
2566 for(i
=a_start
; i
<dd
->nat_tot
; i
++)
2568 ga2la_del(dd
->ga2la
,dd
->gatindex
[i
]);
2572 bLocalCG
= dd
->comm
->bLocalCG
;
2575 for(i
=cg_start
; i
<dd
->ncg_tot
; i
++)
2577 bLocalCG
[dd
->index_gl
[i
]] = FALSE
;
2581 dd_clear_local_vsite_indices(dd
);
2583 if (dd
->constraints
)
2585 dd_clear_local_constraint_indices(dd
);
2589 static real
grid_jump_limit(gmx_domdec_comm_t
*comm
,int dim_ind
)
2591 real grid_jump_limit
;
2593 /* The distance between the boundaries of cells at distance
2594 * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
2595 * and by the fact that cells should not be shifted by more than
2596 * half their size, such that cg's only shift by one cell
2597 * at redecomposition.
2599 grid_jump_limit
= comm
->cellsize_limit
;
2600 if (!comm
->bVacDLBNoLimit
)
2602 grid_jump_limit
= max(grid_jump_limit
,
2603 comm
->cutoff
/comm
->cd
[dim_ind
].np
);
2606 return grid_jump_limit
;
2609 static void check_grid_jump(gmx_large_int_t step
,gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2611 gmx_domdec_comm_t
*comm
;
2617 for(d
=1; d
<dd
->ndim
; d
++)
2620 limit
= grid_jump_limit(comm
,d
);
2621 bfac
= ddbox
->box_size
[dim
];
2622 if (ddbox
->tric_dir
[dim
])
2624 bfac
*= ddbox
->skew_fac
[dim
];
2626 if ((comm
->cell_f1
[d
] - comm
->cell_f_max0
[d
])*bfac
< limit
||
2627 (comm
->cell_f0
[d
] - comm
->cell_f_min1
[d
])*bfac
> -limit
)
2630 gmx_fatal(FARGS
,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
2631 gmx_step_str(step
,buf
),
2632 dim2char(dim
),dd
->ci
[XX
],dd
->ci
[YY
],dd
->ci
[ZZ
]);
2637 static int dd_load_count(gmx_domdec_comm_t
*comm
)
2639 return (comm
->eFlop
? comm
->flop_n
: comm
->cycl_n
[ddCyclF
]);
2642 static float dd_force_load(gmx_domdec_comm_t
*comm
)
2649 if (comm
->eFlop
> 1)
2651 load
*= 1.0 + (comm
->eFlop
- 1)*(0.1*rand()/RAND_MAX
- 0.05);
2656 load
= comm
->cycl
[ddCyclF
];
2657 if (comm
->cycl_n
[ddCyclF
] > 1)
2659 /* Subtract the maximum of the last n cycle counts
2660 * to get rid of possible high counts due to other soures,
2661 * for instance system activity, that would otherwise
2662 * affect the dynamic load balancing.
2664 load
-= comm
->cycl_max
[ddCyclF
];
2671 static void set_slb_pme_dim_f(gmx_domdec_t
*dd
,int dim
,real
**dim_f
)
2673 gmx_domdec_comm_t
*comm
;
2678 snew(*dim_f
,dd
->nc
[dim
]+1);
2680 for(i
=1; i
<dd
->nc
[dim
]; i
++)
2682 if (comm
->slb_frac
[dim
])
2684 (*dim_f
)[i
] = (*dim_f
)[i
-1] + comm
->slb_frac
[dim
][i
-1];
2688 (*dim_f
)[i
] = (real
)i
/(real
)dd
->nc
[dim
];
2691 (*dim_f
)[dd
->nc
[dim
]] = 1;
2694 static void init_ddpme(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,int dimind
)
2696 int pmeindex
,slab
,nso
,i
;
2699 if (dimind
== 0 && dd
->dim
[0] == YY
&& dd
->comm
->npmenodes_x
== 1)
2705 ddpme
->dim
= dimind
;
2707 ddpme
->dim_match
= (ddpme
->dim
== dd
->dim
[dimind
]);
2709 ddpme
->nslab
= (ddpme
->dim
== 0 ?
2710 dd
->comm
->npmenodes_x
:
2711 dd
->comm
->npmenodes_y
);
2713 if (ddpme
->nslab
<= 1)
2718 nso
= dd
->comm
->npmenodes
/ddpme
->nslab
;
2719 /* Determine for each PME slab the PP location range for dimension dim */
2720 snew(ddpme
->pp_min
,ddpme
->nslab
);
2721 snew(ddpme
->pp_max
,ddpme
->nslab
);
2722 for(slab
=0; slab
<ddpme
->nslab
; slab
++) {
2723 ddpme
->pp_min
[slab
] = dd
->nc
[dd
->dim
[dimind
]] - 1;
2724 ddpme
->pp_max
[slab
] = 0;
2726 for(i
=0; i
<dd
->nnodes
; i
++) {
2727 ddindex2xyz(dd
->nc
,i
,xyz
);
2728 /* For y only use our y/z slab.
2729 * This assumes that the PME x grid size matches the DD grid size.
2731 if (dimind
== 0 || xyz
[XX
] == dd
->ci
[XX
]) {
2732 pmeindex
= ddindex2pmeindex(dd
,i
);
2734 slab
= pmeindex
/nso
;
2736 slab
= pmeindex
% ddpme
->nslab
;
2738 ddpme
->pp_min
[slab
] = min(ddpme
->pp_min
[slab
],xyz
[dimind
]);
2739 ddpme
->pp_max
[slab
] = max(ddpme
->pp_max
[slab
],xyz
[dimind
]);
2743 set_slb_pme_dim_f(dd
,ddpme
->dim
,&ddpme
->slb_dim_f
);
2746 int dd_pme_maxshift_x(gmx_domdec_t
*dd
)
2748 if (dd
->comm
->ddpme
[0].dim
== XX
)
2750 return dd
->comm
->ddpme
[0].maxshift
;
2758 int dd_pme_maxshift_y(gmx_domdec_t
*dd
)
2760 if (dd
->comm
->ddpme
[0].dim
== YY
)
2762 return dd
->comm
->ddpme
[0].maxshift
;
2764 else if (dd
->comm
->npmedecompdim
>= 2 && dd
->comm
->ddpme
[1].dim
== YY
)
2766 return dd
->comm
->ddpme
[1].maxshift
;
2774 static void set_pme_maxshift(gmx_domdec_t
*dd
,gmx_ddpme_t
*ddpme
,
2775 gmx_bool bUniform
,gmx_ddbox_t
*ddbox
,real
*cell_f
)
2777 gmx_domdec_comm_t
*comm
;
2780 real range
,pme_boundary
;
2784 nc
= dd
->nc
[ddpme
->dim
];
2787 if (!ddpme
->dim_match
)
2789 /* PP decomposition is not along dim: the worst situation */
2792 else if (ns
<= 3 || (bUniform
&& ns
== nc
))
2794 /* The optimal situation */
2799 /* We need to check for all pme nodes which nodes they
2800 * could possibly need to communicate with.
2802 xmin
= ddpme
->pp_min
;
2803 xmax
= ddpme
->pp_max
;
2804 /* Allow for atoms to be maximally 2/3 times the cut-off
2805 * out of their DD cell. This is a reasonable balance between
2806 * between performance and support for most charge-group/cut-off
2809 range
= 2.0/3.0*comm
->cutoff
/ddbox
->box_size
[ddpme
->dim
];
2810 /* Avoid extra communication when we are exactly at a boundary */
2816 /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
2817 pme_boundary
= (real
)s
/ns
;
2820 cell_f
[xmax
[s
-(sh
+1) ]+1] + range
> pme_boundary
) ||
2822 cell_f
[xmax
[s
-(sh
+1)+ns
]+1] - 1 + range
> pme_boundary
)))
2826 pme_boundary
= (real
)(s
+1)/ns
;
2829 cell_f
[xmin
[s
+(sh
+1) ] ] - range
< pme_boundary
) ||
2831 cell_f
[xmin
[s
+(sh
+1)-ns
] ] + 1 - range
< pme_boundary
)))
2838 ddpme
->maxshift
= sh
;
2842 fprintf(debug
,"PME slab communication range for dim %d is %d\n",
2843 ddpme
->dim
,ddpme
->maxshift
);
2847 static void check_box_size(gmx_domdec_t
*dd
,gmx_ddbox_t
*ddbox
)
2851 for(d
=0; d
<dd
->ndim
; d
++)
2854 if (dim
< ddbox
->nboundeddim
&&
2855 ddbox
->box_size
[dim
]*ddbox
->skew_fac
[dim
] <
2856 dd
->nc
[dim
]*dd
->comm
->cellsize_limit
*DD_CELL_MARGIN
)
2858 gmx_fatal(FARGS
,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
2859 dim2char(dim
),ddbox
->box_size
[dim
],ddbox
->skew_fac
[dim
],
2860 dd
->nc
[dim
],dd
->comm
->cellsize_limit
);
static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
                                  gmx_bool bMaster,ivec npulse)
{
    gmx_domdec_comm_t *comm;
    int  d,j;
    rvec cellsize_min;
    real *cell_x,cell_dx,cellsize;

    comm = dd->comm;

    for(d=0; d<DIM; d++)
    {
        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
        npulse[d] = 1;
        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
        {
            /* Uniform grid */
            cell_dx = ddbox->box_size[d]/dd->nc[d];
            if (bMaster)
            {
                for(j=0; j<dd->nc[d]+1; j++)
                {
                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
                }
            }
            else
            {
                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
            }
            cellsize = cell_dx*ddbox->skew_fac[d];
            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
            {
                npulse[d]++;
            }
            cellsize_min[d] = cellsize;
        }
        else
        {
            /* Statically load balanced grid */
            /* Also when we are not doing a master distribution we determine
             * all cell borders in a loop to obtain identical values
             * to the master distribution case and to determine npulse.
             */
            if (bMaster)
            {
                cell_x = dd->ma->cell_x[d];
            }
            else
            {
                snew(cell_x,dd->nc[d]+1);
            }
            cell_x[0] = ddbox->box0[d];
            for(j=0; j<dd->nc[d]; j++)
            {
                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
                cell_x[j+1] = cell_x[j] + cell_dx;
                cellsize = cell_dx*ddbox->skew_fac[d];
                while (cellsize*npulse[d] < comm->cutoff &&
                       npulse[d] < dd->nc[d]-1)
                {
                    npulse[d]++;
                }
                cellsize_min[d] = min(cellsize_min[d],cellsize);
            }
            if (!bMaster)
            {
                comm->cell_x0[d] = cell_x[dd->ci[d]];
                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
                sfree(cell_x);
            }
        }
        /* The following limitation is to avoid that a cell would receive
         * some of its own home charge groups back over the periodic boundary.
         * Double charge groups cause trouble with the global indices.
         */
        if (d < ddbox->npbcdim &&
            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
        {
            gmx_fatal_collective(FARGS,NULL,dd,
                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
                                 comm->cutoff,
                                 dd->nc[d],dd->nc[d],
                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
        }
    }

    if (!comm->bDynLoadBal)
    {
        copy_rvec(cellsize_min,comm->cellsize_min);
    }

    for(d=0; d<comm->npmedecompdim; d++)
    {
        set_pme_maxshift(dd,&comm->ddpme[d],
                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
                         comm->ddpme[d].slb_dim_f);
    }
}
static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
                                                  int d,int dim,gmx_domdec_root_t *root,
                                                  gmx_ddbox_t *ddbox,
                                                  gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
{
    gmx_domdec_comm_t *comm;
    int  ncd,i,j,nmin,nmin_old;
    gmx_bool bLimLo,bLimHi;
    real *cell_size;
    real fac,halfway,cellsize_limit_f_i,region_size;
    gmx_bool bPBC,bLastHi=FALSE;
    int nrange[]={range[0],range[1]};

    region_size = root->cell_f[range[1]]-root->cell_f[range[0]];

    comm = dd->comm;

    ncd = dd->nc[dim];

    bPBC = (dim < ddbox->npbcdim);

    cell_size = root->buf_ncd;

    if (debug)
    {
        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
    }

    /* First we need to check if the scaling does not make cells
     * smaller than the smallest allowed size.
     * We need to do this iteratively, since if a cell is too small,
     * it needs to be enlarged, which makes all the other cells smaller,
     * which could in turn make another cell smaller than allowed.
     */
    for(i=range[0]; i<range[1]; i++)
    {
        root->bCellMin[i] = FALSE;
    }
    nmin = 0;
    do
    {
        nmin_old = nmin;
        /* We need the total for normalization */
        fac = 0;
        for(i=range[0]; i<range[1]; i++)
        {
            if (root->bCellMin[i] == FALSE)
            {
                fac += cell_size[i];
            }
        }
        fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
        /* Determine the cell boundaries */
        for(i=range[0]; i<range[1]; i++)
        {
            if (root->bCellMin[i] == FALSE)
            {
                cell_size[i] *= fac;
                if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
                {
                    cellsize_limit_f_i = 0;
                }
                else
                {
                    cellsize_limit_f_i = cellsize_limit_f;
                }
                if (cell_size[i] < cellsize_limit_f_i)
                {
                    root->bCellMin[i] = TRUE;
                    cell_size[i] = cellsize_limit_f_i;
                    nmin++;
                }
            }
            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
        }
    }
    while (nmin > nmin_old);

    i = range[1]-1;
    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
    /* For this check we should not use DD_CELL_MARGIN,
     * but a slightly smaller factor,
     * since rounding could get us below the limit.
     */
    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
    {
        char buf[22];
        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
                  gmx_step_str(step,buf),
                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
                  ncd,comm->cellsize_min[dim]);
    }

    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);

    if (!bUniform)
    {
        /* Check if the boundary did not displace more than halfway
         * each of the cells it bounds, as this could cause problems,
         * especially when the differences between cell sizes are large.
         * If changes are applied, they will not make cells smaller
         * than the cut-off, as we check all the boundaries which
         * might be affected by a change and if the old state was ok,
         * the cells will at most be shrunk back to their old size.
         */
        for(i=range[0]+1; i<range[1]; i++)
        {
            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
            if (root->cell_f[i] < halfway)
            {
                root->cell_f[i] = halfway;
                /* Check if the change also causes shifts of the next boundaries */
                for(j=i+1; j<range[1]; j++)
                {
                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
                        root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
                }
            }
            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
            if (root->cell_f[i] > halfway)
            {
                root->cell_f[i] = halfway;
                /* Check if the change also causes shifts of the next boundaries */
                for(j=i-1; j>=range[0]+1; j--)
                {
                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
                }
            }
        }
    }

    /* nrange is defined as [lower, upper) range for new call to enforce_limits */
    /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
     * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
     * for a and b nrange is used */
    if (d > 0)
    {
        /* Take care of the staggering of the cell boundaries */
        if (bUniform)
        {
            for(i=range[0]; i<range[1]; i++)
            {
                root->cell_f_max0[i] = root->cell_f[i];
                root->cell_f_min1[i] = root->cell_f[i+1];
            }
        }
        else
        {
            for(i=range[0]+1; i<range[1]; i++)
            {
                bLimLo = (root->cell_f[i] < root->bound_min[i]);
                bLimHi = (root->cell_f[i] > root->bound_max[i]);
                if (bLimLo && bLimHi)
                {
                    /* Both limits violated, try the best we can */
                    /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
                    nrange[0] = range[0];
                    nrange[1] = i;
                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);

                    nrange[0] = i;
                    nrange[1] = range[1];
                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);

                    return;
                }
                else if (bLimLo)
                {
                    /* root->cell_f[i] = root->bound_min[i]; */
                    nrange[1] = i;  /* only store violation location. There could be a LimLo violation following with a higher index */
                    bLastHi = FALSE;
                }
                else if (bLimHi && !bLastHi)
                {
                    bLastHi = TRUE;
                    if (nrange[1] < range[1])     /* found a LimLo before */
                    {
                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
                        nrange[0] = nrange[1];
                    }
                    root->cell_f[i] = root->bound_max[i];
                    nrange[1] = i;
                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
                    nrange[0] = i;
                    nrange[1] = range[1];
                }
            }
            if (nrange[1] < range[1])     /* found a LimLo last */
            {
                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
                nrange[0] = nrange[1];
                nrange[1] = range[1];
                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
            }
            else if (nrange[0] > range[0])  /* found at least one LimHi */
            {
                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
            }
        }
    }
}
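
/* The recursion above works on half-open index ranges: whenever a
 * staggering bound is violated, the boundary is pinned to the bound and
 * the enclosing range is split there, so each recursive call rebalances
 * a sub-range whose end points are already fixed.
 */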
static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
                                       int d,int dim,gmx_domdec_root_t *root,
                                       gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
                                       gmx_bool bUniform,gmx_large_int_t step)
{
    gmx_domdec_comm_t *comm;
    int  ncd,d1,i,j,pos;
    real *cell_size;
    real load_aver,load_i,imbalance,change,change_max,sc;
    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
    real change_limit = 0.1;
    real relax = 0.5;
    gmx_bool bPBC;
    int range[] = { 0, 0 };

    comm = dd->comm;

    ncd = dd->nc[dim];

    bPBC = (dim < ddbox->npbcdim);

    cell_size = root->buf_ncd;

    /* Store the original boundaries */
    for(i=0; i<ncd+1; i++)
    {
        root->old_cell_f[i] = root->cell_f[i];
    }
    if (bUniform)
    {
        for(i=0; i<ncd; i++)
        {
            cell_size[i] = 1.0/ncd;
        }
    }
    else if (dd_load_count(comm))
    {
        load_aver = comm->load[d].sum_m/ncd;
        change_max = 0;
        for(i=0; i<ncd; i++)
        {
            /* Determine the relative imbalance of cell i */
            load_i = comm->load[d].load[i*comm->load[d].nload+2];
            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
            /* Determine the change of the cell size using underrelaxation */
            change = -relax*imbalance;
            change_max = max(change_max,max(change,-change));
        }
        /* Limit the amount of scaling.
         * We need to use the same rescaling for all cells in one row,
         * otherwise the load balancing might not converge.
         */
        sc = relax;
        if (change_max > change_limit)
        {
            sc *= change_limit/change_max;
        }
        for(i=0; i<ncd; i++)
        {
            /* Determine the relative imbalance of cell i */
            load_i = comm->load[d].load[i*comm->load[d].nload+2];
            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
            /* Determine the change of the cell size using underrelaxation */
            change = -sc*imbalance;
            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
        }
    }

    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
    cellsize_limit_f *= DD_CELL_MARGIN;
    dist_min_f_hard   = grid_jump_limit(comm,d)/ddbox->box_size[dim];
    dist_min_f        = dist_min_f_hard * DD_CELL_MARGIN;
    if (ddbox->tric_dir[dim])
    {
        cellsize_limit_f /= ddbox->skew_fac[dim];
        dist_min_f       /= ddbox->skew_fac[dim];
    }
    if (bDynamicBox && d > 0)
    {
        dist_min_f *= DD_PRES_SCALE_MARGIN;
    }
    if (d > 0 && !bUniform)
    {
        /* Make sure that the grid is not shifted too much */
        for(i=1; i<ncd; i++) {
            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
            {
                gmx_incons("Inconsistent DD boundary staggering limits!");
            }
            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
            if (space > 0)
            {
                root->bound_min[i] += 0.5*space;
            }
            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
            if (space < 0)
            {
                root->bound_max[i] += 0.5*space;
            }
            if (debug)
            {
                fprintf(debug,
                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
                        d,i,
                        root->cell_f_max0[i-1] + dist_min_f,
                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
                        root->cell_f_min1[i] - dist_min_f);
            }
        }
    }
    range[1] = ncd;
    root->cell_f[0] = 0;
    root->cell_f[ncd] = 1;
    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);

    /* After the checks above, the cells should obey the cut-off
     * restrictions, but it does not hurt to check.
     */
    for(i=0; i<ncd; i++)
    {
        if (debug)
        {
            fprintf(debug,"Relative bounds dim %d cell %d: %f %f\n",
                    dim,i,root->cell_f[i],root->cell_f[i+1]);
        }

        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
            root->cell_f[i+1] - root->cell_f[i] <
            cellsize_limit_f/DD_CELL_MARGIN)
        {
            char buf[22];
            fprintf(stderr,
                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
                    gmx_step_str(step,buf),dim2char(dim),i,
                    (root->cell_f[i+1] - root->cell_f[i])
                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
        }
    }

    pos = ncd + 1;
    /* Store the cell boundaries of the lower dimensions at the end */
    for(d1=0; d1<d; d1++)
    {
        root->cell_f[pos++] = comm->cell_f0[d1];
        root->cell_f[pos++] = comm->cell_f1[d1];
    }

    if (d < comm->npmedecompdim)
    {
        /* The master determines the maximum shift for
         * the coordinate communication between separate PME nodes.
         */
        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
    }
    root->cell_f[pos++] = comm->ddpme[0].maxshift;
    if (d >= 1)
    {
        root->cell_f[pos++] = comm->ddpme[1].maxshift;
    }
}
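
/* Layout of the cell_f row assembled above and broadcast to the row
 * members: entries [0..ncd] are the ncd+1 relative cell boundaries,
 * followed by the pair cell_f0[d1],cell_f1[d1] for each lower dimension
 * d1 < d, and finally the ddpme maxshift value(s), stored as real and
 * rounded back to int on the receiving side in
 * distribute_dd_cell_sizes_dlb().
 */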
static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
                                             gmx_ddbox_t *ddbox,int dimind)
{
    gmx_domdec_comm_t *comm;
    int dim;

    comm = dd->comm;

    /* Set the cell dimensions */
    dim = dd->dim[dimind];
    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
    if (dim >= ddbox->nboundeddim)
    {
        comm->cell_x0[dim] += ddbox->box0[dim];
        comm->cell_x1[dim] += ddbox->box0[dim];
    }
}
static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
                                         int d,int dim,real *cell_f_row,
                                         gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int d1,pos;

    comm = dd->comm;

#ifdef GMX_MPI
    /* Each node would only need to know two fractions,
     * but it is probably cheaper to broadcast the whole array.
     */
    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
              0,comm->mpi_comm_load[d]);
#endif
    /* Copy the fractions for this dimension from the buffer */
    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
    /* The whole array was communicated, so set the buffer position */
    pos = dd->nc[dim] + 1;
    for(d1=0; d1<=d; d1++)
    {
        if (d1 < d)
        {
            /* Copy the cell fractions of the lower dimensions */
            comm->cell_f0[d1] = cell_f_row[pos++];
            comm->cell_f1[d1] = cell_f_row[pos++];
        }
        relative_to_absolute_cell_bounds(dd,ddbox,d1);
    }
    /* Convert the communicated shift from float to int */
    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
    if (d >= 1)
    {
        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
    }
}
static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
                                         gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
                                         gmx_bool bUniform,gmx_large_int_t step)
{
    gmx_domdec_comm_t *comm;
    int d,dim,d1;
    gmx_bool bRowMember,bRowRoot;
    real *cell_f_row;

    comm = dd->comm;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        bRowMember = TRUE;
        bRowRoot = TRUE;
        for(d1=d; d1<dd->ndim; d1++)
        {
            if (dd->ci[dd->dim[d1]] > 0)
            {
                if (d1 > d)
                {
                    bRowMember = FALSE;
                }
                bRowRoot = FALSE;
            }
        }
        if (bRowMember)
        {
            if (bRowRoot)
            {
                set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
                                           ddbox,bDynamicBox,bUniform,step);
                cell_f_row = comm->root[d]->cell_f;
            }
            else
            {
                cell_f_row = comm->cell_f_row;
            }
            distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
        }
    }
}
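
/* A process takes part in the row communication for dimension d only when
 * its grid coordinate is 0 in all decomposition dimensions beyond d, and
 * it is the row root when the coordinate is 0 in dimension d as well; the
 * root computes the new boundaries, the members receive them.
 */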
static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
{
    int d;

    /* This function assumes the box is static and should therefore
     * not be called when the box has changed since the last
     * call to dd_partition_system.
     */
    for(d=0; d<dd->ndim; d++)
    {
        relative_to_absolute_cell_bounds(dd,ddbox,d);
    }
}
static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
                                  gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
                                  gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
                                  gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    int dim;

    comm = dd->comm;

    if (bDoDLB)
    {
        wallcycle_start(wcycle,ewcDDCOMMBOUND);
        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
    }
    else if (bDynamicBox)
    {
        set_dd_cell_sizes_dlb_nochange(dd,ddbox);
    }

    /* Set the dimensions for which no DD is used */
    for(dim=0; dim<DIM; dim++) {
        if (dd->nc[dim] == 1) {
            comm->cell_x0[dim] = 0;
            comm->cell_x1[dim] = ddbox->box_size[dim];
            if (dim >= ddbox->nboundeddim)
            {
                comm->cell_x0[dim] += ddbox->box0[dim];
                comm->cell_x1[dim] += ddbox->box0[dim];
            }
        }
    }
}
static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
{
    int d,np,i;
    gmx_domdec_comm_dim_t *cd;

    for(d=0; d<dd->ndim; d++)
    {
        cd = &dd->comm->cd[d];
        np = npulse[dd->dim[d]];
        if (np > cd->np_nalloc)
        {
            if (debug)
            {
                fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
                        dim2char(dd->dim[d]),np);
            }
            if (DDMASTER(dd) && cd->np_nalloc > 0)
            {
                fprintf(stderr,"\nIncreasing the number of cells to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
            }
            srenew(cd->ind,np);
            for(i=cd->np_nalloc; i<np; i++)
            {
                cd->ind[i].index  = NULL;
                cd->ind[i].nalloc = 0;
            }
            cd->np_nalloc = np;
        }
        cd->np = np;
    }
}
static void set_dd_cell_sizes(gmx_domdec_t *dd,
                              gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
                              gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
                              gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    int d;
    ivec npulse;

    comm = dd->comm;

    /* Copy the old cell boundaries for the cg displacement check */
    copy_rvec(comm->cell_x0,comm->old_cell_x0);
    copy_rvec(comm->cell_x1,comm->old_cell_x1);

    if (comm->bDynLoadBal)
    {
        if (DDMASTER(dd))
        {
            check_box_size(dd,ddbox);
        }
        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
    }
    else
    {
        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
        realloc_comm_ind(dd,npulse);
    }

    if (debug)
    {
        for(d=0; d<DIM; d++)
        {
            fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
                    d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
        }
    }
}
static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
                                  gmx_ddbox_t *ddbox,
                                  rvec cell_ns_x0,rvec cell_ns_x1,
                                  gmx_large_int_t step)
{
    gmx_domdec_comm_t *comm;
    int dim_ind,dim;

    comm = dd->comm;

    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
    {
        dim = dd->dim[dim_ind];

        /* Without PBC we don't have restrictions on the outer cells */
        if (!(dim >= ddbox->npbcdim &&
              (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
            comm->bDynLoadBal &&
            (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
            comm->cellsize_min[dim])
        {
            char buf[22];
            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
                      gmx_step_str(step,buf),dim2char(dim),
                      comm->cell_x1[dim] - comm->cell_x0[dim],
                      ddbox->skew_fac[dim],
                      dd->comm->cellsize_min[dim],
                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
        }
    }

    if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
    {
        /* Communicate the boundaries and update cell_ns_x0/1 */
        dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
        if (dd->bGridJump && dd->ndim > 1)
        {
            check_grid_jump(step,dd,ddbox);
        }
    }
}
static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
{
    if (YY < npbcdim)
    {
        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
    }
    else
    {
        tcm[YY][XX] = 0;
    }
    if (ZZ < npbcdim)
    {
        tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
        tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
    }
    else
    {
        tcm[ZZ][XX] = 0;
        tcm[ZZ][YY] = 0;
    }
}
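
/* The correction matrix tcm removes the triclinic tilt when a position is
 * projected on box vector d:
 *     pos_d = x[d] + sum over j>d of x[j]*tcm[j][d]
 * e.g. tcm[YY][XX] = -box[YY][XX]/box[YY][YY], so adding y*tcm[YY][XX]
 * cancels the x-offset that the y box vector contributes.
 */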
static void check_screw_box(matrix box)
{
    /* Mathematical limitation */
    if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
    {
        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
    }

    /* Limitation due to the asymmetry of the eighth shell method */
    if (box[ZZ][YY] != 0)
    {
        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
    }
}
static void distribute_cg(FILE *fplog,gmx_large_int_t step,
                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
                          gmx_domdec_t *dd)
{
    gmx_domdec_master_t *ma;
    int **tmp_ind=NULL,*tmp_nalloc=NULL;
    int  i,icg,j,k,k0,k1,d,npbcdim;
    matrix tcm;
    rvec box_size,cg_cm;
    ivec ind;
    real nrcg,inv_ncg,pos_d;
    atom_id *cgindex;
    gmx_bool bUnbounded,bScrew;
    char buf[22];

    ma = dd->ma;

    if (tmp_ind == NULL)
    {
        snew(tmp_nalloc,dd->nnodes);
        snew(tmp_ind,dd->nnodes);
        for(i=0; i<dd->nnodes; i++)
        {
            tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
            snew(tmp_ind[i],tmp_nalloc[i]);
        }
    }

    /* Clear the count */
    for(i=0; i<dd->nnodes; i++)
    {
        ma->ncg[i] = 0;
        ma->nat[i] = 0;
    }

    make_tric_corr_matrix(dd->npbcdim,box,tcm);

    cgindex = cgs->index;

    /* Compute the center of geometry for all charge groups */
    for(icg=0; icg<cgs->nr; icg++)
    {
        k0   = cgindex[icg];
        k1   = cgindex[icg+1];
        nrcg = k1 - k0;
        if (nrcg == 1)
        {
            copy_rvec(pos[k0],cg_cm);
        }
        else
        {
            inv_ncg = 1.0/nrcg;

            clear_rvec(cg_cm);
            for(k=k0; (k<k1); k++)
            {
                rvec_inc(cg_cm,pos[k]);
            }
            for(d=0; (d<DIM); d++)
            {
                cg_cm[d] *= inv_ncg;
            }
        }
        /* Put the charge group in the box and determine the cell index */
        for(d=DIM-1; d>=0; d--) {
            pos_d = cg_cm[d];
            if (d < dd->npbcdim)
            {
                bScrew = (dd->bScrewPBC && d == XX);
                if (tric_dir[d] && dd->nc[d] > 1)
                {
                    /* Use triclinic coordinates for this dimension */
                    for(j=d+1; j<DIM; j++)
                    {
                        pos_d += cg_cm[j]*tcm[j][d];
                    }
                }
                while(pos_d >= box[d][d])
                {
                    pos_d -= box[d][d];
                    rvec_dec(cg_cm,box[d]);
                    if (bScrew)
                    {
                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
                    }
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_dec(pos[k],box[d]);
                        if (bScrew)
                        {
                            pos[k][YY] = box[YY][YY] - pos[k][YY];
                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
                        }
                    }
                }
                while(pos_d < 0)
                {
                    pos_d += box[d][d];
                    rvec_inc(cg_cm,box[d]);
                    if (bScrew)
                    {
                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
                    }
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_inc(pos[k],box[d]);
                        if (bScrew)
                        {
                            pos[k][YY] = box[YY][YY] - pos[k][YY];
                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
                        }
                    }
                }
            }
            /* This could be done more efficiently */
            ind[d] = 0;
            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
            {
                ind[d]++;
            }
        }
        i = dd_index(dd->nc,ind);
        if (ma->ncg[i] == tmp_nalloc[i])
        {
            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
            srenew(tmp_ind[i],tmp_nalloc[i]);
        }
        tmp_ind[i][ma->ncg[i]] = icg;
        ma->ncg[i]++;
        ma->nat[i] += cgindex[icg+1] - cgindex[icg];
    }

    k1 = 0;
    for(i=0; i<dd->nnodes; i++)
    {
        ma->index[i] = k1;
        for(k=0; k<ma->ncg[i]; k++)
        {
            ma->cg[k1++] = tmp_ind[i][k];
        }
    }
    ma->index[dd->nnodes] = k1;

    for(i=0; i<dd->nnodes; i++)
    {
        sfree(tmp_ind[i]);
    }
    sfree(tmp_ind);
    sfree(tmp_nalloc);

    if (fplog)
    {
        fprintf(fplog,"Charge group distribution at step %s:",
                gmx_step_str(step,buf));
        for(i=0; i<dd->nnodes; i++)
        {
            fprintf(fplog," %d",ma->ncg[i]);
        }
        fprintf(fplog,"\n");
    }
}
static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
                                rvec pos[])
{
    gmx_domdec_master_t *ma=NULL;
    ivec npulse;
    int  i,cg_gl;
    int  *ibuf,buf2[2] = { 0, 0 };

    if (DDMASTER(dd))
    {
        ma = dd->ma;

        if (dd->bScrewPBC)
        {
            check_screw_box(box);
        }

        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);

        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ibuf[2*i]   = ma->ncg[i];
            ma->ibuf[2*i+1] = ma->nat[i];
        }
        ibuf = ma->ibuf;
    }
    else
    {
        ibuf = NULL;
    }
    dd_scatter(dd,2*sizeof(int),ibuf,buf2);

    dd->ncg_home = buf2[0];
    dd->nat_home = buf2[1];
    dd->ncg_tot  = dd->ncg_home;
    dd->nat_tot  = dd->nat_home;
    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
    {
        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
        srenew(dd->index_gl,dd->cg_nalloc);
        srenew(dd->cgindex,dd->cg_nalloc+1);
    }
    if (DDMASTER(dd))
    {
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ibuf[i]            = ma->ncg[i]*sizeof(int);
            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
        }
    }

    dd_scatterv(dd,
                DDMASTER(dd) ? ma->ibuf : NULL,
                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
                DDMASTER(dd) ? ma->cg : NULL,
                dd->ncg_home*sizeof(int),dd->index_gl);

    /* Determine the home charge group sizes */
    dd->cgindex[0] = 0;
    for(i=0; i<dd->ncg_home; i++)
    {
        cg_gl = dd->index_gl[i];
        dd->cgindex[i+1] =
            dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
    }

    if (debug)
    {
        fprintf(debug,"Home charge groups:\n");
        for(i=0; i<dd->ncg_home; i++)
        {
            fprintf(debug," %d",dd->index_gl[i]);
            if (i % 10 == 9)
            {
                fprintf(debug,"\n");
            }
        }
        fprintf(debug,"\n");
    }
}
static int compact_and_copy_vec_at(int ncg,int *move,
                                   int *cgindex,
                                   int nvec,int vec,
                                   rvec *src,gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int m,icg,i,i0,i1,nrcg;
    int home_pos;
    int pos_vec[DIM*2];

    home_pos = 0;

    for(m=0; m<DIM*2; m++)
    {
        pos_vec[m] = 0;
    }

    i0 = 0;
    for(icg=0; icg<ncg; icg++)
    {
        i1 = cgindex[icg+1];
        m  = move[icg];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                for(i=i0; i<i1; i++)
                {
                    copy_rvec(src[i],src[home_pos++]);
                }
            }
        }
        else
        {
            /* Copy to the communication buffer */
            nrcg = i1 - i0;
            pos_vec[m] += 1 + vec*nrcg;
            for(i=i0; i<i1; i++)
            {
                copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
            }
            pos_vec[m] += (nvec - vec - 1)*nrcg;
        }
        if (!bCompact)
        {
            home_pos += i1 - i0;
        }
        i0 = i1;
    }

    return home_pos;
}
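
/* Per moved charge group, cgcm_state[m] holds one rvec for the cg center
 * followed by nvec rvecs per atom (x, then optionally v, sd_X, cg_p); the
 * "1 + vec*nrcg" and "(nvec - vec - 1)*nrcg" strides above skip the slots
 * that belong to the other vectors of the same charge group.
 */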
static int compact_and_copy_vec_cg(int ncg,int *move,
                                   int *cgindex,
                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int m,icg,i0,i1,nrcg;
    int home_pos;
    int pos_vec[DIM*2];

    home_pos = 0;

    for(m=0; m<DIM*2; m++)
    {
        pos_vec[m] = 0;
    }

    i0 = 0;
    for(icg=0; icg<ncg; icg++)
    {
        i1 = cgindex[icg+1];
        m  = move[icg];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                copy_rvec(src[icg],src[home_pos++]);
            }
        }
        else
        {
            nrcg = i1 - i0;
            /* Copy to the communication buffer */
            copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
            pos_vec[m] += 1 + nrcg*nvec;
        }
        i0 = i1;
    }
    if (!bCompact)
    {
        home_pos = ncg;
    }

    return home_pos;
}
static int compact_ind(int ncg,int *move,
                       int *index_gl,int *cgindex,
                       int *gatindex,
                       gmx_ga2la_t ga2la,char *bLocalCG,
                       int *cginfo)
{
    int cg,nat,a0,a1,a,a_gl;
    int home_pos;

    home_pos = 0;
    nat = 0;
    for(cg=0; cg<ncg; cg++)
    {
        a0 = cgindex[cg];
        a1 = cgindex[cg+1];
        if (move[cg] == -1)
        {
            /* Compact the home arrays in place.
             * Anything that can be done here avoids access to global arrays.
             */
            cgindex[home_pos] = nat;
            for(a=a0; a<a1; a++)
            {
                a_gl = gatindex[a];
                gatindex[nat] = a_gl;
                /* The cell number stays 0, so we don't need to set it */
                ga2la_change_la(ga2la,a_gl,nat);
                nat++;
            }
            index_gl[home_pos] = index_gl[cg];
            cginfo[home_pos]   = cginfo[cg];
            /* The charge group remains local, so bLocalCG does not change */
            home_pos++;
        }
        else
        {
            /* Clear the global indices */
            for(a=a0; a<a1; a++)
            {
                ga2la_del(ga2la,gatindex[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[index_gl[cg]] = FALSE;
            }
        }
    }
    cgindex[home_pos] = nat;

    return home_pos;
}
static void clear_and_mark_ind(int ncg,int *move,
                               int *index_gl,int *cgindex,int *gatindex,
                               gmx_ga2la_t ga2la,char *bLocalCG,
                               int *cell_index)
{
    int cg,a0,a1,a;

    for(cg=0; cg<ncg; cg++)
    {
        if (move[cg] >= 0)
        {
            a0 = cgindex[cg];
            a1 = cgindex[cg+1];
            /* Clear the global indices */
            for(a=a0; a<a1; a++)
            {
                ga2la_del(ga2la,gatindex[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[index_gl[cg]] = FALSE;
            }
            /* Signal that this cg has moved using the ns cell index.
             * Here we set it to -1.
             * fill_grid will change it from -1 to 4*grid->ncells.
             */
            cell_index[cg] = -1;
        }
    }
}
static void print_cg_move(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_large_int_t step,int cg,int dim,int dir,
                          gmx_bool bHaveLimitdAndCMOld,real limitd,
                          rvec cm_old,rvec cm_new,real pos_d)
{
    gmx_domdec_comm_t *comm;
    char buf[22];

    comm = dd->comm;

    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
    if (bHaveLimitdAndCMOld)
    {
        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
                ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
    }
    else
    {
        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition in direction %c\n",
                ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
    }
    fprintf(fplog,"distance out of cell %f\n",
            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
    if (bHaveLimitdAndCMOld)
    {
        fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
                cm_old[XX],cm_old[YY],cm_old[ZZ]);
    }
    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
            cm_new[XX],cm_new[YY],cm_new[ZZ]);
    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->cell_x0[dim],comm->cell_x1[dim]);
}
static void cg_move_error(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_large_int_t step,int cg,int dim,int dir,
                          gmx_bool bHaveLimitdAndCMOld,real limitd,
                          rvec cm_old,rvec cm_new,real pos_d)
{
    if (fplog)
    {
        print_cg_move(fplog, dd,step,cg,dim,dir,
                      bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
    }
    print_cg_move(stderr,dd,step,cg,dim,dir,
                  bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
    gmx_fatal(FARGS,
              "A charge group moved too far between two domain decomposition steps\n"
              "This usually means that your system is not well equilibrated");
}
static void rotate_state_atom(t_state *state,int a)
{
    int est;

    for(est=0; est<estNR; est++)
    {
        if (EST_DISTR(est) && state->flags & (1<<est)) {
            switch (est) {
            case estX:
                /* Rotate the complete state; for a rectangular box only */
                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
                break;
            case estV:
                state->v[a][YY] = -state->v[a][YY];
                state->v[a][ZZ] = -state->v[a][ZZ];
                break;
            case estSDX:
                state->sd_X[a][YY] = -state->sd_X[a][YY];
                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
                break;
            case estCGP:
                state->cg_p[a][YY] = -state->cg_p[a][YY];
                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* These are distances, so not affected by rotation */
                break;
            default:
                gmx_incons("Unknown state entry encountered in rotate_state_atom");
            }
        }
    }
}
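
/* For screw pbc the second half of the box is rotated 180 degrees around
 * the x-axis, so crossing the periodic x-boundary mirrors y and z of the
 * positions and flips the sign of the y and z components of the
 * velocity-like state vectors, as done above.
 */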
static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
                              gmx_domdec_t *dd,ivec tric_dir,
                              t_state *state,rvec **f,
                              t_forcerec *fr,t_mdatoms *md,
                              gmx_bool bCompact,
                              t_nrnb *nrnb)
{
    int  *move;
    int  npbcdim;
    int  ncg[DIM*2],nat[DIM*2];
    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
    int  sbuf[2],rbuf[2];
    int  home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
    int  flag;
    gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
    gmx_bool bScrew;
    ivec dev;
    real inv_ncg,pos_d;
    matrix tcm;
    rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
    atom_id *cgindex;
    cginfo_mb_t *cginfo_mb;
    gmx_domdec_comm_t *comm;

    if (dd->bScrewPBC)
    {
        check_screw_box(state->box);
    }

    comm  = dd->comm;
    cg_cm = fr->cg_cm;

    for(i=0; i<estNR; i++)
    {
        if (EST_DISTR(i))
        {
            switch (i)
            {
            case estX:   /* Always present */            break;
            case estV:   bV   = (state->flags & (1<<i)); break;
            case estSDX: bSDX = (state->flags & (1<<i)); break;
            case estCGP: bCGP = (state->flags & (1<<i)); break;
            case estLD_RNG:
            case estLD_RNGI:
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* No processing required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
            }
        }
    }

    if (dd->ncg_tot > comm->nalloc_int)
    {
        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
        srenew(comm->buf_int,comm->nalloc_int);
    }
    move = comm->buf_int;

    /* Clear the count */
    for(c=0; c<dd->ndim*2; c++)
    {
        ncg[c] = 0;
        nat[c] = 0;
    }

    npbcdim = dd->npbcdim;

    for(d=0; (d<DIM); d++)
    {
        limitd[d] = dd->comm->cellsize_min[d];
        if (d >= npbcdim && dd->ci[d] == 0)
        {
            cell_x0[d] = -GMX_FLOAT_MAX;
        }
        else
        {
            cell_x0[d] = comm->cell_x0[d];
        }
        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
        {
            cell_x1[d] = GMX_FLOAT_MAX;
        }
        else
        {
            cell_x1[d] = comm->cell_x1[d];
        }
        if (d < npbcdim)
        {
            limit0[d] = comm->old_cell_x0[d] - limitd[d];
            limit1[d] = comm->old_cell_x1[d] + limitd[d];
        }
        else
        {
            /* We check after communication if a charge group moved
             * more than one cell. Set the pre-comm check limit to float_max.
             */
            limit0[d] = -GMX_FLOAT_MAX;
            limit1[d] =  GMX_FLOAT_MAX;
        }
    }

    make_tric_corr_matrix(npbcdim,state->box,tcm);

    cgindex = dd->cgindex;

    /* Compute the center of geometry for all home charge groups
     * and put them in the box and determine where they should go.
     */
    for(cg=0; cg<dd->ncg_home; cg++)
    {
        k0   = cgindex[cg];
        k1   = cgindex[cg+1];
        nrcg = k1 - k0;
        if (nrcg == 1)
        {
            copy_rvec(state->x[k0],cm_new);
        }
        else
        {
            inv_ncg = 1.0/nrcg;

            clear_rvec(cm_new);
            for(k=k0; (k<k1); k++)
            {
                rvec_inc(cm_new,state->x[k]);
            }
            for(d=0; (d<DIM); d++)
            {
                cm_new[d] = inv_ncg*cm_new[d];
            }
        }

        clear_ivec(dev);
        /* Do pbc and check DD cell boundary crossings */
        for(d=DIM-1; d>=0; d--)
        {
            if (dd->nc[d] > 1)
            {
                bScrew = (dd->bScrewPBC && d == XX);
                /* Determine the location of this cg in lattice coordinates */
                pos_d = cm_new[d];
                if (tric_dir[d])
                {
                    for(d2=d+1; d2<DIM; d2++)
                    {
                        pos_d += cm_new[d2]*tcm[d2][d];
                    }
                }
                /* Put the charge group in the triclinic unit-cell */
                if (pos_d >= cell_x1[d])
                {
                    if (pos_d >= limit1[d])
                    {
                        cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
                                      cg_cm[cg],cm_new,pos_d);
                    }
                    dev[d] = 1;
                    if (dd->ci[d] == dd->nc[d] - 1)
                    {
                        rvec_dec(cm_new,state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for(k=k0; (k<k1); k++)
                        {
                            rvec_dec(state->x[k],state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state,k);
                            }
                        }
                    }
                }
                else if (pos_d < cell_x0[d])
                {
                    if (pos_d < limit0[d])
                    {
                        cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
                                      cg_cm[cg],cm_new,pos_d);
                    }
                    dev[d] = -1;
                    if (dd->ci[d] == 0)
                    {
                        rvec_inc(cm_new,state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for(k=k0; (k<k1); k++)
                        {
                            rvec_inc(state->x[k],state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state,k);
                            }
                        }
                    }
                }
            }
            else if (d < npbcdim)
            {
                /* Put the charge group in the rectangular unit-cell */
                while (cm_new[d] >= state->box[d][d])
                {
                    rvec_dec(cm_new,state->box[d]);
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_dec(state->x[k],state->box[d]);
                    }
                }
                while (cm_new[d] < 0)
                {
                    rvec_inc(cm_new,state->box[d]);
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_inc(state->x[k],state->box[d]);
                    }
                }
            }
        }

        copy_rvec(cm_new,cg_cm[cg]);

        /* Determine where this cg should go */
        flag = 0;
        mc = -1;
        for(d=0; d<dd->ndim; d++)
        {
            dim = dd->dim[d];
            if (dev[dim] == 1)
            {
                flag |= DD_FLAG_FW(d);
                if (mc == -1)
                {
                    mc = d*2;
                }
            }
            else if (dev[dim] == -1)
            {
                flag |= DD_FLAG_BW(d);
                if (mc == -1)
                {
                    if (dd->nc[dim] > 2)
                    {
                        mc = d*2 + 1;
                    }
                    else
                    {
                        mc = d*2;
                    }
                }
            }
        }
        move[cg] = mc;
        if (mc >= 0)
        {
            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
            {
                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
            }
            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
            /* We store the cg size in the lower 16 bits
             * and the place where the charge group should go
             * in the next 6 bits. This saves some communication volume.
             */
            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
            ncg[mc] += 1;
            nat[mc] += nrcg;
        }
    }

    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);

    nvec = 1;
    if (bV)
    {
        nvec++;
    }
    if (bSDX)
    {
        nvec++;
    }
    if (bCGP)
    {
        nvec++;
    }

    /* Make sure the communication buffers are large enough */
    for(mc=0; mc<dd->ndim*2; mc++)
    {
        nvr = ncg[mc] + nat[mc]*nvec;
        if (nvr > comm->cgcm_state_nalloc[mc])
        {
            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
        }
    }

    /* Recalculating cg_cm might be cheaper than communicating,
     * but that could give rise to rounding issues.
     */
    home_pos_cg =
        compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
                                nvec,cg_cm,comm,bCompact);

    vec = 0;
    home_pos_at =
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->x,comm,bCompact);
    if (bV)
    {
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->v,comm,bCompact);
    }
    if (bSDX)
    {
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->sd_X,comm,bCompact);
    }
    if (bCGP)
    {
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->cg_p,comm,bCompact);
    }

    if (bCompact)
    {
        compact_ind(dd->ncg_home,move,
                    dd->index_gl,dd->cgindex,dd->gatindex,
                    dd->ga2la,comm->bLocalCG,
                    fr->cginfo);
    }
    else
    {
        clear_and_mark_ind(dd->ncg_home,move,
                           dd->index_gl,dd->cgindex,dd->gatindex,
                           dd->ga2la,comm->bLocalCG,
                           fr->ns.grid->cell_index);
    }

    cginfo_mb = fr->cginfo_mb;

    ncg_stay_home = home_pos_cg;
    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        ncg_recv = 0;
        nat_recv = 0;
        nvr      = 0;
        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
        {
            cdd = d*2 + dir;
            /* Communicate the cg and atom counts */
            sbuf[0] = ncg[cdd];
            sbuf[1] = nat[cdd];
            if (debug)
            {
                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
                        d,dir,sbuf[0],sbuf[1]);
            }
            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);

            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
            {
                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
                srenew(comm->buf_int,comm->nalloc_int);
            }

            /* Communicate the charge group indices, sizes and flags */
            dd_sendrecv_int(dd, d, dir,
                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);

            nvs = ncg[cdd] + nat[cdd]*nvec;
            i   = rbuf[0]  + rbuf[1] *nvec;
            vec_rvec_check_alloc(&comm->vbuf,nvr+i);

            /* Communicate cgcm and state */
            dd_sendrecv_rvec(dd, d, dir,
                             comm->cgcm_state[cdd], nvs,
                             comm->vbuf.v+nvr, i);
            ncg_recv += rbuf[0];
            nat_recv += rbuf[1];
            nvr      += i;
        }

        /* Process the received charge groups */
        buf_pos = 0;
        for(cg=0; cg<ncg_recv; cg++)
        {
            flag = comm->buf_int[cg*DD_CGIBS+1];

            if (dim >= npbcdim && dd->nc[dim] > 2)
            {
                /* No pbc in this dim and more than one domain boundary.
                 * We do a separate check if a charge group didn't move too far.
                 */
                if (((flag & DD_FLAG_FW(d)) &&
                     comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
                    ((flag & DD_FLAG_BW(d)) &&
                     comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
                {
                    cg_move_error(fplog,dd,step,cg,dim,
                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
                                  FALSE,0,
                                  comm->vbuf.v[buf_pos],
                                  comm->vbuf.v[buf_pos],
                                  comm->vbuf.v[buf_pos][dim]);
                }
            }

            mc = -1;
            if (d < dd->ndim-1)
            {
                /* Check which direction this cg should go */
                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
                {
                    if (dd->bGridJump)
                    {
                        /* The cell boundaries for dimension d2 are not equal
                         * for each cell row of the lower dimension(s),
                         * therefore we might need to redetermine where
                         * this cg should go.
                         */
                        dim2 = dd->dim[d2];
                        /* If this cg crosses the box boundary in dimension d2
                         * we can use the communicated flag, so we do not
                         * have to worry about pbc.
                         */
                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
                               (flag & DD_FLAG_FW(d2))) ||
                              (dd->ci[dim2] == 0 &&
                               (flag & DD_FLAG_BW(d2)))))
                        {
                            /* Clear the two flags for this dimension */
                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
                            /* Determine the location of this cg
                             * in lattice coordinates
                             */
                            pos_d = comm->vbuf.v[buf_pos][dim2];
                            if (tric_dir[dim2])
                            {
                                for(d3=dim2+1; d3<DIM; d3++)
                                {
                                    pos_d +=
                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
                                }
                            }
                            /* Check if we are not at the box edge.
                             * pbc is only handled in the first step above,
                             * but this check could move over pbc while
                             * the first step did not due to different rounding.
                             */
                            if (pos_d >= cell_x1[dim2] &&
                                dd->ci[dim2] != dd->nc[dim2]-1)
                            {
                                flag |= DD_FLAG_FW(d2);
                            }
                            else if (pos_d < cell_x0[dim2] &&
                                     dd->ci[dim2] != 0)
                            {
                                flag |= DD_FLAG_BW(d2);
                            }
                            comm->buf_int[cg*DD_CGIBS+1] = flag;
                        }
                    }
                    /* Set to which neighboring cell this cg should go */
                    if (flag & DD_FLAG_FW(d2))
                    {
                        mc = d2*2;
                    }
                    else if (flag & DD_FLAG_BW(d2))
                    {
                        if (dd->nc[dd->dim[d2]] > 2)
                        {
                            mc = d2*2+1;
                        }
                        else
                        {
                            mc = d2*2;
                        }
                    }
                }
            }

            nrcg = flag & DD_FLAG_NRCG;
            if (mc == -1)
            {
                if (home_pos_cg+1 > dd->cg_nalloc)
                {
                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
                    srenew(dd->index_gl,dd->cg_nalloc);
                    srenew(dd->cgindex,dd->cg_nalloc+1);
                }
                /* Set the global charge group index and size */
                dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
                /* Copy the state from the buffer */
                if (home_pos_cg >= fr->cg_nalloc)
                {
                    dd_realloc_fr_cg(fr,home_pos_cg+1);
                    cg_cm = fr->cg_cm;
                }
                copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
                /* Set the cginfo */
                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
                                                   dd->index_gl[home_pos_cg]);
                if (comm->bLocalCG)
                {
                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
                }

                if (home_pos_at+nrcg > state->nalloc)
                {
                    dd_realloc_state(state,f,home_pos_at+nrcg);
                }
                for(i=0; i<nrcg; i++)
                {
                    copy_rvec(comm->vbuf.v[buf_pos++],
                              state->x[home_pos_at+i]);
                }
                if (bV)
                {
                    for(i=0; i<nrcg; i++)
                    {
                        copy_rvec(comm->vbuf.v[buf_pos++],
                                  state->v[home_pos_at+i]);
                    }
                }
                if (bSDX)
                {
                    for(i=0; i<nrcg; i++)
                    {
                        copy_rvec(comm->vbuf.v[buf_pos++],
                                  state->sd_X[home_pos_at+i]);
                    }
                }
                if (bCGP)
                {
                    for(i=0; i<nrcg; i++)
                    {
                        copy_rvec(comm->vbuf.v[buf_pos++],
                                  state->cg_p[home_pos_at+i]);
                    }
                }
                home_pos_cg += 1;
                home_pos_at += nrcg;
            }
            else
            {
                /* Reallocate the buffers if necessary */
                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
                {
                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
                }
                nvr = ncg[mc] + nat[mc]*nvec;
                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
                {
                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
                }
                /* Copy from the receive to the send buffers */
                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
                       comm->buf_int + cg*DD_CGIBS,
                       DD_CGIBS*sizeof(int));
                memcpy(comm->cgcm_state[mc][nvr],
                       comm->vbuf.v[buf_pos],
                       (1+nrcg*nvec)*sizeof(rvec));
                buf_pos += 1 + nrcg*nvec;
                ncg[mc] += 1;
                nat[mc] += nrcg;
            }
        }
    }

    /* With sorting (!bCompact) the indices are now only partially up to date
     * and ncg_home and nat_home are not the real count, since there are
     * "holes" in the arrays for the charge groups that moved to neighbors.
     */
    dd->ncg_home = home_pos_cg;
    dd->nat_home = home_pos_at;

    if (debug)
    {
        fprintf(debug,"Finished repartitioning\n");
    }

    return ncg_stay_home;
}
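
/* Example of the packed cggl_flag layout used above (a sketch for
 * clarity, not code from the original source):
 *     int packed = comm->cggl_flag[mc][j*DD_CGIBS+1];
 *     int nrcg   = packed & DD_FLAG_NRCG;    lower 16 bits: cg size
 *     packed & DD_FLAG_FW(d)                 cg moves forward in dim d
 *     packed & DD_FLAG_BW(d)                 cg moves backward in dim d
 * Entry j*DD_CGIBS holds the global charge group index itself.
 */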
void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
{
    dd->comm->cycl[ddCycl] += cycles;
    dd->comm->cycl_n[ddCycl]++;
    if (cycles > dd->comm->cycl_max[ddCycl])
    {
        dd->comm->cycl_max[ddCycl] = cycles;
    }
}
static double force_flop_count(t_nrnb *nrnb)
{
    int  i;
    double sum;
    const char *name;

    sum = 0;
    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
    {
        /* To get closer to the real timings, we halve the count
         * for the normal loops and again halve it for water loops.
         */
        name = nrnb_str(i);
        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
        {
            sum += nrnb->n[i]*0.25*cost_nrnb(i);
        }
        else
        {
            sum += nrnb->n[i]*0.50*cost_nrnb(i);
        }
    }
    for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
    {
        name = nrnb_str(i);
        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
            sum += nrnb->n[i]*cost_nrnb(i);
    }
    for(i=eNR_BONDS; i<=eNR_WALLS; i++)
    {
        sum += nrnb->n[i]*cost_nrnb(i);
    }

    return sum;
}
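
/* The weights above are heuristic: a regular kernel invocation is counted
 * at half its nominal flop cost and a W3/W4 water kernel at a quarter,
 * since those loops run considerably faster than the generic cost table
 * suggests; only the relative load matters for the balancing.
 */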
void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
{
    if (dd->comm->eFlop)
    {
        dd->comm->flop -= force_flop_count(nrnb);
    }
}
void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
{
    if (dd->comm->eFlop)
    {
        dd->comm->flop += force_flop_count(nrnb);
        dd->comm->flop_n++;
    }
}
static void clear_dd_cycle_counts(gmx_domdec_t *dd)
{
    int i;

    for(i=0; i<ddCyclNr; i++)
    {
        dd->comm->cycl[i]     = 0;
        dd->comm->cycl_n[i]   = 0;
        dd->comm->cycl_max[i] = 0;
    }
    dd->comm->flop   = 0;
    dd->comm->flop_n = 0;
}
static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    gmx_domdec_load_t *load;
    gmx_domdec_root_t *root=NULL;
    int  d,dim,cid,i,pos;
    float cell_frac=0,sbuf[DD_NLOAD_MAX];
    gmx_bool bSepPME;

    if (debug)
    {
        fprintf(debug,"get_load_distribution start\n");
    }

    wallcycle_start(wcycle,ewcDDCOMMLOAD);

    comm = dd->comm;

    bSepPME = (dd->pme_nodeid >= 0);

    for(d=dd->ndim-1; d>=0; d--)
    {
        dim = dd->dim[d];
        /* Check if we participate in the communication in this dimension */
        if (d == dd->ndim-1 ||
            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
        {
            load = &comm->load[d];
            if (dd->bGridJump)
            {
                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
            }
            pos = 0;
            if (d == dd->ndim-1)
            {
                sbuf[pos++] = dd_force_load(comm);
                sbuf[pos++] = sbuf[0];
                if (dd->bGridJump)
                {
                    sbuf[pos++] = sbuf[0];
                    sbuf[pos++] = cell_frac;
                    if (d > 0)
                    {
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    }
                }
                if (bSepPME)
                {
                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
                    sbuf[pos++] = comm->cycl[ddCyclPME];
                }
            }
            else
            {
                sbuf[pos++] = comm->load[d+1].sum;
                sbuf[pos++] = comm->load[d+1].max;
                if (dd->bGridJump)
                {
                    sbuf[pos++] = comm->load[d+1].sum_m;
                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
                    sbuf[pos++] = comm->load[d+1].flags;
                    if (d > 0)
                    {
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    }
                }
                if (bSepPME)
                {
                    sbuf[pos++] = comm->load[d+1].mdf;
                    sbuf[pos++] = comm->load[d+1].pme;
                }
            }
            load->nload = pos;
            /* Communicate a row in DD direction d.
             * The communicators are setup such that the root always has rank 0.
             */
#ifdef GMX_MPI
            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
                       load->load,load->nload*sizeof(float),MPI_BYTE,
                       0,comm->mpi_comm_load[d]);
#endif
            if (dd->ci[dim] == dd->master_ci[dim])
            {
                /* We are the root, process this row */
                if (comm->bDynLoadBal)
                {
                    root = comm->root[d];
                }
                load->sum      = 0;
                load->max      = 0;
                load->sum_m    = 0;
                load->cvol_min = 1;
                load->flags    = 0;
                load->mdf      = 0;
                load->pme      = 0;
                pos = 0;
                for(i=0; i<dd->nc[dim]; i++)
                {
                    load->sum += load->load[pos++];
                    load->max = max(load->max,load->load[pos]);
                    pos++;
                    if (dd->bGridJump)
                    {
                        if (root->bLimited)
                        {
                            /* This direction could not be load balanced properly,
                             * therefore we need to use the maximum instead of
                             * the average load.
                             */
                            load->sum_m = max(load->sum_m,load->load[pos]);
                        }
                        else
                        {
                            load->sum_m += load->load[pos];
                        }
                        pos++;
                        load->cvol_min = min(load->cvol_min,load->load[pos]);
                        pos++;
                        if (d < dd->ndim-1)
                        {
                            load->flags = (int)(load->load[pos++] + 0.5);
                        }
                        if (d > 0)
                        {
                            root->cell_f_max0[i] = load->load[pos++];
                            root->cell_f_min1[i] = load->load[pos++];
                        }
                    }
                    if (bSepPME)
                    {
                        load->mdf = max(load->mdf,load->load[pos]);
                        pos++;
                        load->pme = max(load->pme,load->load[pos]);
                        pos++;
                    }
                }
                if (comm->bDynLoadBal && root->bLimited)
                {
                    load->sum_m *= dd->nc[dim];
                    load->flags |= (1<<d);
                }
            }
        }
    }

    if (DDMASTER(dd))
    {
        comm->nload     += dd_load_count(comm);
        comm->load_step += comm->cycl[ddCyclStep];
        comm->load_sum  += comm->load[0].sum;
        comm->load_max  += comm->load[0].max;
        if (comm->bDynLoadBal)
        {
            for(d=0; d<dd->ndim; d++)
            {
                if (comm->load[0].flags & (1<<d))
                {
                    comm->load_lim[d]++;
                }
            }
        }
        if (bSepPME)
        {
            comm->load_mdf += comm->load[0].mdf;
            comm->load_pme += comm->load[0].pme;
        }
    }

    wallcycle_stop(wcycle,ewcDDCOMMLOAD);

    if (debug)
    {
        fprintf(debug,"get_load_distribution finished\n");
    }
}
static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
{
    /* Return the relative performance loss on the total run time
     * due to the force calculation load imbalance.
     */
    if (dd->comm->nload > 0)
    {
        return
            (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
            (dd->comm->load_step*dd->nnodes);
    }
    else
    {
        return 0;
    }
}
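
/* Example: with 4 nodes, accumulated load_sum = 4.0 and load_max = 1.3,
 * the loss is (1.3*4 - 4.0)/(load_step*4); if load_step is 1.3 (the
 * slowest node sets the pace) that is roughly 0.23, i.e. about 23% of
 * the run time is spent waiting.
 */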
static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
{
    char  buf[STRLEN];
    int   npp,npme,nnodes,d,limp;
    float imbal,pme_f_ratio,lossf,lossp=0;
    gmx_bool bLim;
    gmx_domdec_comm_t *comm;

    comm = dd->comm;
    if (DDMASTER(dd) && comm->nload > 0)
    {
        npp    = dd->nnodes;
        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
        nnodes = npp + npme;
        imbal = comm->load_max*npp/comm->load_sum - 1;
        lossf = dd_force_imb_perf_loss(dd);
        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
        fprintf(fplog,"%s",buf);
        fprintf(stderr,"\n");
        fprintf(stderr,"%s",buf);
        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
        fprintf(fplog,"%s",buf);
        fprintf(stderr,"%s",buf);
        bLim = FALSE;
        if (comm->bDynLoadBal)
        {
            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
            for(d=0; d<dd->ndim; d++)
            {
                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
                if (limp >= 50)
                {
                    bLim = TRUE;
                }
            }
            sprintf(buf+strlen(buf),"\n");
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
        }
        if (npme > 0)
        {
            pme_f_ratio = comm->load_pme/comm->load_mdf;
            lossp = (comm->load_pme - comm->load_mdf)/comm->load_step;
            if (lossp <= 0)
            {
                lossp *= (float)npme/(float)nnodes;
            }
            else
            {
                lossp *= (float)npp/(float)nnodes;
            }
            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
        }
        fprintf(fplog,"\n");
        fprintf(stderr,"\n");

        if (lossf >= DD_PERF_LOSS)
        {
            sprintf(buf,
                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
                    "      in the domain decomposition.\n",lossf*100);
            if (!comm->bDynLoadBal)
            {
                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb).\n");
            }
            else if (bLim)
            {
                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
            }
            fprintf(fplog,"%s\n",buf);
            fprintf(stderr,"%s\n",buf);
        }
        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
        {
            sprintf(buf,
                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
                    "      had %s work to do than the PP nodes.\n"
                    "      You might want to %s the number of PME nodes\n"
                    "      or %s the cut-off and the grid spacing.\n",
                    fabs(lossp)*100,
                    (lossp < 0) ? "less" : "more",
                    (lossp < 0) ? "decrease" : "increase",
                    (lossp < 0) ? "decrease" : "increase");
            fprintf(fplog,"%s\n",buf);
            fprintf(stderr,"%s\n",buf);
        }
    }
}
static float dd_vol_min(gmx_domdec_t *dd)
{
    return dd->comm->load[0].cvol_min*dd->nnodes;
}
static gmx_bool dd_load_flags(gmx_domdec_t *dd)
{
    return dd->comm->load[0].flags;
}
static float dd_f_imbal(gmx_domdec_t *dd)
{
    return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
}
static float dd_pme_f_ratio(gmx_domdec_t *dd)
{
    return dd->comm->load[0].pme/dd->comm->load[0].mdf;
}
static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
{
    int flags,d;
    char buf[22];

    flags = dd_load_flags(dd);
    if (flags)
    {
        fprintf(fplog,
                "DD load balancing is limited by minimum cell size in dimension");
        for(d=0; d<dd->ndim; d++)
        {
            if (flags & (1<<d))
            {
                fprintf(fplog," %c",dim2char(dd->dim[d]));
            }
        }
        fprintf(fplog,"\n");
    }
    fprintf(fplog,"DD step %s",gmx_step_str(step,buf));
    if (dd->comm->bDynLoadBal)
    {
        fprintf(fplog," vol min/aver %5.3f%c",
                dd_vol_min(dd),flags ? '!' : ' ');
    }
    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
    if (dd->comm->cycl_n[ddCyclPME])
    {
        fprintf(fplog," pme mesh/force %5.3f",dd_pme_f_ratio(dd));
    }
    fprintf(fplog,"\n\n");
}
static void dd_print_load_verbose(gmx_domdec_t *dd)
{
    if (dd->comm->bDynLoadBal)
    {
        fprintf(stderr,"vol %4.2f%c ",
                dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
    }
    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
    if (dd->comm->cycl_n[ddCyclPME])
    {
        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
    }
}
#ifdef GMX_MPI
static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
                                   int dim_ind,ivec loc)
{
    MPI_Group g_row;
    MPI_Comm  c_row;
    int  dim,i,*rank;
    ivec loc_c;
    gmx_domdec_root_t *root;

    dim = dd->dim[dim_ind];
    copy_ivec(loc,loc_c);
    snew(rank,dd->nc[dim]);
    for(i=0; i<dd->nc[dim]; i++)
    {
        loc_c[dim] = i;
        rank[i] = dd_index(dd->nc,loc_c);
    }
    /* Here we create a new group, that does not necessarily
     * include our process. But MPI_Comm_create needs to be
     * called by all the processes in the original communicator.
     * Calling MPI_Group_free afterwards gives errors, so I assume
     * also the group is needed by all processes. (B. Hess)
     */
    MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
    MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
    if (c_row != MPI_COMM_NULL)
    {
        /* This process is part of the group */
        dd->comm->mpi_comm_load[dim_ind] = c_row;
        if (dd->comm->eDLB != edlbNO)
        {
            if (dd->ci[dim] == dd->master_ci[dim])
            {
                /* This is the root process of this row */
                snew(dd->comm->root[dim_ind],1);
                root = dd->comm->root[dim_ind];
                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
                snew(root->old_cell_f,dd->nc[dim]+1);
                snew(root->bCellMin,dd->nc[dim]);
                if (dim_ind > 0)
                {
                    snew(root->cell_f_max0,dd->nc[dim]);
                    snew(root->cell_f_min1,dd->nc[dim]);
                    snew(root->bound_min,dd->nc[dim]);
                    snew(root->bound_max,dd->nc[dim]);
                }
                snew(root->buf_ncd,dd->nc[dim]);
            }
            else
            {
                /* This is not a root process, we only need to receive cell_f */
                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
            }
        }
        if (dd->ci[dim] == dd->master_ci[dim])
        {
            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
        }
    }
    sfree(rank);
}
#endif
static void make_load_communicators(gmx_domdec_t *dd)
{
#ifdef GMX_MPI
    MPI_Group g_all;
    int  dim0,dim1,i,j;
    ivec loc;

    if (debug)
    {
        fprintf(debug,"Making load communicators\n");
    }

    MPI_Comm_group(dd->mpi_comm_all,&g_all);

    snew(dd->comm->load,dd->ndim);
    snew(dd->comm->mpi_comm_load,dd->ndim);

    clear_ivec(loc);
    make_load_communicator(dd,g_all,0,loc);
    if (dd->ndim > 1) {
        dim0 = dd->dim[0];
        for(i=0; i<dd->nc[dim0]; i++) {
            loc[dim0] = i;
            make_load_communicator(dd,g_all,1,loc);
        }
    }
    if (dd->ndim > 2) {
        dim0 = dd->dim[0];
        for(i=0; i<dd->nc[dim0]; i++) {
            loc[dim0] = i;
            dim1 = dd->dim[1];
            for(j=0; j<dd->nc[dim1]; j++) {
                loc[dim1] = j;
                make_load_communicator(dd,g_all,2,loc);
            }
        }
    }

    MPI_Group_free(&g_all);

    if (debug)
    {
        fprintf(debug,"Finished making load communicators\n");
    }
#endif
}
void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
{
    int  d,dim,i,j,m;
    ivec tmp,s;
    int  nzone,nzonep;
    ivec dd_zp[DD_MAXIZONE];
    gmx_domdec_zones_t *zones;
    gmx_domdec_ns_ranges_t *izone;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        copy_ivec(dd->ci,tmp);
        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
        copy_ivec(dd->ci,tmp);
        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
        if (debug)
        {
            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
                    dd->rank,dim,
                    dd->neighbor[d][0],
                    dd->neighbor[d][1]);
        }
    }

    if (DDMASTER(dd))
    {
        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
                dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
    }
    if (fplog)
    {
        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
                dd->ndim,
                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
    switch (dd->ndim)
    {
    case 3:
        nzone  = dd_z3n;
        nzonep = dd_zp3n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp3[i],dd_zp[i]);
        }
        break;
    case 2:
        nzone  = dd_z2n;
        nzonep = dd_zp2n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp2[i],dd_zp[i]);
        }
        break;
    case 1:
        nzone  = dd_z1n;
        nzonep = dd_zp1n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp1[i],dd_zp[i]);
        }
        break;
    default:
        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
        nzone  = 0;
        nzonep = 0;
    }

    zones = &dd->comm->zones;

    for(i=0; i<nzone; i++)
    {
        m = 0;
        clear_ivec(zones->shift[i]);
        for(d=0; d<dd->ndim; d++)
        {
            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
        }
    }

    zones->n = nzone;
    for(i=0; i<nzone; i++)
    {
        for(d=0; d<DIM; d++)
        {
            s[d] = dd->ci[d] - zones->shift[i][d];
            if (s[d] < 0)
            {
                s[d] += dd->nc[d];
            }
            else if (s[d] >= dd->nc[d])
            {
                s[d] -= dd->nc[d];
            }
        }
    }
    zones->nizone = nzonep;
    for(i=0; i<zones->nizone; i++)
    {
        if (dd_zp[i][0] != i)
        {
            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
        }
        izone = &zones->izone[i];
        izone->j0 = dd_zp[i][1];
        izone->j1 = dd_zp[i][2];
        for(dim=0; dim<DIM; dim++)
        {
            if (dd->nc[dim] == 1)
            {
                /* All shifts should be allowed */
                izone->shift0[dim] = -1;
                izone->shift1[dim] = 1;
            }
            else
            {
                /*
                  izone->shift0[d] = 0;
                  izone->shift1[d] = 0;
                  for(j=izone->j0; j<izone->j1; j++) {
                      if (dd->shift[j][d] > dd->shift[i][d])
                          izone->shift0[d] = -1;
                      if (dd->shift[j][d] < dd->shift[i][d])
                          izone->shift1[d] = 1;
                  }
                */
                int shift_diff;

                /* Assume the shifts are not more than 1 cell */
                izone->shift0[dim] = 1;
                izone->shift1[dim] = -1;
                for(j=izone->j0; j<izone->j1; j++)
                {
                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
                    if (shift_diff < izone->shift0[dim])
                    {
                        izone->shift0[dim] = shift_diff;
                    }
                    if (shift_diff > izone->shift1[dim])
                    {
                        izone->shift1[dim] = shift_diff;
                    }
                }
            }
        }
    }

    if (dd->comm->eDLB != edlbNO)
    {
        snew(dd->comm->root,dd->ndim);
    }

    if (dd->comm->bRecordLoad)
    {
        make_load_communicators(dd);
    }
}
static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  i,rank,*buf;
    ivec periods;
#ifdef GMX_MPI
    MPI_Comm comm_cart;
#endif

    dd   = cr->dd;
    comm = dd->comm;

#ifdef GMX_MPI
    if (comm->bCartesianPP)
    {
        /* Set up cartesian communication for the particle-particle part */
        if (fplog)
        {
            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
        }

        for(i=0; i<DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
                        &comm_cart);
        /* We overwrite the old communicator with the new cartesian one */
        cr->mpi_comm_mygroup = comm_cart;
    }

    dd->mpi_comm_all = cr->mpi_comm_mygroup;
    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);

    if (comm->bCartesianPP_PME)
    {
        /* Since we want to use the original cartesian setup for sim,
         * and not the one after split, we need to make an index.
         */
        snew(comm->ddindex2ddnodeid,dd->nnodes);
        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
        /* Get the rank of the DD master;
         * above we made sure that the master node is a PP node.
         */
        if (MASTER(cr))
        {
            rank = dd->rank;
        }
        else
        {
            rank = 0;
        }
        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
    }
    else if (comm->bCartesianPP)
    {
        if (cr->npmenodes == 0)
        {
            /* The PP communicator is also
             * the communicator for this simulation
             */
            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
        }
        cr->nodeid = dd->rank;

        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);

        /* We need to make an index to go from the coordinates
         * to the nodeid of this simulation.
         */
        snew(comm->ddindex2simnodeid,dd->nnodes);
        snew(buf,dd->nnodes);
        if (cr->duty & DUTY_PP)
        {
            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
        }
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
                      cr->mpi_comm_mysim);
        sfree(buf);

        /* Determine the master coordinates and rank.
         * The DD master should be the same node as the master of this sim.
         */
        for(i=0; i<dd->nnodes; i++)
        {
            if (comm->ddindex2simnodeid[i] == 0)
            {
                ddindex2xyz(dd->nc,i,dd->master_ci);
                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
            }
        }
        if (debug)
        {
            fprintf(debug,"The master rank is %d\n",dd->masterrank);
        }
    }
    else
    {
        /* No Cartesian communicators */
        /* We use the rank in dd->comm->all as DD index */
        ddindex2xyz(dd->nc,dd->rank,dd->ci);
        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
        dd->masterrank = 0;
        clear_ivec(dd->master_ci);
    }
#endif

    if (fplog)
    {
        fprintf(fplog,
                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
    if (debug)
    {
        fprintf(debug,
                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
}
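
/* Illustrative note (an assumption about the dd_index() helper, not taken
 * from this section): dd_index(n,c) linearizes grid coordinates, e.g. for
 * dd->nc = {4,2,2} the coordinates (1,0,1) would map to (1*2 + 0)*2 + 1 = 5,
 * and ddindex2ddnodeid[] translates that DD index back to the (possibly
 * reordered) Cartesian MPI rank.
 */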
static void receive_ddindex2simnodeid(t_commrec *cr)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int *buf;

    dd   = cr->dd;
    comm = dd->comm;

#ifdef GMX_MPI
    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
    {
        snew(comm->ddindex2simnodeid,dd->nnodes);
        snew(buf,dd->nnodes);
        if (cr->duty & DUTY_PP)
        {
            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
        }
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
                      cr->mpi_comm_mysim);
        sfree(buf);
    }
#endif
}
static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
                                                     int ncg,int natoms)
{
    gmx_domdec_master_t *ma;
    int i;

    snew(ma,1);

    snew(ma->ncg,dd->nnodes);
    snew(ma->index,dd->nnodes+1);
    snew(ma->cg,ncg);
    snew(ma->nat,dd->nnodes);
    snew(ma->ibuf,dd->nnodes*2);
    snew(ma->cell_x,DIM);
    for(i=0; i<DIM; i++)
    {
        snew(ma->cell_x[i],dd->nc[i]+1);
    }

    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        ma->vbuf = NULL;
    }
    else
    {
        snew(ma->vbuf,natoms);
    }

    return ma;
}
static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
                               int reorder)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  i,rank;
    gmx_bool bDiv[DIM];
    ivec periods;
#ifdef GMX_MPI
    MPI_Comm comm_cart;
#endif

    dd   = cr->dd;
    comm = dd->comm;

    if (comm->bCartesianPP)
    {
        for(i=1; i<DIM; i++)
        {
            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
        }
        if (bDiv[YY] || bDiv[ZZ])
        {
            comm->bCartesianPP_PME = TRUE;
            /* If we have 2D PME decomposition, which is always in x+y,
             * we stack the PME only nodes in z.
             * Otherwise we choose the direction that provides the thinnest slab
             * of PME only nodes as this will have the least effect
             * on the PP communication.
             * But for the PME communication the opposite might be better.
             */
            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
                             !bDiv[YY] ||
                             dd->nc[YY] > dd->nc[ZZ]))
            {
                comm->cartpmedim = ZZ;
            }
            else
            {
                comm->cartpmedim = YY;
            }
            comm->ntot[comm->cartpmedim]
                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
        }
        else if (fplog)
        {
            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
            fprintf(fplog,
                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
        }
    }

#ifdef GMX_MPI
    if (comm->bCartesianPP_PME)
    {
        if (fplog)
        {
            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
        }

        for(i=0; i<DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
                        &comm_cart);

        MPI_Comm_rank(comm_cart,&rank);
        if (MASTERNODE(cr) && rank != 0)
        {
            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
        }

        /* With this assignment we lose the link to the original communicator,
         * which will usually be MPI_COMM_WORLD, unless we have multisim.
         */
        cr->mpi_comm_mysim = comm_cart;
        cr->sim_nodeid = rank;

        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);

        if (fplog)
        {
            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
        }

        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
        {
            cr->duty = DUTY_PP;
        }
        if (cr->npmenodes == 0 ||
            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
        {
            cr->duty = DUTY_PME;
        }

        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       cr->duty,
                       dd_index(comm->ntot,dd->ci),
                       &cr->mpi_comm_mygroup);
    }
    else
    {
        switch (dd_node_order)
        {
        case ddnoPP_PME:
            if (fplog)
            {
                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
            }
            break;
        case ddnoINTERLEAVE:
            /* Interleave the PP-only and PME-only nodes,
             * as on clusters with dual-core machines this will double
             * the communication bandwidth of the PME processes
             * and thus speed up the PP <-> PME and inter PME communication.
             */
            if (fplog)
            {
                fprintf(fplog,"Interleaving PP and PME nodes\n");
            }
            comm->pmenodes = dd_pmenodes(cr);
            break;
        case ddnoCARTESIAN:
            break;
        default:
            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
        }

        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
        {
            cr->duty = DUTY_PME;
        }
        else
        {
            cr->duty = DUTY_PP;
        }

        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       cr->duty,
                       cr->nodeid,
                       &cr->mpi_comm_mygroup);
        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
    }
#endif

    if (fplog)
    {
        fprintf(fplog,"This is a %s only node\n\n",
                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
    }
}
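
/* Illustration (hypothetical counts, not from this file): with 8 PP nodes,
 * 4 PME nodes and dd_node_order = ddnoINTERLEAVE, the ranks would typically
 * be laid out as PP,PP,PME,PP,PP,PME,... so that each PME process sits on
 * the same machine as the PP processes it serves, which is the bandwidth
 * argument made in the comment above.
 */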
void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  CartReorder;

    dd   = cr->dd;
    comm = dd->comm;

    copy_ivec(dd->nc,comm->ntot);

    comm->bCartesianPP     = (dd_node_order == ddnoCARTESIAN);
    comm->bCartesianPP_PME = FALSE;

    /* Reorder the nodes by default. This might change the MPI ranks.
     * Real reordering is only supported on very few architectures,
     * Blue Gene is one of them.
     */
    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);

    if (cr->npmenodes > 0)
    {
        /* Split the communicator into a PP and PME part */
        split_communicator(fplog,cr,dd_node_order,CartReorder);
        if (comm->bCartesianPP_PME)
        {
            /* We (possibly) reordered the nodes in split_communicator,
             * so it is no longer required in make_pp_communicator.
             */
            CartReorder = FALSE;
        }
    }
    else
    {
        /* All nodes do PP and PME */
#ifdef GMX_MPI
        /* We do not require separate communicators */
        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
#endif
    }

    if (cr->duty & DUTY_PP)
    {
        /* Copy or make a new PP communicator */
        make_pp_communicator(fplog,cr,CartReorder);
    }
    else
    {
        receive_ddindex2simnodeid(cr);
    }

    if (!(cr->duty & DUTY_PME))
    {
        /* Set up the communication to our PME node */
        dd->pme_nodeid           = dd_simnode2pmenode(cr,cr->sim_nodeid);
        dd->pme_receive_vir_ener = receive_vir_ener(cr);
        if (debug)
        {
            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
                    dd->pme_nodeid,dd->pme_receive_vir_ener);
        }
    }
    else
    {
        dd->pme_nodeid = -1;
    }

    if (DDMASTER(dd))
    {
        dd->ma = init_gmx_domdec_master_t(dd,
                                          comm->cgs_gl.nr,
                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
    }
}
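
/* A plausible call sequence (sketch only; the actual driver sits in the
 * mdrun runner code, outside this file): init_domain_decomposition() builds
 * dd and comm, make_dd_communicators() is then called with the chosen
 * dd_node_order, and only ranks with (cr->duty & DUTY_PP) go on to
 * partition the system.
 */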
static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
{
    real *slb_frac,tot;
    int  i,n;
    double dbl;

    slb_frac = NULL;
    if (nc > 1 && size_string != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"Using static load balancing for the %s direction\n",
                    dir);
        }
        snew(slb_frac,nc);
        tot = 0;
        for(i=0; i<nc; i++)
        {
            dbl = 0;
            sscanf(size_string,"%lf%n",&dbl,&n);
            if (dbl == 0)
            {
                gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
            }
            slb_frac[i]  = dbl;
            size_string += n;
            tot         += slb_frac[i];
        }
        /* Normalize */
        if (fplog)
        {
            fprintf(fplog,"Relative cell sizes:");
        }
        for(i=0; i<nc; i++)
        {
            slb_frac[i] /= tot;
            if (fplog)
            {
                fprintf(fplog," %5.3f",slb_frac[i]);
            }
        }
        if (fplog)
        {
            fprintf(fplog,"\n");
        }
    }

    return slb_frac;
}
static int multi_body_bondeds_count(gmx_mtop_t *mtop)
{
    int n,nmol,ftype;
    gmx_mtop_ilistloop_t iloop;
    t_ilist *il;

    n = 0;
    iloop = gmx_mtop_ilistloop_init(mtop);
    while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
    {
        for(ftype=0; ftype<F_NRE; ftype++)
        {
            if ((interaction_function[ftype].flags & IF_BOND) &&
                NRAL(ftype) > 2)
            {
                n += nmol*il[ftype].nr/(1 + NRAL(ftype));
            }
        }
    }

    return n;
}
static int dd_nst_env(FILE *fplog,const char *env_var,int def)
{
    char *val;
    int  nst;

    nst = def;
    val = getenv(env_var);
    if (val)
    {
        if (sscanf(val,"%d",&nst) <= 0)
        {
            nst = 1;
        }
        if (fplog)
        {
            fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
                    env_var,val,nst);
        }
    }

    return nst;
}
static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
{
    if (MASTER(cr))
    {
        fprintf(stderr,"\n%s\n",warn_string);
    }
    if (fplog)
    {
        fprintf(fplog,"\n%s\n",warn_string);
    }
}
static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
                                  t_inputrec *ir,FILE *fplog)
{
    if (ir->ePBC == epbcSCREW &&
        (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
    {
        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
    }

    if (ir->ns_type == ensSIMPLE)
    {
        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
    }

    if (ir->nstlist == 0)
    {
        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
    }

    if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
    {
        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
    }
}
static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
{
    int  di,d;
    real r;

    r = ddbox->box_size[XX];
    for(di=0; di<dd->ndim; di++)
    {
        d = dd->dim[di];
        /* Check using the initial average cell size */
        r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
    }

    return r;
}
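
/* Worked example (numbers are illustrative only): for a rectangular box
 * with box_size[d] = 6 nm, skew_fac[d] = 1 and nc[d] = 3 the contribution
 * of dimension d is 6*1/3 = 2 nm; the function returns the minimum of this
 * quantity over all decomposed dimensions.
 */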
static int check_dlb_support(FILE *fplog,t_commrec *cr,
                             const char *dlb_opt,gmx_bool bRecordLoad,
                             unsigned long Flags,t_inputrec *ir)
{
    int  eDLB = -1;
    char buf[STRLEN];

    switch (dlb_opt[0])
    {
    case 'a': eDLB = edlbAUTO; break;
    case 'n': eDLB = edlbNO;   break;
    case 'y': eDLB = edlbYES;  break;
    default: gmx_incons("Unknown dlb_opt");
    }

    if (Flags & MD_RERUN)
    {
        return edlbNO;
    }

    if (!EI_DYNAMICS(ir->eI))
    {
        if (eDLB == edlbYES)
        {
            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
            dd_warning(cr,fplog,buf);
        }
        return edlbNO;
    }

    if (!bRecordLoad)
    {
        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
        return edlbNO;
    }

    if (Flags & MD_REPRODUCIBLE)
    {
        switch (eDLB)
        {
        case edlbNO:
            break;
        case edlbAUTO:
            dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
            eDLB = edlbNO;
            break;
        case edlbYES:
            dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
            break;
        default:
            gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
        }
    }

    return eDLB;
}
static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
{
    int dim;

    dd->ndim = 0;
    if (getenv("GMX_DD_ORDER_ZYX") != NULL)
    {
        /* Decomposition order z,y,x */
        if (fplog)
        {
            fprintf(fplog,"Using domain decomposition order z, y, x\n");
        }
        for(dim=DIM-1; dim>=0; dim--)
        {
            if (dd->nc[dim] > 1)
            {
                dd->dim[dd->ndim++] = dim;
            }
        }
    }
    else
    {
        /* Decomposition order x,y,z */
        for(dim=0; dim<DIM; dim++)
        {
            if (dd->nc[dim] > 1)
            {
                dd->dim[dd->ndim++] = dim;
            }
        }
    }
}
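
/* Example (illustrative): with dd->nc = {4,1,2} the default x,y,z order
 * gives dd->ndim = 2 and dd->dim = {XX,ZZ}; setting GMX_DD_ORDER_ZYX
 * instead yields dd->dim = {ZZ,XX}. Dimensions with nc == 1 never appear.
 */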
static gmx_domdec_comm_t *init_dd_comm()
{
    gmx_domdec_comm_t *comm;
    int  i;

    snew(comm,1);

    snew(comm->cggl_flag,DIM*2);
    snew(comm->cgcm_state,DIM*2);
    for(i=0; i<DIM*2; i++)
    {
        comm->cggl_flag_nalloc[i]  = 0;
        comm->cgcm_state_nalloc[i] = 0;
    }

    comm->nalloc_int = 0;
    comm->buf_int    = NULL;

    vec_rvec_init(&comm->vbuf);

    comm->n_load_have    = 0;
    comm->n_load_collect = 0;

    for(i=0; i<ddnatNR-ddnatZONE; i++)
    {
        comm->sum_nat[i] = 0;
    }
    comm->ndecomp  = 0;
    comm->load_step = 0;
    comm->load_sum  = 0;
    comm->load_max  = 0;
    clear_ivec(comm->load_lim);
    comm->load_mdf = 0;
    comm->load_pme = 0;

    return comm;
}
gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
                                        unsigned long Flags,
                                        ivec nc,
                                        real comm_distance_min,real rconstr,
                                        const char *dlb_opt,real dlb_scale,
                                        const char *sizex,const char *sizey,const char *sizez,
                                        gmx_mtop_t *mtop,t_inputrec *ir,
                                        matrix box,rvec *x,
                                        gmx_ddbox_t *ddbox,
                                        int *npme_x,int *npme_y)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  recload;
    real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
    gmx_bool bC;
    char buf[STRLEN];

    if (fplog)
    {
        fprintf(fplog,
                "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
    }

    snew(dd,1);

    dd->comm = init_dd_comm();
    comm = dd->comm;
    snew(comm->cggl_flag,DIM*2);
    snew(comm->cgcm_state,DIM*2);

    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
    dd->bScrewPBC = (ir->ePBC == epbcSCREW);

    dd->bSendRecv2      = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
    comm->eFlop         = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
    recload             = dd_nst_env(fplog,"GMX_DD_LOAD",1);
    comm->nstSortCG     = dd_nst_env(fplog,"GMX_DD_SORT",1);
    comm->nstDDDump     = dd_nst_env(fplog,"GMX_DD_DUMP",0);
    comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
    comm->DD_debug      = dd_nst_env(fplog,"GMX_DD_DEBUG",0);

    dd->pme_recv_f_alloc = 0;
    dd->pme_recv_f_buf   = NULL;

    if (dd->bSendRecv2 && fplog)
    {
        fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
    }
    if (comm->eFlop)
    {
        if (fplog)
        {
            fprintf(fplog,"Will load balance based on FLOP count\n");
        }
        if (comm->eFlop > 1)
        {
            srand(1+cr->nodeid);
        }
        comm->bRecordLoad = TRUE;
    }
    else
    {
        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
    }

    comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);

    comm->bDynLoadBal = (comm->eDLB == edlbYES);
    if (fplog)
    {
        fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
    }
    dd->bGridJump = comm->bDynLoadBal;

    if (comm->nstSortCG)
    {
        if (fplog)
        {
            if (comm->nstSortCG == 1)
            {
                fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
            }
            else
            {
                fprintf(fplog,"Will sort the charge groups every %d steps\n",
                        comm->nstSortCG);
            }
        }
        snew(comm->sort,1);
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog,"Will not sort the charge groups\n");
        }
    }

    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
    if (comm->bInterCGBondeds)
    {
        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
    }
    else
    {
        comm->bInterCGMultiBody = FALSE;
    }

    dd->bInterCGcons = inter_charge_group_constraints(mtop);

    if (ir->rlistlong == 0)
    {
        /* Set the cut-off to some very large value,
         * so we don't need if statements everywhere in the code.
         * We use sqrt, since the cut-off is squared in some places.
         */
        comm->cutoff = GMX_CUTOFF_INF;
    }
    else
    {
        comm->cutoff = ir->rlistlong;
    }
    comm->cutoff_mbody = 0;

    comm->cellsize_limit = 0;
    comm->bBondComm      = FALSE;

    if (comm->bInterCGBondeds)
    {
        if (comm_distance_min > 0)
        {
            comm->cutoff_mbody = comm_distance_min;
            if (Flags & MD_DDBONDCOMM)
            {
                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
            }
            else
            {
                comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
            }
            r_bonded_limit = comm->cutoff_mbody;
        }
        else if (ir->bPeriodicMols)
        {
            /* Can not easily determine the required cut-off */
            dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
            comm->cutoff_mbody = comm->cutoff/2;
            r_bonded_limit     = comm->cutoff_mbody;
        }
        else
        {
            if (MASTER(cr))
            {
                dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
                                      Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
            }
            gmx_bcast(sizeof(r_2b),&r_2b,cr);
            gmx_bcast(sizeof(r_mb),&r_mb,cr);

            /* We use an initial margin of 10% for the minimum cell size,
             * except when we are just below the non-bonded cut-off.
             */
            if (Flags & MD_DDBONDCOMM)
            {
                if (max(r_2b,r_mb) > comm->cutoff)
                {
                    r_bonded        = max(r_2b,r_mb);
                    r_bonded_limit  = 1.1*r_bonded;
                    comm->bBondComm = TRUE;
                }
                else
                {
                    r_bonded       = r_mb;
                    r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
                }
                /* We determine cutoff_mbody later */
            }
            else
            {
                /* No special bonded communication,
                 * simply increase the DD cut-off.
                 */
                r_bonded_limit     = 1.1*max(r_2b,r_mb);
                comm->cutoff_mbody = r_bonded_limit;
                comm->cutoff       = max(comm->cutoff,comm->cutoff_mbody);
            }
        }
        comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
        if (fplog)
        {
            fprintf(fplog,
                    "Minimum cell size due to bonded interactions: %.3f nm\n",
                    comm->cellsize_limit);
        }
    }

    if (dd->bInterCGcons && rconstr <= 0)
    {
        /* There is a cell size limit due to the constraints (P-LINCS) */
        rconstr = constr_r_max(fplog,mtop,ir);
        if (fplog)
        {
            fprintf(fplog,
                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
                    rconstr);
            if (rconstr > comm->cellsize_limit)
            {
                fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
            }
        }
    }
    else if (rconstr > 0 && fplog)
    {
        /* Here we do not check for dd->bInterCGcons,
         * because one can also set a cell size limit for virtual sites only
         * and at this point we don't know yet if there are intercg v-sites.
         */
        fprintf(fplog,
                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
                rconstr);
    }
    comm->cellsize_limit = max(comm->cellsize_limit,rconstr);

    comm->cgs_gl = gmx_mtop_global_cgs(mtop);

    if (nc[XX] > 0)
    {
        copy_ivec(nc,dd->nc);
        set_dd_dim(fplog,dd);
        set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);

        if (cr->npmenodes == -1)
        {
            cr->npmenodes = 0;
        }
        acs = average_cellsize_min(dd,ddbox);
        if (acs < comm->cellsize_limit)
        {
            if (fplog)
            {
                fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
            }
            gmx_fatal_collective(FARGS,cr,NULL,
                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
                                 acs,comm->cellsize_limit);
        }
    }
    else
    {
        set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);

        /* We need to choose the optimal DD grid and possibly PME nodes */
        limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
                               comm->eDLB!=edlbNO,dlb_scale,
                               comm->cellsize_limit,comm->cutoff,
                               comm->bInterCGBondeds,comm->bInterCGMultiBody);

        if (dd->nc[XX] == 0)
        {
            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
            sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
                    !bC ? "-rdd" : "-rcon",
                    comm->eDLB!=edlbNO ? " or -dds" : "",
                    bC ? " or your LINCS settings" : "");

            gmx_fatal_collective(FARGS,cr,NULL,
                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
                                 "%s\n"
                                 "Look in the log file for details on the domain decomposition",
                                 cr->nnodes-cr->npmenodes,limit,buf);
        }
        set_dd_dim(fplog,dd);
    }

    if (fplog)
    {
        fprintf(fplog,
                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
    }

    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
    if (cr->nnodes - dd->nnodes != cr->npmenodes)
    {
        gmx_fatal_collective(FARGS,cr,NULL,
                             "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
                             dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
    }
    if (cr->npmenodes > dd->nnodes)
    {
        gmx_fatal_collective(FARGS,cr,NULL,
                             "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
    }
    if (cr->npmenodes > 0)
    {
        comm->npmenodes = cr->npmenodes;
    }
    else
    {
        comm->npmenodes = dd->nnodes;
    }

    if (EEL_PME(ir->coulombtype))
    {
        /* The following choices should match those
         * in comm_cost_est in domdec_setup.c.
         * Note that here the checks have to take into account
         * that the decomposition might occur in a different order than xyz
         * (for instance through the env.var. GMX_DD_ORDER_ZYX),
         * in which case they will not match those in comm_cost_est,
         * but since that is mainly for testing purposes that's fine.
         */
        if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
            comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
            getenv("GMX_PMEONEDD") == NULL)
        {
            comm->npmedecompdim = 2;
            comm->npmenodes_x   = dd->nc[XX];
            comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
        }
        else
        {
            /* In case nc is 1 in both x and y we could still choose to
             * decompose pme in y instead of x, but we use x for simplicity.
             */
            comm->npmedecompdim = 1;
            if (dd->dim[0] == YY)
            {
                comm->npmenodes_x = 1;
                comm->npmenodes_y = comm->npmenodes;
            }
            else
            {
                comm->npmenodes_x = comm->npmenodes;
                comm->npmenodes_y = 1;
            }
        }
        if (fplog)
        {
            fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
                    comm->npmenodes_x,comm->npmenodes_y,1);
        }
    }
    else
    {
        comm->npmedecompdim = 0;
        comm->npmenodes_x   = 0;
        comm->npmenodes_y   = 0;
    }

    /* Technically we don't need both of these,
     * but it simplifies code not having to recalculate it.
     */
    *npme_x = comm->npmenodes_x;
    *npme_y = comm->npmenodes_y;

    snew(comm->slb_frac,DIM);
    if (comm->eDLB == edlbNO)
    {
        comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
        comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
        comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
    }

    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
    {
        if (comm->bBondComm || comm->eDLB != edlbNO)
        {
            /* Set the bonded communication distance to halfway
             * the minimum and the maximum,
             * since the extra communication cost is nearly zero.
             */
            acs = average_cellsize_min(dd,ddbox);
            comm->cutoff_mbody = 0.5*(r_bonded + acs);
            if (comm->eDLB != edlbNO)
            {
                /* Check if this does not limit the scaling */
                comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
            }
            if (!comm->bBondComm)
            {
                /* Without bBondComm do not go beyond the n.b. cut-off */
                comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
                if (comm->cellsize_limit >= comm->cutoff)
                {
                    /* We don't lose a lot of efficiency
                     * when increasing it to the n.b. cut-off.
                     * It can even be slightly faster, because we need
                     * fewer checks for the communication setup.
                     */
                    comm->cutoff_mbody = comm->cutoff;
                }
            }
            /* Check if we did not end up below our original limit */
            comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);

            if (comm->cutoff_mbody > comm->cellsize_limit)
            {
                comm->cellsize_limit = comm->cutoff_mbody;
            }
        }
        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
    }

    if (debug)
    {
        fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
                "cellsize limit %f\n",
                comm->bBondComm,comm->cellsize_limit);
    }

    if (MASTER(cr))
    {
        check_dd_restrictions(cr,dd,ir,fplog);
    }

    comm->partition_step = INT_MIN;

    clear_dd_cycle_counts(dd);

    return dd;
}
static void set_dlb_limits(gmx_domdec_t *dd)
{
    int d;

    for(d=0; d<dd->ndim; d++)
    {
        dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
        dd->comm->cellsize_min[dd->dim[d]] =
            dd->comm->cellsize_min_dlb[dd->dim[d]];
    }
}
static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    real cellsize_min;
    int  d,nc,i;
    char buf[STRLEN];

    dd   = cr->dd;
    comm = dd->comm;

    if (fplog)
    {
        fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
    }

    cellsize_min = comm->cellsize_min[dd->dim[0]];
    for(d=1; d<dd->ndim; d++)
    {
        cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
    }

    if (cellsize_min < comm->cellsize_limit*1.05)
    {
        dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");

        /* Change DLB from "auto" to "no". */
        comm->eDLB = edlbNO;

        return;
    }

    dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
    comm->bDynLoadBal = TRUE;
    dd->bGridJump     = TRUE;

    set_dlb_limits(dd);

    /* We can set the required cell size info here,
     * so we do not need to communicate this.
     * The grid is completely uniform.
     */
    for(d=0; d<dd->ndim; d++)
    {
        if (comm->root[d])
        {
            comm->load[d].sum_m = comm->load[d].sum;

            nc = dd->nc[dd->dim[d]];
            for(i=0; i<nc; i++)
            {
                comm->root[d]->cell_f[i] = i/(real)nc;
                if (d > 0)
                {
                    comm->root[d]->cell_f_max0[i] = i/(real)nc;
                    comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
                }
            }
            comm->root[d]->cell_f[nc] = 1.0;
        }
    }
}
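
/* Sketch of the uniform grid set above (illustrative): for nc = 4 the row
 * master gets cell_f = {0, 0.25, 0.5, 0.75, 1.0}, i.e. equally spaced
 * box-relative boundaries, which DLB will subsequently move around.
 */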
static char *init_bLocalCG(gmx_mtop_t *mtop)
{
    int  ncg,cg;
    char *bLocalCG;

    ncg = ncg_mtop(mtop);
    snew(bLocalCG,ncg);
    for(cg=0; cg<ncg; cg++)
    {
        bLocalCG[cg] = FALSE;
    }

    return bLocalCG;
}
void dd_init_bondeds(FILE *fplog,
                     gmx_domdec_t *dd,gmx_mtop_t *mtop,
                     gmx_vsite_t *vsite,gmx_constr_t constr,
                     t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
{
    gmx_domdec_comm_t *comm;

    dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);

    comm = dd->comm;

    if (comm->bBondComm)
    {
        /* Communicate atoms beyond the cut-off for bonded interactions */
        comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);

        comm->bLocalCG = init_bLocalCG(mtop);
    }
    else
    {
        /* Only communicate atoms based on cut-off */
        comm->cglink   = NULL;
        comm->bLocalCG = NULL;
    }
}
static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
                              t_inputrec *ir,
                              gmx_bool bDynLoadBal,real dlb_scale,
                              gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int  d;
    ivec np;
    real limit,shrink;
    char buf[64];

    if (fplog == NULL)
    {
        return;
    }

    comm = dd->comm;

    if (bDynLoadBal)
    {
        fprintf(fplog,"The maximum number of communication pulses is:");
        for(d=0; d<dd->ndim; d++)
        {
            fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
        }
        fprintf(fplog,"\n");
        fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
        fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
        fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
        for(d=0; d<DIM; d++)
        {
            if (dd->nc[d] > 1)
            {
                if (d >= ddbox->npbcdim && dd->nc[d] == 2)
                {
                    shrink = 0;
                }
                else
                {
                    shrink =
                        comm->cellsize_min_dlb[d]/
                        (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
                }
                fprintf(fplog," %c %.2f",dim2char(d),shrink);
            }
        }
        fprintf(fplog,"\n");
    }
    else
    {
        set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
        fprintf(fplog,"The initial number of communication pulses is:");
        for(d=0; d<dd->ndim; d++)
        {
            fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
        }
        fprintf(fplog,"\n");
        fprintf(fplog,"The initial domain decomposition cell size is:");
        for(d=0; d<DIM; d++)
        {
            if (dd->nc[d] > 1)
            {
                fprintf(fplog," %c %.2f nm",
                        dim2char(d),dd->comm->cellsize_min[d]);
            }
        }
        fprintf(fplog,"\n\n");
    }

    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
    {
        fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
        fprintf(fplog,"%40s %-7s %6.3f nm\n",
                "non-bonded interactions","",comm->cutoff);

        if (bDynLoadBal)
        {
            limit = dd->comm->cellsize_limit;
        }
        else
        {
            if (dynamic_dd_box(ddbox,ir))
            {
                fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
            }
            limit = dd->comm->cellsize_min[XX];
            for(d=1; d<DIM; d++)
            {
                limit = min(limit,dd->comm->cellsize_min[d]);
            }
        }

        if (comm->bInterCGBondeds)
        {
            fprintf(fplog,"%40s %-7s %6.3f nm\n",
                    "two-body bonded interactions","(-rdd)",
                    max(comm->cutoff,comm->cutoff_mbody));
            fprintf(fplog,"%40s %-7s %6.3f nm\n",
                    "multi-body bonded interactions","(-rdd)",
                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
        }
        if (dd->vsite_comm)
        {
            fprintf(fplog,"%40s %-7s %6.3f nm\n",
                    "virtual site constructions","(-rcon)",limit);
        }
        if (dd->constraint_comm)
        {
            sprintf(buf,"atoms separated by up to %d constraints",
                    1+ir->nProjOrder);
            fprintf(fplog,"%40s %-7s %6.3f nm\n",
                    buf,"(-rcon)",limit);
        }
        fprintf(fplog,"\n");
    }
}
void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
                       t_inputrec *ir,t_forcerec *fr,
                       gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int  d,dim,npulse,npulse_d_max,npulse_d;
    gmx_bool bNoCutOff;
    int  natoms_tot;
    real vol_frac;

    comm = dd->comm;

    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);

    if (EEL_PME(ir->coulombtype))
    {
        init_ddpme(dd,&comm->ddpme[0],0);
        if (comm->npmedecompdim >= 2)
        {
            init_ddpme(dd,&comm->ddpme[1],1);
        }
    }
    else
    {
        comm->npmenodes = 0;
        if (dd->pme_nodeid >= 0)
        {
            gmx_fatal_collective(FARGS,NULL,dd,
                                 "Can not have separate PME nodes without PME electrostatics");
        }
    }

    /* If each molecule is a single charge group
     * or we use domain decomposition for each periodic dimension,
     * we do not need to take pbc into account for the bonded interactions.
     */
    if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
        (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
    {
        fr->bMolPBC = FALSE;
    }
    else
    {
        fr->bMolPBC = TRUE;
    }

    if (debug)
    {
        fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
    }
    if (comm->eDLB != edlbNO)
    {
        /* Determine the maximum number of comm. pulses in one dimension */

        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);

        /* Determine the maximum required number of grid pulses */
        if (comm->cellsize_limit >= comm->cutoff)
        {
            /* Only a single pulse is required */
            npulse = 1;
        }
        else if (!bNoCutOff && comm->cellsize_limit > 0)
        {
            /* We round down slightly here to avoid overhead due to the latency
             * of extra communication calls when the cut-off
             * would be only slightly longer than the cell size.
             * Later cellsize_limit is redetermined,
             * so we can not miss interactions due to this rounding.
             */
            npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
        }
        else
        {
            /* There is no cell size limit */
            npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
        }

        if (!bNoCutOff && npulse > 1)
        {
            /* See if we can do with fewer pulses, based on dlb_scale */
            npulse_d_max = 0;
            for(d=0; d<dd->ndim; d++)
            {
                dim = dd->dim[d];
                npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
                                 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
                npulse_d_max = max(npulse_d_max,npulse_d);
            }
            npulse = min(npulse,npulse_d_max);
        }

        /* This env var can override npulse */
        d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
        if (d > 0)
        {
            npulse = d;
        }

        comm->maxpulse = 1;
        comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
        for(d=0; d<dd->ndim; d++)
        {
            comm->cd[d].np_dlb    = min(npulse,dd->nc[dd->dim[d]]-1);
            comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
            snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
            comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
            if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
            {
                comm->bVacDLBNoLimit = FALSE;
            }
        }

        /* cellsize_limit is set for LINCS in init_domain_decomposition */
        if (!comm->bVacDLBNoLimit)
        {
            comm->cellsize_limit = max(comm->cellsize_limit,
                                       comm->cutoff/comm->maxpulse);
        }
        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
        /* Set the minimum cell size for each DD dimension */
        for(d=0; d<dd->ndim; d++)
        {
            if (comm->bVacDLBNoLimit ||
                comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
            {
                comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
            }
            else
            {
                comm->cellsize_min_dlb[dd->dim[d]] =
                    comm->cutoff/comm->cd[d].np_dlb;
            }
        }
        if (comm->cutoff_mbody <= 0)
        {
            comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
        }
        if (comm->bDynLoadBal)
        {
            set_dlb_limits(dd);
        }
    }

    print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
    if (comm->eDLB == edlbAUTO)
    {
        if (fplog)
        {
            fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
        }
        print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
    }

    if (ir->ePBC == epbcNONE)
    {
        vol_frac = 1 - 1/(double)dd->nnodes;
    }
    else
    {
        vol_frac =
            (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
    }
    if (debug)
    {
        fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
    }
    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];

    dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
}
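
/* Worked example for the npulse estimate above (illustrative numbers):
 * with cutoff = 1.2 nm and cellsize_limit = 0.5 nm,
 * npulse = (int)(0.96 + 1.2/0.5) = (int)3.36 = 3, i.e. three communication
 * pulses cover the cut-off with the slight round-down described in the
 * comment.
 */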
static void merge_cg_buffers(int ncell,
                             gmx_domdec_comm_dim_t *cd, int pulse,
                             int  *ncg_cell,
                             int  *index_gl, int  *recv_i,
                             rvec *cg_cm,    rvec *recv_vr,
                             int  *cgindex,
                             cginfo_mb_t *cginfo_mb,int *cginfo)
{
    gmx_domdec_ind_t *ind,*ind_p;
    int p,cell,c,cg,cg0,cg1,cg_gl,nat;
    int shift,shift_at;

    ind = &cd->ind[pulse];

    /* First correct the already stored data */
    shift = ind->nrecv[ncell];
    for(cell=ncell-1; cell>=0; cell--)
    {
        shift -= ind->nrecv[cell];
        if (shift > 0)
        {
            /* Move the cg's present from previous grid pulses */
            cg0 = ncg_cell[ncell+cell];
            cg1 = ncg_cell[ncell+cell+1];
            cgindex[cg1+shift] = cgindex[cg1];
            for(cg=cg1-1; cg>=cg0; cg--)
            {
                index_gl[cg+shift] = index_gl[cg];
                copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
                cgindex[cg+shift] = cgindex[cg];
                cginfo[cg+shift]  = cginfo[cg];
            }
            /* Correct the already stored send indices for the shift */
            for(p=1; p<=pulse; p++)
            {
                ind_p = &cd->ind[p];
                cg0 = 0;
                for(c=0; c<cell; c++)
                {
                    cg0 += ind_p->nsend[c];
                }
                cg1 = cg0 + ind_p->nsend[cell];
                for(cg=cg0; cg<cg1; cg++)
                {
                    ind_p->index[cg] += shift;
                }
            }
        }
    }

    /* Merge in the communicated buffers */
    shift    = 0;
    shift_at = 0;
    cg0 = 0;
    for(cell=0; cell<ncell; cell++)
    {
        cg1 = ncg_cell[ncell+cell+1] + shift;
        if (shift_at > 0)
        {
            /* Correct the old cg indices */
            for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
            {
                cgindex[cg+1] += shift_at;
            }
        }
        for(cg=0; cg<ind->nrecv[cell]; cg++)
        {
            /* Copy this charge group from the buffer */
            index_gl[cg1] = recv_i[cg0];
            copy_rvec(recv_vr[cg0],cg_cm[cg1]);
            /* Add it to the cgindex */
            cg_gl = index_gl[cg1];
            cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
            nat = GET_CGINFO_NATOMS(cginfo[cg1]);
            cgindex[cg1+1] = cgindex[cg1] + nat;
            cg0++;
            cg1++;
            shift_at += nat;
        }
        shift += ind->nrecv[cell];
        ncg_cell[ncell+cell+1] = cg1;
    }
}
static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
                               int nzone,int cg0,const int *cgindex)
{
    int cg,zone,p;

    /* Store the atom block boundaries for easy copying of communication buffers
     */
    cg = cg0;
    for(zone=0; zone<nzone; zone++)
    {
        for(p=0; p<cd->np; p++)
        {
            cd->ind[p].cell2at0[zone] = cgindex[cg];
            cg += cd->ind[p].nrecv[zone];
            cd->ind[p].cell2at1[zone] = cgindex[cg];
        }
    }
}
static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
{
    int  i;
    gmx_bool bMiss;

    bMiss = FALSE;
    for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
    {
        if (!bLocalCG[link->a[i]])
        {
            bMiss = TRUE;
        }
    }

    return bMiss;
}
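
/* Note (summary, not original text): missing_link() returns TRUE when any
 * charge group linked to cg_gl through bonded interactions is not yet
 * present locally; setup_dd_communication() below uses this to decide
 * whether a cg must still be communicated when bBondComm is active.
 */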
static void setup_dd_communication(gmx_domdec_t *dd,
                                   matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
{
    int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
    int nzone,nzone_send,zone,zonei,cg0,cg1;
    int c,i,j,cg,cg_gl,nrcg;
    int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
    gmx_domdec_comm_t *comm;
    gmx_domdec_zones_t *zones;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    cginfo_mb_t *cginfo_mb;
    gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
    real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
    rvec rb,rn;
    real corner[DIM][4],corner_round_0=0,corner_round_1[4];
    real bcorner[DIM],bcorner_round_1=0;
    ivec tric_dist;
    rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
    real skew_fac2_d,skew_fac_01;
    real sf2_round[DIM];
    int  nsend,nat;

    if (debug)
    {
        fprintf(debug,"Setting up DD communication\n");
    }

    comm  = dd->comm;
    cg_cm = fr->cg_cm;

    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
    {
        dim = dd->dim[dim_ind];

        /* Check if we need to use triclinic distances */
        tric_dist[dim_ind] = 0;
        for(i=0; i<=dim_ind; i++)
        {
            if (ddbox->tric_dir[dd->dim[i]])
            {
                tric_dist[dim_ind] = 1;
            }
        }
    }

    bBondComm = comm->bBondComm;

    /* Do we need to determine extra distances for multi-body bondeds? */
    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);

    /* Do we need to determine extra distances for only two-body bondeds? */
    bDist2B = (bBondComm && !bDistMB);

    r_comm2  = sqr(comm->cutoff);
    r_bcomm2 = sqr(comm->cutoff_mbody);

    if (debug)
    {
        fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
    }

    zones = &comm->zones;

    dim0 = dd->dim[0];
    /* The first dimension is equal for all cells */
    corner[0][0] = comm->cell_x0[dim0];
    if (bDistMB)
    {
        bcorner[0] = corner[0][0];
    }
    if (dd->ndim >= 2)
    {
        dim1 = dd->dim[1];
        /* This cell row is only seen from the first row */
        corner[1][0] = comm->cell_x0[dim1];
        /* All rows can see this row */
        corner[1][1] = comm->cell_x0[dim1];
        if (dd->bGridJump)
        {
            corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
            if (bDistMB)
            {
                /* For the multi-body distance we need the maximum */
                bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
            }
        }
        /* Set the upper-right corner for rounding */
        corner_round_0 = comm->cell_x1[dim0];

        if (dd->ndim >= 3)
        {
            dim2 = dd->dim[2];
            for(j=0; j<4; j++)
            {
                corner[2][j] = comm->cell_x0[dim2];
            }
            if (dd->bGridJump)
            {
                /* Use the maximum of the i-cells that see a j-cell */
                for(i=0; i<zones->nizone; i++)
                {
                    for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
                    {
                        if (j >= 4)
                        {
                            corner[2][j-4] =
                                max(corner[2][j-4],
                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
                        }
                    }
                }
                if (bDistMB)
                {
                    /* For the multi-body distance we need the maximum */
                    bcorner[2] = comm->cell_x0[dim2];
                    for(i=0; i<2; i++)
                    {
                        for(j=0; j<2; j++)
                        {
                            bcorner[2] = max(bcorner[2],
                                             comm->zone_d2[i][j].p1_0);
                        }
                    }
                }
            }

            /* Set the upper-right corner for rounding */
            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
             * Only cell (0,0,0) can see cell 7 (1,1,1)
             */
            corner_round_1[0] = comm->cell_x1[dim1];
            corner_round_1[3] = comm->cell_x1[dim1];
            if (dd->bGridJump)
            {
                corner_round_1[0] = max(comm->cell_x1[dim1],
                                        comm->zone_d1[1].mch1);
                if (bDistMB)
                {
                    /* For the multi-body distance we need the maximum */
                    bcorner_round_1 = max(comm->cell_x1[dim1],
                                          comm->zone_d1[1].p1_1);
                }
            }
        }
    }

    /* Triclinic stuff */
    normal = ddbox->normal;
    skew_fac_01 = 0;
    if (dd->ndim >= 2)
    {
        v_0 = ddbox->v[dim0];
        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
        {
            /* Determine the coupling coefficient for the distances
             * to the cell planes along dim0 and dim1 through dim2.
             * This is required for correct rounding.
             */
            skew_fac_01 =
                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
            if (debug)
            {
                fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
            }
        }
    }
    if (dd->ndim >= 3)
    {
        v_1 = ddbox->v[dim1];
    }

    zone_cg_range = zones->cg_range;
    index_gl  = dd->index_gl;
    cgindex   = dd->cgindex;
    cginfo_mb = fr->cginfo_mb;

    zone_cg_range[0]   = 0;
    zone_cg_range[1]   = dd->ncg_home;
    comm->zone_ncg1[0] = dd->ncg_home;
    pos_cg             = dd->ncg_home;

    nat_tot = dd->nat_home;
    nzone = 1;
    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
    {
        dim = dd->dim[dim_ind];
        cd  = &comm->cd[dim_ind];

        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
        {
            /* No pbc in this dimension, the first node should not comm. */
            nzone_send = 0;
        }
        else
        {
            nzone_send = nzone;
        }

        bScrew = (dd->bScrewPBC && dim == XX);

        v_d = ddbox->v[dim];
        skew_fac2_d = sqr(ddbox->skew_fac[dim]);

        cd->bInPlace = TRUE;
        for(p=0; p<cd->np; p++)
        {
            /* Only atoms communicated in the first pulse are used
             * for multi-body bonded interactions or for bBondComm.
             */
            bDistBonded   = ((bDistMB || bDist2B) && p == 0);
            bDistMB_pulse = (bDistMB && bDistBonded);

            ind = &cd->ind[p];
            nsend = 0;
            nat   = 0;
            for(zone=0; zone<nzone_send; zone++)
            {
                if (tric_dist[dim_ind] && dim_ind > 0)
                {
                    /* Determine slightly more optimized skew_fac's
                     * for the communication.
                     * This reduces the number of communicated atoms
                     * by about 10% for 3D DD of rhombic dodecahedra.
                     */
                    for(dimd=0; dimd<dim; dimd++)
                    {
                        sf2_round[dimd] = 1;
                        if (ddbox->tric_dir[dimd])
                        {
                            for(i=dd->dim[dimd]+1; i<DIM; i++)
                            {
                                /* If we are shifted in dimension i
                                 * and the cell plane is tilted forward
                                 * in dimension i, skip this coupling.
                                 */
                                if (!(zones->shift[nzone+zone][i] &&
                                      ddbox->v[dimd][i][dimd] >= 0))
                                {
                                    sf2_round[dimd] +=
                                        sqr(ddbox->v[dimd][i][dimd]);
                                }
                            }
                            sf2_round[dimd] = 1/sf2_round[dimd];
                        }
                    }
                }

                zonei = zone_perm[dim_ind][zone];
                if (p == 0)
                {
                    /* Here we permute the zones to obtain a convenient order
                     * for neighbor searching
                     */
                    cg0 = zone_cg_range[zonei];
                    cg1 = zone_cg_range[zonei+1];
                }
                else
                {
                    /* Look only at the cg's received in the previous grid pulse
                     */
                    cg1 = zone_cg_range[nzone+zone+1];
                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
                }
                ind->nsend[zone] = 0;
                for(cg=cg0; cg<cg1; cg++)
                {
                    r2  = 0;
                    rb2 = 0;
                    if (tric_dist[dim_ind] == 0)
                    {
                        /* Rectangular direction, easy */
                        r = cg_cm[cg][dim] - corner[dim_ind][zone];
                        if (r > 0)
                        {
                            r2 += r*r;
                        }
                        if (bDistMB_pulse)
                        {
                            r = cg_cm[cg][dim] - bcorner[dim_ind];
                            if (r > 0)
                            {
                                rb2 += r*r;
                            }
                        }
                        /* Rounding gives at most a 16% reduction
                         * in communicated atoms
                         */
                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
                        {
                            r = cg_cm[cg][dim0] - corner_round_0;
                            /* This is the first dimension, so always r >= 0 */
                            r2 += r*r;
                            if (bDistMB_pulse)
                            {
                                rb2 += r*r;
                            }
                        }
                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
                        {
                            r = cg_cm[cg][dim1] - corner_round_1[zone];
                            if (r > 0)
                            {
                                r2 += r*r;
                            }
                            if (bDistMB_pulse)
                            {
                                r = cg_cm[cg][dim1] - bcorner_round_1;
                                if (r > 0)
                                {
                                    rb2 += r*r;
                                }
                            }
                        }
                    }
                    else
                    {
                        /* Triclinic direction, more complicated */
                        clear_rvec(rn);
                        clear_rvec(rb);
                        /* Rounding, conservative as the skew_fac multiplication
                         * will slightly underestimate the distance.
                         */
                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
                        {
                            rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
                            for(i=dim0+1; i<DIM; i++)
                            {
                                rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
                            }
                            r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
                            if (bDistMB_pulse)
                            {
                                rb[dim0] = rn[dim0];
                                rb2 = r2;
                            }
                            /* Take care that the cell planes along dim0 might not
                             * be orthogonal to those along dim1 and dim2.
                             */
                            for(i=1; i<=dim_ind; i++)
                            {
                                dimd = dd->dim[i];
                                if (normal[dim0][dimd] > 0)
                                {
                                    rn[dimd] -= rn[dim0]*normal[dim0][dimd];
                                    if (bDistMB_pulse)
                                    {
                                        rb[dimd] -= rb[dim0]*normal[dim0][dimd];
                                    }
                                }
                            }
                        }
                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
                        {
                            rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
                            tric_sh = 0;
                            for(i=dim1+1; i<DIM; i++)
                            {
                                tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
                            }
                            rn[dim1] += tric_sh;
                            if (rn[dim1] > 0)
                            {
                                r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
                                /* Take care of coupling of the distances
                                 * to the planes along dim0 and dim1 through dim2.
                                 */
                                r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
                                /* Take care that the cell planes along dim1
                                 * might not be orthogonal to that along dim2.
                                 */
                                if (normal[dim1][dim2] > 0)
                                {
                                    rn[dim2] -= rn[dim1]*normal[dim1][dim2];
                                }
                            }
                            if (bDistMB_pulse)
                            {
                                rb[dim1] +=
                                    cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
                                if (rb[dim1] > 0)
                                {
                                    rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
                                    /* Take care of coupling of the distances
                                     * to the planes along dim0 and dim1 through dim2.
                                     */
                                    rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
                                    /* Take care that the cell planes along dim1
                                     * might not be orthogonal to that along dim2.
                                     */
                                    if (normal[dim1][dim2] > 0)
                                    {
                                        rb[dim2] -= rb[dim1]*normal[dim1][dim2];
                                    }
                                }
                            }
                        }
                        /* The distance along the communication direction */
                        rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
                        tric_sh = 0;
                        for(i=dim+1; i<DIM; i++)
                        {
                            tric_sh -= cg_cm[cg][i]*v_d[i][dim];
                        }
                        rn[dim] += tric_sh;
                        if (rn[dim] > 0)
                        {
                            r2 += rn[dim]*rn[dim]*skew_fac2_d;
                            /* Take care of coupling of the distances
                             * to the planes along dim0 and dim1 through dim2.
                             */
                            if (dim_ind == 1 && zonei == 1)
                            {
                                r2 -= rn[dim0]*rn[dim]*skew_fac_01;
                            }
                        }
                        if (bDistMB_pulse)
                        {
                            rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
                            if (rb[dim] > 0)
                            {
                                rb2 += rb[dim]*rb[dim]*skew_fac2_d;
                                /* Take care of coupling of the distances
                                 * to the planes along dim0 and dim1 through dim2.
                                 */
                                if (dim_ind == 1 && zonei == 1)
                                {
                                    rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
                                }
                            }
                        }
                    }

                    if (r2 < r_comm2 ||
                        (bDistBonded &&
                         ((bDistMB && rb2 < r_bcomm2) ||
                          (bDist2B && r2  < r_bcomm2)) &&
                         (!bBondComm ||
                          (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
                           missing_link(comm->cglink,index_gl[cg],
                                        comm->bLocalCG)))))
                    {
                        /* Make an index to the local charge groups */
                        if (nsend+1 > ind->nalloc)
                        {
                            ind->nalloc = over_alloc_large(nsend+1);
                            srenew(ind->index,ind->nalloc);
                        }
                        if (nsend+1 > comm->nalloc_int)
                        {
                            comm->nalloc_int = over_alloc_large(nsend+1);
                            srenew(comm->buf_int,comm->nalloc_int);
                        }
                        ind->index[nsend]    = cg;
                        comm->buf_int[nsend] = index_gl[cg];
                        ind->nsend[zone]++;
                        vec_rvec_check_alloc(&comm->vbuf,nsend+1);

                        if (dd->ci[dim] == 0)
                        {
                            /* Correct cg_cm for pbc */
                            rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
                            if (bScrew)
                            {
                                comm->vbuf.v[nsend][YY] =
                                    box[YY][YY]-comm->vbuf.v[nsend][YY];
                                comm->vbuf.v[nsend][ZZ] =
                                    box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
                            }
                        }
                        else
                        {
                            copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
                        }
                        nsend++;
                        nat += cgindex[cg+1] - cgindex[cg];
                    }
                }
            }
            /* Clear the counts in case we do not have pbc */
            for(zone=nzone_send; zone<nzone; zone++)
            {
                ind->nsend[zone] = 0;
            }
            ind->nsend[nzone]   = nsend;
            ind->nsend[nzone+1] = nat;
            /* Communicate the number of cg's and atoms to receive */
            dd_sendrecv_int(dd, dim_ind, dddirBackward,
                            ind->nsend, nzone+2,
                            ind->nrecv, nzone+2);

            /* The rvec buffer is also required for atom buffers of size nsend
             * in dd_move_x and dd_move_f.
             */
            vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);

            if (p > 0)
            {
                /* We can receive in place if only the last zone is not empty */
                for(zone=0; zone<nzone-1; zone++)
                {
                    if (ind->nrecv[zone] > 0)
                    {
                        cd->bInPlace = FALSE;
                    }
                }
                if (!cd->bInPlace)
                {
                    /* The int buffer is only required here for the cg indices */
                    if (ind->nrecv[nzone] > comm->nalloc_int2)
                    {
                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
                        srenew(comm->buf_int2,comm->nalloc_int2);
                    }
                    /* The rvec buffer is also required for atom buffers
                     * of size nrecv in dd_move_x and dd_move_f.
                     */
                    i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
                    vec_rvec_check_alloc(&comm->vbuf2,i);
                }
            }

            /* Make space for the global cg indices */
            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
                || dd->cg_nalloc == 0)
            {
                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
                srenew(index_gl,dd->cg_nalloc);
                srenew(cgindex,dd->cg_nalloc+1);
            }
            /* Communicate the global cg indices */
            if (cd->bInPlace)
            {
                recv_i = index_gl + pos_cg;
            }
            else
            {
                recv_i = comm->buf_int2;
            }
            dd_sendrecv_int(dd, dim_ind, dddirBackward,
                            comm->buf_int, nsend,
                            recv_i,        ind->nrecv[nzone]);

            /* Make space for cg_cm */
            if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
            {
                dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
                cg_cm = fr->cg_cm;
            }
            /* Communicate cg_cm */
            if (cd->bInPlace)
            {
                recv_vr = cg_cm + pos_cg;
            }
            else
            {
                recv_vr = comm->vbuf2.v;
            }
            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
                             comm->vbuf.v, nsend,
                             recv_vr,      ind->nrecv[nzone]);

            /* Make the charge group index */
            if (cd->bInPlace)
            {
                zone = (p == 0 ? 0 : nzone - 1);
                while (zone < nzone)
                {
                    for(cg=0; cg<ind->nrecv[zone]; cg++)
                    {
                        cg_gl = index_gl[pos_cg];
                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
                        if (bBondComm)
                        {
                            /* Update the charge group presence,
                             * so we can use it in the next pass of the loop.
                             */
                            comm->bLocalCG[cg_gl] = TRUE;
                        }
                        pos_cg++;
                    }
                    if (p == 0)
                    {
                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
                    }
                    zone++;
                    zone_cg_range[nzone+zone] = pos_cg;
                }
            }
            else
            {
                /* This part of the code is never executed with bBondComm. */
                merge_cg_buffers(nzone,cd,p,zone_cg_range,
                                 index_gl,recv_i,cg_cm,recv_vr,
                                 cgindex,fr->cginfo_mb,fr->cginfo);
                pos_cg += ind->nrecv[nzone];
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        if (!cd->bInPlace)
        {
            /* Store the atom block for easy copying of communication buffers */
            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
        }
        nzone += nzone;
    }
    dd->index_gl = index_gl;
    dd->cgindex  = cgindex;

    dd->ncg_tot          = zone_cg_range[zones->n];
    dd->nat_tot          = nat_tot;
    comm->nat[ddnatHOME] = dd->nat_home;
    for(i=ddnatZONE; i<ddnatNR; i++)
    {
        comm->nat[i] = dd->nat_tot;
    }

    if (!bBondComm)
    {
        /* We don't need to update cginfo, since that was already done above.
         * So we pass NULL for the forcerec.
         */
        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
                      NULL,comm->bLocalCG);
    }

    if (debug)
    {
        fprintf(debug,"Finished setting up DD communication, zones:");
        for(c=0; c<zones->n; c++)
        {
            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
        }
        fprintf(debug,"\n");
    }
}
static void set_cg_boundaries(gmx_domdec_zones_t *zones)
{
    int c;

    for(c=0; c<zones->nizone; c++)
    {
        zones->izone[c].cg1  = zones->cg_range[c+1];
        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
    }
}
static int comp_cgsort(const void *a,const void *b)
{
    int comp;

    gmx_cgsort_t *cga,*cgb;
    cga = (gmx_cgsort_t *)a;
    cgb = (gmx_cgsort_t *)b;

    comp = cga->nsc - cgb->nsc;
    if (comp == 0)
    {
        comp = cga->ind_gl - cgb->ind_gl;
    }

    return comp;
}
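
/* Example of the sort key (illustrative): a cg in ns grid cell 7 with
 * global index 123 sorts before one in cell 7 with global index 456, and
 * both sort before anything in cell 8; this is the comparator handed to
 * qsort_threadsafe() below.
 */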
static void order_int_cg(int n,gmx_cgsort_t *sort,
                         int *a,int *buf)
{
    int i;

    /* Order the data */
    for(i=0; i<n; i++)
    {
        buf[i] = a[sort[i].ind];
    }

    /* Copy back to the original array */
    for(i=0; i<n; i++)
    {
        a[i] = buf[i];
    }
}

static void order_vec_cg(int n,gmx_cgsort_t *sort,
                         rvec *v,rvec *buf)
{
    int i;

    /* Order the data */
    for(i=0; i<n; i++)
    {
        copy_rvec(v[sort[i].ind],buf[i]);
    }

    /* Copy back to the original array */
    for(i=0; i<n; i++)
    {
        copy_rvec(buf[i],v[i]);
    }
}
static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
                           rvec *v,rvec *buf)
{
    int a,atot,cg,cg0,cg1,i;

    /* Order the data */
    a = 0;
    for(cg=0; cg<ncg; cg++)
    {
        cg0 = cgindex[sort[cg].ind];
        cg1 = cgindex[sort[cg].ind+1];
        for(i=cg0; i<cg1; i++)
        {
            copy_rvec(v[i],buf[a]);
            a++;
        }
    }
    atot = a;

    /* Copy back to the original array */
    for(a=0; a<atot; a++)
    {
        copy_rvec(buf[a],v[a]);
    }
}
static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
                         int nsort_new,gmx_cgsort_t *sort_new,
                         gmx_cgsort_t *sort1)
{
    int i1,i2,i_new;

    /* The new indices are not very ordered, so we qsort them */
    qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);

    /* sort2 is already ordered, so now we can merge the two arrays */
    i1    = 0;
    i2    = 0;
    i_new = 0;
    while(i2 < nsort2 || i_new < nsort_new)
    {
        if (i2 == nsort2)
        {
            sort1[i1++] = sort_new[i_new++];
        }
        else if (i_new == nsort_new)
        {
            sort1[i1++] = sort2[i2++];
        }
        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
                 (sort2[i2].nsc == sort_new[i_new].nsc &&
                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
        {
            sort1[i1++] = sort2[i2++];
        }
        else
        {
            sort1[i1++] = sort_new[i_new++];
        }
    }
}
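
/* This is a standard two-way merge: sort2 holds the cgs that kept their ns
 * cell (already in order), sort_new the moved ones (qsorted first), and the
 * result in sort1 is ordered by (nsc,ind_gl). Illustrative run with keys
 * (nsc,ind_gl): sort2 = {(1,5),(3,2)}, sort_new = {(2,9)} merges to
 * {(1,5),(2,9),(3,2)}.
 */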
static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
                          rvec *cgcm,t_forcerec *fr,t_state *state,
                          int ncg_home_old)
{
    gmx_domdec_sort_t *sort;
    gmx_cgsort_t *cgsort,*sort_i;
    int  ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
    rvec *vbuf;

    sort = dd->comm->sort;

    if (dd->ncg_home > sort->sort_nalloc)
    {
        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
        srenew(sort->sort1,sort->sort_nalloc);
        srenew(sort->sort2,sort->sort_nalloc);
    }

    if (ncg_home_old >= 0)
    {
        /* The charge groups that remained in the same ns grid cell
         * are completely ordered. So we can sort efficiently by sorting
         * the charge groups that did move into the stationary list.
         */
        ncg_new   = 0;
        nsort2    = 0;
        nsort_new = 0;
        for(i=0; i<dd->ncg_home; i++)
        {
            /* Check if this cg did not move to another node */
            cell_index = fr->ns.grid->cell_index[i];
            if (cell_index != 4*fr->ns.grid->ncells)
            {
                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
                {
                    /* This cg is new on this node or moved ns grid cell */
                    if (nsort_new >= sort->sort_new_nalloc)
                    {
                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
                        srenew(sort->sort_new,sort->sort_new_nalloc);
                    }
                    sort_i = &(sort->sort_new[nsort_new++]);
                }
                else
                {
                    /* This cg did not move */
                    sort_i = &(sort->sort2[nsort2++]);
                }
                /* Sort on the ns grid cell indices
                 * and the global topology index
                 */
                sort_i->nsc    = cell_index;
                sort_i->ind_gl = dd->index_gl[i];
                sort_i->ind    = i;
                ncg_new++;
            }
        }
        if (debug)
        {
            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
                    nsort2,nsort_new);
        }
        /* Sort efficiently */
        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
    }
    else
    {
        cgsort  = sort->sort1;
        ncg_new = 0;
        for(i=0; i<dd->ncg_home; i++)
        {
            /* Sort on the ns grid cell indices
             * and the global topology index
             */
            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
            cgsort[i].ind_gl = dd->index_gl[i];
            cgsort[i].ind    = i;
            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
            {
                ncg_new++;
            }
        }
        if (debug)
        {
            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
        }
        /* Determine the order of the charge groups using qsort */
        qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
    }
    cgsort = sort->sort1;

    /* We alloc with the old size, since cgindex is still old */
    vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
    vbuf = dd->comm->vbuf.v;

    /* Remove the charge groups which are no longer at home here */
    dd->ncg_home = ncg_new;

    /* Reorder the state */
    for(i=0; i<estNR; i++)
    {
        if (EST_DISTR(i) && state->flags & (1<<i))
        {
            switch (i)
            {
            case estX:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
                break;
            case estV:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
                break;
            case estSDX:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
                break;
            case estCGP:
                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
                break;
            case estLD_RNG:
            case estLD_RNGI:
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* No ordering required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_sort_state");
                break;
            }
        }
    }

    /* Reorder the charge group positions */
    order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);

    if (dd->ncg_home+1 > sort->ibuf_nalloc)
    {
        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
        srenew(sort->ibuf,sort->ibuf_nalloc);
    }
    ibuf = sort->ibuf;
    /* Reorder the global cg index */
    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
    /* Reorder the cginfo */
    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
    /* Rebuild the local cg index */
    ibuf[0] = 0;
    for(i=0; i<dd->ncg_home; i++)
    {
        cgsize    = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
        ibuf[i+1] = ibuf[i] + cgsize;
    }
    for(i=0; i<dd->ncg_home+1; i++)
    {
        dd->cgindex[i] = ibuf[i];
    }
    /* Set the home atom number */
    dd->nat_home = dd->cgindex[dd->ncg_home];

    /* Copy the sorted ns cell indices back to the ns grid struct */
    for(i=0; i<dd->ncg_home; i++)
    {
        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
    }
    fr->ns.grid->nr = dd->ncg_home;
}
static void add_dd_statistics(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int ddnat;

    comm = dd->comm;

    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
    {
        comm->sum_nat[ddnat-ddnatZONE] +=
            comm->nat[ddnat] - comm->nat[ddnat-1];
    }
    comm->ndecomp++;
}
void reset_dd_statistics_counters(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int ddnat;

    comm = dd->comm;

    /* Reset all the statistics and counters for total run counting */
    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
    {
        comm->sum_nat[ddnat-ddnatZONE] = 0;
    }
    comm->ndecomp   = 0;
    comm->load_step = 0;
    comm->load_sum  = 0;
    comm->load_max  = 0;
    clear_ivec(comm->load_lim);
    comm->load_mdf = 0;
    comm->load_pme = 0;
}
void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
{
    gmx_domdec_comm_t *comm;
    int ddnat;
    double av;

    comm = cr->dd->comm;

    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);

    if (fplog == NULL)
    {
        return;
    }

    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");

    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
    {
        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
        switch(ddnat)
        {
        case ddnatZONE:
            fprintf(fplog,
                    " av. #atoms communicated per step for force:  %d x %.1f\n",
                    2,av);
            break;
        case ddnatVSITE:
            if (cr->dd->vsite_comm)
            {
                fprintf(fplog,
                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
                        av);
            }
            break;
        case ddnatCON:
            if (cr->dd->constraint_comm)
            {
                fprintf(fplog,
                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
                        1 + ir->nLincsIter,av);
            }
            break;
        default:
            gmx_incons(" Unknown type for DD statistics");
        }
    }
    fprintf(fplog,"\n");

    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
    {
        print_dd_load_av(fplog,cr->dd);
    }
}
8145 void dd_partition_system(FILE *fplog
,
8146 gmx_large_int_t step
,
8148 gmx_bool bMasterState
,
8150 t_state
*state_global
,
8151 gmx_mtop_t
*top_global
,
8153 t_state
*state_local
,
8156 gmx_localtop_t
*top_local
,
8159 gmx_shellfc_t shellfc
,
8160 gmx_constr_t constr
,
8162 gmx_wallcycle_t wcycle
,
8166 gmx_domdec_comm_t
*comm
;
8167 gmx_ddbox_t ddbox
={0};
8169 gmx_large_int_t step_pcoupl
;
8170 rvec cell_ns_x0
,cell_ns_x1
;
8171 int i
,j
,n
,cg0
=0,ncg_home_old
=-1,nat_f_novirsum
;
8172 gmx_bool bBoxChanged
,bNStGlobalComm
,bDoDLB
,bCheckDLB
,bTurnOnDLB
,bLogLoad
;
8173 gmx_bool bRedist
,bSortCG
,bResortAll
;
8181 bBoxChanged
= (bMasterState
|| DEFORM(*ir
));
8182 if (ir
->epc
!= epcNO
)
8184 /* With nstcalcenery > 1 pressure coupling happens.
8185 * one step after calculating the energies.
8186 * Box scaling happens at the end of the MD step,
8187 * after the DD partitioning.
8188 * We therefore have to do DLB in the first partitioning
8189 * after an MD step where P-coupling occured.
8190 * We need to determine the last step in which p-coupling occurred.
8191 * MRS -- need to validate this for vv?
8193 n
= ir
->nstcalcenergy
;
8196 step_pcoupl
= step
- 1;
8200 step_pcoupl
= ((step
- 1)/n
)*n
+ 1;
8202 if (step_pcoupl
>= comm
->partition_step
)
8208 bNStGlobalComm
= (step
>= comm
->partition_step
+ nstglobalcomm
);
8210 if (!comm
->bDynLoadBal
)
8216 /* Should we do dynamic load balacing this step?
8217 * Since it requires (possibly expensive) global communication,
8218 * we might want to do DLB less frequently.
8220 if (bBoxChanged
|| ir
->epc
!= epcNO
)
8222 bDoDLB
= bBoxChanged
;
8226 bDoDLB
= bNStGlobalComm
;
8230 /* Check if we have recorded loads on the nodes */
8231 if (comm
->bRecordLoad
&& dd_load_count(comm
))
8233 if (comm
->eDLB
== edlbAUTO
&& !comm
->bDynLoadBal
)
8235 /* Check if we should use DLB at the second partitioning
8236 * and every 100 partitionings,
8237 * so the extra communication cost is negligible.
8239 n
= max(100,nstglobalcomm
);
8240 bCheckDLB
= (comm
->n_load_collect
== 0 ||
8241 comm
->n_load_have
% n
== n
-1);
8248 /* Print load every nstlog, first and last step to the log file */
8249 bLogLoad
= ((ir
->nstlog
> 0 && step
% ir
->nstlog
== 0) ||
8250 comm
->n_load_collect
== 0 ||
8251 (step
+ ir
->nstlist
> ir
->init_step
+ ir
->nsteps
));
        /* Avoid extra communication due to verbose screen output
         * when nstglobalcomm is set.
         */
        if (bDoDLB || bLogLoad || bCheckDLB ||
            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
        {
            get_load_distribution(dd,wcycle);
            if (DDMASTER(dd))
            {
                if (bLogLoad)
                {
                    dd_print_load(fplog,dd,step-1);
                }
                if (bVerbose)
                {
                    dd_print_load_verbose(dd);
                }
            }
            comm->n_load_collect++;
            if (bCheckDLB)
            {
                /* Since the timings are node dependent, the master decides */
                if (DDMASTER(dd))
                {
                    bTurnOnDLB =
                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
                    if (debug)
                    {
                        fprintf(debug,"step %s, imb loss %f\n",
                                gmx_step_str(step,sbuf),
                                dd_force_imb_perf_loss(dd));
                    }
                }
                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
                if (bTurnOnDLB)
                {
                    turn_on_dlb(fplog,cr,step);
                    bDoDLB = TRUE;
                }
            }
        }
        comm->n_load_have++;
    }
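    /* The imbalance estimate is based on rank-local timings, so only
     * the master compares it against DD_PERF_LOSS; broadcasting the
     * resulting gmx_bool ensures all ranks turn DLB on in the same
     * partitioning step and stay in lockstep.
     */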
    cgs_gl = &comm->cgs_gl;

    bRedist = FALSE;
    if (bMasterState)
    {
        /* Clear the old state */
        clear_dd_indices(dd,0,0);

        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
                  TRUE,cgs_gl,state_global->x,&ddbox);

        get_cg_distribution(fplog,step,dd,cgs_gl,
                            state_global->box,&ddbox,state_global->x);

        dd_distribute_state(dd,cgs_gl,
                            state_global,state_local,f);

        dd_make_local_cgs(dd,&top_local->cgs);

        if (dd->ncg_home > fr->cg_nalloc)
        {
            dd_realloc_fr_cg(fr,dd->ncg_home);
        }
        calc_cgcm(fplog,0,dd->ncg_home,
                  &top_local->cgs,state_local->x,fr->cg_cm);

        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);

        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
    }
    else if (state_local->ddp_count != dd->ddp_count)
    {
        if (state_local->ddp_count > dd->ddp_count)
        {
            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
        }

        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
        {
            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
        }
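        /* How we read this branch: ddp_count stamps a state with the
         * partitioning it was built for (state_local->ddp_count is set
         * near the end of this function). A mismatch means the local
         * state stems from an earlier partitioning, e.g. one read back
         * from a checkpoint, so the local indices are rebuilt below
         * from the stored global charge group index.
         */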
        /* Clear the old state */
        clear_dd_indices(dd,0,0);

        /* Build the new indices */
        rebuild_cgindex(dd,cgs_gl->index,state_local);
        make_dd_indices(dd,cgs_gl->index,0);

        /* Redetermine the cg COMs */
        calc_cgcm(fplog,0,dd->ncg_home,
                  &top_local->cgs,state_local->x,fr->cg_cm);

        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);

        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);

        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
                  TRUE,&top_local->cgs,state_local->x,&ddbox);

        bRedist = comm->bDynLoadBal;
    }
    else
    {
        /* We have the full state, only redistribute the cgs */

        /* Clear the non-home indices */
        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);

        /* Avoid global communication for dim's without pbc and -gcom */
        if (!bNStGlobalComm)
        {
            copy_rvec(comm->box0    ,ddbox.box0    );
            copy_rvec(comm->box_size,ddbox.box_size);
        }
        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);

        bRedist = TRUE;
    }
    /* For dim's without pbc and -gcom */
    copy_rvec(ddbox.box0    ,comm->box0    );
    copy_rvec(ddbox.box_size,comm->box_size);

    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
                      step,wcycle);
    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
    {
        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
    }
    /* Check if we should sort the charge groups */
    if (comm->nstSortCG > 0)
    {
        bSortCG = (bMasterState ||
                   (bRedist && (step % comm->nstSortCG == 0)));
    }
    else
    {
        bSortCG = FALSE;
    }

    ncg_home_old = dd->ncg_home;
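    /* ncg_home_old records how many home charge groups existed before
     * redistribution; the sort further down uses it to reuse the old
     * ordering instead of resorting everything (see bResortAll).
     */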
    if (bRedist)
    {
        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
                                 state_local,f,fr,mdatoms,
                                 !bSortCG,nrnb);
    }
    get_nsgrid_boundaries(fr->ns.grid,dd,
                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
                          dd->ncg_home,fr->cg_cm,
                          cell_ns_x0,cell_ns_x1,&grid_density);
    if (bBoxChanged)
    {
        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
    }
    copy_ivec(fr->ns.grid->n,ncells_old);
    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
               state_local->box,cell_ns_x0,cell_ns_x1,
               fr->rlistlong,grid_density);
    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
    copy_ivec(ddbox.tric_dir,comm->tric_dir);
    if (bSortCG)
    {
        /* Sort the state on charge group position.
         * This enables exact restarts from this step.
         * It also improves performance by about 15% with larger numbers
         * of atoms per node.
         */

        /* Fill the ns grid with the home cell,
         * so we can sort with the indices.
         */
        set_zones_ncg_home(dd);
        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
                  0,dd->ncg_home,fr->cg_cm);
        /* Check if we can use the old order and ns grid cell indices
         * of the charge groups to sort the charge groups efficiently.
         */
        bResortAll = (bMasterState ||
                      fr->ns.grid->n[XX] != ncells_old[XX] ||
                      fr->ns.grid->n[YY] != ncells_old[YY] ||
                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
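        /* If the ns grid dimensions are unchanged, the previous order
         * and cell indices are still mostly valid and only charge
         * groups that changed cells need to move; passing ncg_home_old
         * to dd_sort_state below exploits that, while -1 (bResortAll)
         * forces a full resort.
         */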
        if (debug)
        {
            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
                    gmx_step_str(step,sbuf),dd->ncg_home);
        }
        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
                      bResortAll ? -1 : ncg_home_old);
        /* Rebuild all the indices */
        cg0 = 0;
        ga2la_clear(dd->ga2la);
    }
    /* Set up the communication and communicate the coordinates */
    setup_dd_communication(dd,state_local->box,&ddbox,fr);

    /* Set the indices */
    make_dd_indices(dd,cgs_gl->index,cg0);

    /* Set the charge group boundaries for neighbor searching */
    set_cg_boundaries(&comm->zones);
    /*
    write_dd_pdb("dd_home",step,"dump",top_global,cr,
                 -1,state_local->x,state_local->box);
    */
    /* Extract a local topology from the global topology */
    for(i=0; i<dd->ndim; i++)
    {
        np[dd->dim[i]] = comm->cd[i].np;
    }
    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
                      comm->cellsize_min,np,
                      fr,vsite,top_global,top_local);
    /* Set up the special atom communication */
    n = comm->nat[ddnatZONE];
    for(i=ddnatZONE+1; i<ddnatNR; i++)
    {
        switch(i)
        {
        case ddnatVSITE:
            if (vsite && vsite->n_intercg_vsite)
            {
                n = dd_make_local_vsites(dd,n,top_local->idef.il);
            }
            break;
        case ddnatCON:
            if (dd->bInterCGcons)
            {
                /* Only for inter-cg constraints we need special code */
                n = dd_make_local_constraints(dd,n,top_global,
                                              constr,ir->nProjOrder,
                                              &top_local->idef.il[F_CONSTR]);
            }
            break;
        default:
            gmx_incons("Unknown special atom type setup");
        }
        comm->nat[i] = n;
    }
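    /* After this loop comm->nat[] holds cumulative counts: each entry
     * adds the atoms communicated for that class on top of the
     * previous one, so comm->nat[ddnatNR-1], used just below for
     * state_local->natoms, is the total of home plus all
     * communicated atoms on this node.
     */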
    /* Make space for the extra coordinates for virtual site
     * or constraint communication.
     */
    state_local->natoms = comm->nat[ddnatNR-1];
    if (state_local->natoms > state_local->nalloc)
    {
        dd_realloc_state(state_local,f,state_local->natoms);
    }
    if (fr->bF_NoVirSum)
    {
        if (vsite && vsite->n_intercg_vsite)
        {
            nat_f_novirsum = comm->nat[ddnatVSITE];
        }
        else
        {
            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
            {
                nat_f_novirsum = dd->nat_tot;
            }
            else
            {
                nat_f_novirsum = dd->nat_home;
            }
        }
    }
    else
    {
        nat_f_novirsum = 0;
    }
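    /* Our reading of the cascade above: nat_f_novirsum sizes the
     * force range whose contributions must be kept out of the virial
     * sum. Inter-cg vsites spread forces onto communicated atoms, and
     * full electrostatics with inter-cg exclusions produces correction
     * forces on non-home atoms, hence the larger ranges in those cases.
     */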
    /* Set the number of atoms required for the force calculation.
     * Forces need to be constrained when using a twin-range setup
     * or with energy minimization. For simple simulations we could
     * avoid some allocation, zeroing and copying, but this is
     * probably not worth the complications and checking.
     */
    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
                        dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
    /* We make all the mdatoms up to nat_tot_con.
     * We could save some work by only setting invmass
     * between nat_tot and nat_tot_con.
     */
    /* This call also sets the new number of home particles to dd->nat_home */
    atoms2md(top_global,ir,
             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);

    /* Now we have the charges we can sort the FE interactions */
    dd_sort_local_top(dd,mdatoms,top_local);
    if (shellfc)
    {
        /* Make the local shell stuff, currently no communication is done */
        make_local_shells(cr,mdatoms,shellfc);
    }
    if (ir->implicit_solvent)
    {
        make_local_gb(cr,fr->born,ir->gb_algorithm);
    }
    if (!(cr->duty & DUTY_PME))
    {
        /* Send the charges to our PME only node */
        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
                       mdatoms->chargeA,mdatoms->chargeB,
                       dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
    }
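    /* Context note (our reading): repartitioning changes which atoms
     * each PP node owns, so with separate PME nodes the (possibly
     * perturbed) charges must be resent; the x/y maxshift values bound
     * how far the DD cells can move relative to the PME grid
     * decomposition between such updates.
     */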
    if (constr)
    {
        set_constraints(constr,top_local,ir,mdatoms,cr);
    }
    if (ir->ePull != epullNO)
    {
        /* Update the local pull groups */
        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
    }
    if (ir->bRot)
    {
        /* Update the local rotation groups */
        dd_make_local_rotation_groups(dd,ir->rot);
    }
    add_dd_statistics(dd);

    /* Make sure we only count the cycles for this DD partitioning */
    clear_dd_cycle_counts(dd);
    /* Because the order of the atoms might have changed since
     * the last vsite construction, we need to communicate the constructing
     * atom coordinates again (for spreading the forces this MD step).
     */
    dd_move_x_vsites(dd,state_local->box,state_local->x);
    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
    {
        dd_move_x(dd,state_local->box,state_local->x);
        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
                     -1,state_local->x,state_local->box);
    }
    /* Store the partitioning step */
    comm->partition_step = step;

    /* Increase the DD partitioning counter */
    dd->ddp_count++;
    /* The state currently matches this DD partitioning count, store it */
    state_local->ddp_count = dd->ddp_count;
    if (bMasterState)
    {
        /* The DD master node knows the complete cg distribution,
         * store the count so we can possibly skip the cg info communication.
         */
        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
    }
    if (comm->DD_debug > 0)
    {
        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
                                "after partitioning");
    }
}