/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * This file is part of Gromacs        Copyright (c) 1991-2008
 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */
#include "domdec_network.h"
#include "chargegroup.h"
#include "gmx_wallcycle.h"
#include "mtop_util.h"
#include "gmx_ga2la.h"
#define DDRANK(dd,rank)    (rank)
#define DDMASTERRANK(dd)   (dd->masterrank)
typedef struct gmx_domdec_master
{
    /* The cell boundaries */
    real **cell_x;
    /* The global charge group division */
    int  *ncg;     /* Number of home charge groups for each node */
    int  *index;   /* Index of nnodes+1 into cg                  */
    int  *cg;      /* Global charge group index                  */
    int  *nat;     /* Number of home atoms for each node.        */
    int  *ibuf;    /* Buffer for communication                   */
    rvec *vbuf;    /* Buffer for state scattering and gathering  */
} gmx_domdec_master_t;
typedef struct
{
    /* The numbers of charge groups to send and receive for each cell
     * that requires communication, the last entry contains the total
     * number of atoms that needs to be communicated.
     */
    int nsend[DD_MAXIZONE+2];
    int nrecv[DD_MAXIZONE+2];
    /* The charge groups to send */
    int *index;
    int nalloc;
    /* The atom range for non-in-place communication */
    int cell2at0[DD_MAXIZONE];
    int cell2at1[DD_MAXIZONE];
} gmx_domdec_ind_t;

typedef struct
{
    int  np;                   /* Number of grid pulses in this dimension */
    int  np_dlb;               /* For dlb, for use with edlbAUTO          */
    gmx_domdec_ind_t *ind;     /* The indices to communicate, size np     */
    int  np_nalloc;
    bool bInPlace;             /* Can we communicate in place?            */
} gmx_domdec_comm_dim_t;
typedef struct
{
    bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
    real *cell_f;      /* State var.: cell boundaries, box relative      */
    real *old_cell_f;  /* Temp. var.: old cell size                      */
    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
    bool bLimited;     /* State var.: is DLB limited in this dim and row */
    real *buf_ncd;     /* Temp. var.                                     */
} gmx_domdec_root_t;
#define DD_NLOAD_MAX 9

/* Here floats are accurate enough, since these variables
 * only influence the load balancing, not the actual MD results.
 */
typedef struct
{
    int    nload;
    float  *load;
    float  sum;
    float  max;
    float  sum_m;
    float  cvol_min;
    float  mdf;
    float  pme;
    int    flags;
} gmx_domdec_load_t;
typedef struct
{
    int  nsc;
    int  ind_gl;
    int  ind;
} gmx_cgsort_t;

typedef struct
{
    gmx_cgsort_t *sort1,*sort2;
    int  sort_nalloc;
    gmx_cgsort_t *sort_new;
    int  sort_new_nalloc;
    int  *ibuf;
    int  ibuf_nalloc;
} gmx_domdec_sort_t;
/* This enum determines the order of the coordinates.
 * ddnatHOME and ddnatZONE should be first and second,
 * the others can be ordered as wanted.
 */
enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };

enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
typedef struct
{
    int  dimind;      /* The dimension index                                */
    int  nslab;       /* The number of PME slabs in this dimension          */
    real *slb_dim_f;  /* Cell sizes for determining the PME comm. with SLB  */
    int  *pp_min;     /* The minimum pp node location, size nslab           */
    int  *pp_max;     /* The maximum pp node location, size nslab           */
    int  maxshift;    /* The maximum shift for coordinate redistribution in PME */
} gmx_ddpme_t;
typedef struct
{
    real min0;    /* The minimum bottom of this zone                        */
    real max1;    /* The maximum top of this zone                           */
    real mch0;    /* The maximum bottom communication height for this zone  */
    real mch1;    /* The maximum top communication height for this zone     */
    real p1_0;    /* The bottom value of the first cell in this zone        */
    real p1_1;    /* The top value of the first cell in this zone           */
} gmx_ddzone_t;
typedef struct gmx_domdec_comm
{
    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
     * unless stated otherwise.
     */

    /* The number of decomposition dimensions for PME, 0: no PME */
    int  npmedecompdim;
    /* The number of nodes doing PME (PP/PME or only PME) */
    int  npmenodes;

    /* The communication setup including the PME only nodes */
    bool bCartesianPP_PME;
    ivec ntot;
    int  cartpmedim;
    int  *pmenodes;          /* size npmenodes                        */
    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
                              * but not with bCartesianPP_PME         */
    gmx_ddpme_t ddpme[2];

    /* The DD particle-particle nodes only */
    bool bCartesianPP;
    int  *ddindex2ddnodeid;  /* size npmenodes, only with bCartesianPP_PME */

    /* The global charge groups */
    t_block cgs_gl;

    /* Should we sort the cgs */
    int  nstSortCG;
    gmx_domdec_sort_t *sort;

    /* Are there bonded and multi-body interactions between charge groups? */
    bool bInterCGBondeds;
    bool bInterCGMultiBody;

    /* Data for the optional bonded interaction atom communication range */
    bool bBondComm;
    char *bLocalCG;

    /* Are we actually using DLB? */
    bool bDynLoadBal;

    /* Cell sizes for static load balancing, first index cartesian */
    real **slb_frac;

    /* The width of the communicated boundaries */
    real cutoff_mbody;
    real cutoff;
    /* The minimum cell size (including triclinic correction) */
    rvec cellsize_min;
    /* For dlb, for use with edlbAUTO */
    rvec cellsize_min_dlb;
    /* The lower limit for the DD cell size with DLB */
    real cellsize_limit;
    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
    bool bVacDLBNoLimit;

    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
    ivec tric_dir;
    /* box0 and box_size are required with dim's without pbc and -gcom */
    rvec box0,box_size;

    /* The cell boundaries */
    rvec cell_x0,cell_x1;

    /* The old location of the cell boundaries, to check cg displacements */
    rvec old_cell_x0,old_cell_x1;

    /* The communication setup and charge group boundaries for the zones */
    gmx_domdec_zones_t zones;

    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
     * cell boundaries of neighboring cells for dynamic load balancing.
     */
    gmx_ddzone_t zone_d1[2];
    gmx_ddzone_t zone_d2[2][2];

    /* The coordinate/force communication setup and indices */
    gmx_domdec_comm_dim_t cd[DIM];
    /* The maximum number of cells to communicate with in one dimension */
    int  maxpulse;

    /* Which cg distribution is stored on the master node */
    int  master_cg_ddp_count;

    /* The number of cg's received from the direct neighbors */
    int  zone_ncg1[DD_MAXZONE];

    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
    int  nat[ddnatNR];

    /* Communication buffer for general use */
    int  *buf_int;
    int  nalloc_int;

    /* Communication buffer for general use */
    vec_rvec_t vbuf;

    /* Communication buffers only used with multiple grid pulses */
    int  *buf_int2;
    int  nalloc_int2;
    vec_rvec_t vbuf2;

    /* Communication buffers for local redistribution */
    int  **cggl_flag;
    int  cggl_flag_nalloc[DIM*2];
    rvec **cgcm_state;
    int  cgcm_state_nalloc[DIM*2];

    /* Cell sizes for dynamic load balancing */
    gmx_domdec_root_t **root;
    real *cell_f_row;
    real cell_f0[DIM];
    real cell_f1[DIM];
    real cell_f_max0[DIM];
    real cell_f_min1[DIM];

    /* Stuff for load communication */
    bool bRecordLoad;
    gmx_domdec_load_t *load;
#ifdef GMX_MPI
    MPI_Comm *mpi_comm_load;
#endif
    /* Cycle counters */
    float cycl[ddCyclNr];
    int   cycl_n[ddCyclNr];
    float cycl_max[ddCyclNr];
    /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise) */
    int    eFlop;
    double flop;
    int    flop_n;
    /* How often have we had load measurements */
    int    n_load_have;
    /* How often have we collected the load measurements */
    int    n_load_collect;

    /* Statistics */
    double sum_nat[ddnatNR-ddnatZONE];

    /* The last partition step */
    gmx_large_int_t partition_step;

    /* Debugging */
    int  nstDDDump;
    int  nstDDDumpGrid;
    int  DD_debug;
} gmx_domdec_comm_t;
/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_FLAG_NRCG  65535
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
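
/* Illustrative decoding sketch (not in the original source): with the masks
 * above, the low 16 bits of a cggl_flag entry hold the charge-group size and
 * the bits from 16 up mark moves along each dimension, e.g.
 *
 *     nrcg = flag & DD_FLAG_NRCG;
 *     if (flag & DD_FLAG_FW(d)) { ... cg moves forward along dim d  ... }
 *     if (flag & DD_FLAG_BW(d)) { ... cg moves backward along dim d ... }
 */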
/* Zone permutation required to obtain consecutive charge groups
 * for neighbor searching.
 */
static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 * components see only j zones with that component 0.
 */

/* The DD zone order */
static const ivec dd_zo[DD_MAXZONE] =
    {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};

/* The 3D setup */
#define dd_zp3n 4
static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};

/* The 2D setup */
#define dd_zp2n 2
static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};

/* The 1D setup */
#define dd_zp1n 1
static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
/* Factors used to avoid problems due to rounding issues */
#define DD_CELL_MARGIN       1.0001
#define DD_CELL_MARGIN2      1.00005
/* Factor to account for pressure scaling during nstlist steps */
#define DD_PRES_SCALE_MARGIN 1.02

/* Allowed performance loss before we DLB or warn */
#define DD_PERF_LOSS 0.05

#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
/* Use separate MPI send and receive commands
 * when nnodes <= GMX_DD_NNODES_SENDRECV.
 * This saves memory (and some copying for small nnodes).
 * For high parallelization scatter and gather calls are used.
 */
#define GMX_DD_NNODES_SENDRECV 4
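
/* Illustrative example (assumption based on the comment above): with 4 DD
 * nodes the collect/distribute routines below use paired MPI sends and
 * receives, while with, say, 32 nodes they switch to collective calls:
 *
 *     if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 *         dd_collect_vec_sendrecv(dd,lv,v);
 *     else
 *         dd_collect_vec_gatherv(dd,lv,v);
 */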
/*
#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])

static void index2xyz(ivec nc,int ind,ivec xyz)
{
    xyz[XX] = ind % nc[XX];
    xyz[YY] = (ind / nc[XX]) % nc[YY];
    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
}
*/

/* This order is required to minimize the coordinate communication in PME
 * which uses decomposition in the x direction.
 */
#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
static void ddindex2xyz(ivec nc,int ind,ivec xyz)
{
    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
    xyz[ZZ] = ind % nc[ZZ];
}
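
/* Illustrative self-test sketch (not in the original source): ddindex2xyz
 * inverts dd_index for every in-range index, assuming nc holds the DD cell
 * counts per dimension:
 *
 *     ivec xyz;
 *     for(ind=0; ind<nc[XX]*nc[YY]*nc[ZZ]; ind++)
 *     {
 *         ddindex2xyz(nc,ind,xyz);
 *         assert(dd_index(nc,xyz) == ind);
 *     }
 */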
static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
{
    int ddindex;
    int ddnodeid=-1;

    ddindex = dd_index(dd->nc,c);
    if (dd->comm->bCartesianPP_PME)
    {
        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
    }
    else if (dd->comm->bCartesianPP)
    {
#ifdef GMX_MPI
        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
#endif
    }
    else
    {
        ddnodeid = ddindex;
    }

    return ddnodeid;
}
static bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
{
    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
}
int ddglatnr(gmx_domdec_t *dd,int i)
{
    int atnr;

    if (dd == NULL)
    {
        atnr = i + 1;
    }
    else
    {
        if (i >= dd->comm->nat[ddnatNR-1])
        {
            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
        }
        atnr = dd->gatindex[i] + 1;
    }

    return atnr;
}

t_block *dd_charge_groups_global(gmx_domdec_t *dd)
{
    return &dd->comm->cgs_gl;
}
static void vec_rvec_init(vec_rvec_t *v)
{
    v->nalloc = 0;
    v->v      = NULL;
}

static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
{
    if (n > v->nalloc)
    {
        v->nalloc = over_alloc_dd(n);
        srenew(v->v,v->nalloc);
    }
}
void dd_store_state(gmx_domdec_t *dd,t_state *state)
{
    int i;

    if (state->ddp_count != dd->ddp_count)
    {
        gmx_incons("The state does not match the domain decomposition state");
    }

    state->ncg_gl = dd->ncg_home;
    if (state->ncg_gl > state->cg_gl_nalloc)
    {
        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
        srenew(state->cg_gl,state->cg_gl_nalloc);
    }
    for(i=0; i<state->ncg_gl; i++)
    {
        state->cg_gl[i] = dd->index_gl[i];
    }

    state->ddp_count_cg_gl = dd->ddp_count;
}
gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
{
    return &dd->comm->zones;
}
void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
{
    gmx_domdec_zones_t *zones;
    int izone,d,dim;

    zones = &dd->comm->zones;

    izone = 0;
    while (icg >= zones->izone[izone].cg1)
    {
        izone++;
    }

    if (izone == 0)
    {
        *jcg0 = icg;
    }
    else if (izone < zones->nizone)
    {
        *jcg0 = zones->izone[izone].jcg0;
    }
    else
    {
        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
                  icg,izone,zones->nizone);
    }

    *jcg1 = zones->izone[izone].jcg1;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        shift0[dim] = zones->izone[izone].shift0[dim];
        shift1[dim] = zones->izone[izone].shift1[dim];
        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
        {
            /* A conservative approach, this can be optimized */
            shift0[dim] -= 1;
            shift1[dim] += 1;
        }
    }
}
int dd_natoms_vsite(gmx_domdec_t *dd)
{
    return dd->comm->nat[ddnatVSITE];
}

void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
{
    *at_start = dd->comm->nat[ddnatCON-1];
    *at_end   = dd->comm->nat[ddnatCON];
}
void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    rvec shift={0,0,0},*buf,*rbuf;
    bool bPBC,bScrew;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = comm->vbuf.v;

    nzone   = 1;
    nat_tot = dd->nat_home;
    for(d=0; d<dd->ndim; d++)
    {
        bPBC   = (dd->ci[dd->dim[d]] == 0);
        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
        if (bPBC)
        {
            copy_rvec(box[dd->dim[d]],shift);
        }
        cd = &comm->cd[d];
        for(p=0; p<cd->np; p++)
        {
            ind   = &cd->ind[p];
            index = ind->index;
            n = 0;
            if (!bPBC)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        copy_rvec(x[j],buf[n]);
                        n++;
                    }
                }
            }
            else if (!bScrew)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* We need to shift the coordinates */
                        rvec_add(x[j],shift,buf[n]);
                        n++;
                    }
                }
            }
            else
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* Shift x */
                        buf[n][XX] = x[j][XX] + shift[XX];
                        /* Rotate y and z.
                         * This operation requires a special shift force
                         * treatment, which is performed in calc_vir.
                         */
                        buf[n][YY] = box[YY][YY] - x[j][YY];
                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
                        n++;
                    }
                }
            }

            if (cd->bInPlace)
            {
                rbuf = x + nat_tot;
            }
            else
            {
                rbuf = comm->vbuf2.v;
            }
            /* Send and receive the coordinates */
            dd_sendrecv_rvec(dd, d, dddirBackward,
                             buf,  ind->nsend[nzone+1],
                             rbuf, ind->nrecv[nzone+1]);
            if (!cd->bInPlace)
            {
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        copy_rvec(rbuf[j],x[i]);
                        j++;
                    }
                }
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        nzone += nzone;
    }
}
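
/* Note (illustrative): dd_move_x sends coordinates backward along each DD
 * dimension, pulse by pulse; dd_move_f below walks the same pulses in the
 * reverse order and sends forward, so every received force retraces a
 * coordinate sent here, e.g. for one pulse:
 *
 *     dd_move_x: dd_sendrecv_rvec(dd,d,dddirBackward, buf,ns, rbuf,nr);
 *     dd_move_f: dd_sendrecv_rvec(dd,d,dddirForward,  sbuf,nr, buf,ns);
 */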
void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    rvec *buf,*sbuf;
    ivec vis;
    int  is;
    bool bPBC,bScrew;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = comm->vbuf.v;

    nzone   = comm->zones.n/2;
    nat_tot = dd->nat_tot;
    for(d=dd->ndim-1; d>=0; d--)
    {
        bPBC   = (dd->ci[dd->dim[d]] == 0);
        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
        if (fshift == NULL && !bScrew)
        {
            bPBC = FALSE;
        }
        /* Determine which shift vector we need */
        clear_ivec(vis);
        vis[dd->dim[d]] = 1;
        is = IVEC2IS(vis);

        cd = &comm->cd[d];
        for(p=cd->np-1; p>=0; p--) {
            ind = &cd->ind[p];
            nat_tot -= ind->nrecv[nzone+1];
            if (cd->bInPlace)
            {
                sbuf = f + nat_tot;
            }
            else
            {
                sbuf = comm->vbuf2.v;
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        copy_rvec(f[i],sbuf[j]);
                        j++;
                    }
                }
            }
            /* Communicate the forces */
            dd_sendrecv_rvec(dd, d, dddirForward,
                             sbuf, ind->nrecv[nzone+1],
                             buf,  ind->nsend[nzone+1]);
            index = ind->index;
            /* Add the received forces */
            n = 0;
            if (!bPBC)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        rvec_inc(f[j],buf[n]);
                        n++;
                    }
                }
            }
            else if (!bScrew)
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        rvec_inc(f[j],buf[n]);
                        /* Add this force to the shift force */
                        rvec_inc(fshift[is],buf[n]);
                        n++;
                    }
                }
            }
            else
            {
                for(i=0; i<ind->nsend[nzone]; i++)
                {
                    at0 = cgindex[index[i]];
                    at1 = cgindex[index[i]+1];
                    for(j=at0; j<at1; j++)
                    {
                        /* Rotate the force */
                        f[j][XX] += buf[n][XX];
                        f[j][YY] -= buf[n][YY];
                        f[j][ZZ] -= buf[n][ZZ];
                        if (fshift)
                        {
                            /* Add this force to the shift force */
                            rvec_inc(fshift[is],buf[n]);
                        }
                        n++;
                    }
                }
            }
        }
        nzone /= 2;
    }
}
void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    real *buf,*rbuf;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = &comm->vbuf.v[0][0];

    nzone   = 1;
    nat_tot = dd->nat_home;
    for(d=0; d<dd->ndim; d++)
    {
        cd = &comm->cd[d];
        for(p=0; p<cd->np; p++)
        {
            ind   = &cd->ind[p];
            index = ind->index;
            n = 0;
            for(i=0; i<ind->nsend[nzone]; i++)
            {
                at0 = cgindex[index[i]];
                at1 = cgindex[index[i]+1];
                for(j=at0; j<at1; j++)
                {
                    buf[n] = v[j];
                    n++;
                }
            }

            if (cd->bInPlace)
            {
                rbuf = v + nat_tot;
            }
            else
            {
                rbuf = &comm->vbuf2.v[0][0];
            }
            /* Send and receive the coordinates */
            dd_sendrecv_real(dd, d, dddirBackward,
                             buf,  ind->nsend[nzone+1],
                             rbuf, ind->nrecv[nzone+1]);
            if (!cd->bInPlace)
            {
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        v[i] = rbuf[j];
                        j++;
                    }
                }
            }
            nat_tot += ind->nrecv[nzone+1];
        }
        nzone += nzone;
    }
}
void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
{
    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
    int  *index,*cgindex;
    gmx_domdec_comm_t *comm;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    real *buf,*sbuf;

    comm = dd->comm;

    cgindex = dd->cgindex;

    buf = &comm->vbuf.v[0][0];

    nzone   = comm->zones.n/2;
    nat_tot = dd->nat_tot;
    for(d=dd->ndim-1; d>=0; d--)
    {
        cd = &comm->cd[d];
        for(p=cd->np-1; p>=0; p--) {
            ind = &cd->ind[p];
            nat_tot -= ind->nrecv[nzone+1];
            if (cd->bInPlace)
            {
                sbuf = v + nat_tot;
            }
            else
            {
                sbuf = &comm->vbuf2.v[0][0];
                j = 0;
                for(zone=0; zone<nzone; zone++)
                {
                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
                    {
                        sbuf[j] = v[i];
                        j++;
                    }
                }
            }
            /* Communicate the forces */
            dd_sendrecv_real(dd, d, dddirForward,
                             sbuf, ind->nrecv[nzone+1],
                             buf,  ind->nsend[nzone+1]);
            index = ind->index;
            /* Add the received forces */
            n = 0;
            for(i=0; i<ind->nsend[nzone]; i++)
            {
                at0 = cgindex[index[i]];
                at1 = cgindex[index[i]+1];
                for(j=at0; j<at1; j++)
                {
                    v[j] += buf[n];
                    n++;
                }
            }
        }
        nzone /= 2;
    }
}
static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
{
    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
            d,i,j,
            zone->min0,zone->max1,
            zone->mch0,zone->mch1,
            zone->p1_0,zone->p1_1);
}
static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
                               int ddimind,int direction,
                               gmx_ddzone_t *buf_s,int n_s,
                               gmx_ddzone_t *buf_r,int n_r)
{
    rvec vbuf_s[5*2],vbuf_r[5*2];
    int i;

    for(i=0; i<n_s; i++)
    {
        vbuf_s[i*2  ][0] = buf_s[i].min0;
        vbuf_s[i*2  ][1] = buf_s[i].max1;
        vbuf_s[i*2  ][2] = buf_s[i].mch0;
        vbuf_s[i*2+1][0] = buf_s[i].mch1;
        vbuf_s[i*2+1][1] = buf_s[i].p1_0;
        vbuf_s[i*2+1][2] = buf_s[i].p1_1;
    }

    dd_sendrecv_rvec(dd, ddimind, direction,
                     vbuf_s, n_s*2,
                     vbuf_r, n_r*2);

    for(i=0; i<n_r; i++)
    {
        buf_r[i].min0 = vbuf_r[i*2  ][0];
        buf_r[i].max1 = vbuf_r[i*2  ][1];
        buf_r[i].mch0 = vbuf_r[i*2  ][2];
        buf_r[i].mch1 = vbuf_r[i*2+1][0];
        buf_r[i].p1_0 = vbuf_r[i*2+1][1];
        buf_r[i].p1_1 = vbuf_r[i*2+1][2];
    }
}
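
/* Packing note (illustrative): a gmx_ddzone_t holds six reals, so each zone
 * is shipped as two rvecs; the 5*2 rvec buffers above assume at most five
 * zone entries per pulse (the extreme pairs for the remaining dimensions
 * plus the zone_d1/zone_d2 entries sent by dd_move_cellx below).
 */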
static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
                          rvec cell_ns_x0,rvec cell_ns_x1)
{
    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
    rvec extr_s[2],extr_r[2];
    real dh[DIM],dist_d,c=0,det;
    gmx_domdec_comm_t *comm;
    bool bPBC,bUse;

    comm = dd->comm;

    for(d=1; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
        zp->min0 = cell_ns_x0[dim];
        zp->max1 = cell_ns_x1[dim];
        zp->mch0 = cell_ns_x0[dim];
        zp->mch1 = cell_ns_x1[dim];
        zp->p1_0 = cell_ns_x0[dim];
        zp->p1_1 = cell_ns_x1[dim];
    }

    for(d=dd->ndim-2; d>=0; d--)
    {
        dim  = dd->dim[d];
        bPBC = (dim < ddbox->npbcdim);

        /* Use an rvec to store two reals */
        extr_s[d][0] = comm->cell_f0[d+1];
        extr_s[d][1] = comm->cell_f1[d+1];

        pos = 0;
        /* Store the extremes in the backward sending buffer,
         * so they get updated separately from the forward communication.
         */
        for(d1=d; d1<dd->ndim-1; d1++)
        {
            /* We invert the order to be able to use the same loop for buf_e */
            buf_s[pos].min0 = extr_s[d1][1];
            buf_s[pos].max1 = extr_s[d1][0];
            buf_s[pos].mch0 = 0;
            buf_s[pos].mch1 = 0;
            /* Store the cell corner of the dimension we communicate along */
            buf_s[pos].p1_0 = comm->cell_x0[dim];
            buf_s[pos].p1_1 = 0;
            pos++;
        }

        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
        pos++;

        if (dd->ndim == 3 && d == 0)
        {
            buf_s[pos] = comm->zone_d2[0][1];
            pos++;
            buf_s[pos] = comm->zone_d1[0];
            pos++;
        }

        /* We only need to communicate the extremes
         * in the forward direction
         */
        npulse = comm->cd[d].np;
        if (bPBC)
        {
            /* Take the minimum to avoid double communication */
            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
        }
        else
        {
            /* Without PBC we should really not communicate over
             * the boundaries, but implementing that complicates
             * the communication setup and therefore we simply
             * do all communication, but ignore some data.
             */
            npulse_min = npulse;
        }
        for(p=0; p<npulse_min; p++)
        {
            /* Communicate the extremes forward */
            bUse = (bPBC || dd->ci[dim] > 0);

            dd_sendrecv_rvec(dd, d, dddirForward,
                             extr_s+d, dd->ndim-d-1,
                             extr_r+d, dd->ndim-d-1);

            if (bUse)
            {
                for(d1=d; d1<dd->ndim-1; d1++)
                {
                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
                }
            }
        }

        buf_size = pos;
        for(p=0; p<npulse; p++)
        {
            /* Communicate all the zone information backward */
            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);

            dd_sendrecv_ddzone(dd, d, dddirBackward,
                               buf_s, buf_size,
                               buf_r, buf_size);

            clear_rvec(dh);
            if (p > 0)
            {
                for(d1=d+1; d1<dd->ndim; d1++)
                {
                    /* Determine the decrease of maximum required
                     * communication height along d1 due to the distance along d,
                     * this avoids a lot of useless atom communication.
                     */
                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;

                    if (ddbox->tric_dir[dim])
                    {
                        /* c is the off-diagonal coupling between the cell planes
                         * along directions d and d1.
                         */
                        c = ddbox->v[dim][dd->dim[d1]][dim];
                    }
                    else
                    {
                        c = 0;
                    }
                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
                    if (det > 0)
                    {
                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
                    }
                    else
                    {
                        /* A negative value signals out of range */
                        dh[d1] = -1;
                    }
                }
            }

            /* Accumulate the extremes over all pulses */
            for(i=0; i<buf_size; i++)
            {
                if (p == 0)
                {
                    buf_e[i] = buf_r[i];
                }
                else
                {
                    if (bUse)
                    {
                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
                    }

                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
                    {
                        d1 = 1;
                    }
                    else
                    {
                        d1 = d + 1;
                    }
                    if (bUse && dh[d1] >= 0)
                    {
                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
                    }
                }
                /* Copy the received buffer to the send buffer,
                 * to pass the data through with the next pulse.
                 */
                buf_s[i] = buf_r[i];
            }
            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
            {
                /* Store the extremes */
                pos = 0;

                for(d1=d; d1<dd->ndim-1; d1++)
                {
                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
                    pos++;
                }

                if (d == 1 || (d == 0 && dd->ndim == 3))
                {
                    for(i=d; i<2; i++)
                    {
                        comm->zone_d2[1-d][i] = buf_e[pos];
                        pos++;
                    }
                }
                if (d == 0)
                {
                    comm->zone_d1[1] = buf_e[pos];
                    pos++;
                }
            }
        }
    }

    if (dd->ndim >= 2)
    {
        dim = dd->dim[1];
        for(i=0; i<2; i++)
        {
            if (debug)
            {
                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
            }
            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
        }
    }
    if (dd->ndim >= 3)
    {
        dim = dd->dim[2];
        for(i=0; i<2; i++)
        {
            for(j=0; j<2; j++)
            {
                if (debug)
                {
                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
                }
                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
            }
        }
    }
    for(d=1; d<dd->ndim; d++)
    {
        comm->cell_f_max0[d] = extr_s[d-1][0];
        comm->cell_f_min1[d] = extr_s[d-1][1];
        if (debug)
        {
            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
        }
    }
}
static void dd_collect_cg(gmx_domdec_t *dd,
                          t_state *state_local)
{
    gmx_domdec_master_t *ma=NULL;
    t_block *cgs_gl;
    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;

    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
    {
        /* The master has the correct distribution */
        return;
    }

    if (state_local->ddp_count == dd->ddp_count)
    {
        ncg_home = dd->ncg_home;
        cg       = dd->index_gl;
        nat_home = dd->nat_home;
    }
    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
    {
        cgs_gl = &dd->comm->cgs_gl;

        ncg_home = state_local->ncg_gl;
        cg       = state_local->cg_gl;
        nat_home = 0;
        for(i=0; i<ncg_home; i++)
        {
            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
        }
    }
    else
    {
        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
    }

    buf2[0] = dd->ncg_home;
    buf2[1] = dd->nat_home;
    if (DDMASTER(dd))
    {
        ma   = dd->ma;
        ibuf = ma->ibuf;
    }
    else
    {
        ibuf = NULL;
    }
    /* Collect the charge group and atom counts on the master */
    dd_gather(dd,2*sizeof(int),buf2,ibuf);

    if (DDMASTER(dd))
    {
        ma->index[0] = 0;
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ncg[i]     = ma->ibuf[2*i];
            ma->nat[i]     = ma->ibuf[2*i+1];
            ma->index[i+1] = ma->index[i] + ma->ncg[i];
        }
        /* Make byte counts and indices */
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ibuf[i]            = ma->ncg[i]*sizeof(int);
            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
        }
        if (debug)
        {
            fprintf(debug,"Initial charge group distribution: ");
            for(i=0; i<dd->nnodes; i++)
                fprintf(debug," %d",ma->ncg[i]);
            fprintf(debug,"\n");
        }
    }

    /* Collect the charge group indices on the master */
    dd_gatherv(dd,
               dd->ncg_home*sizeof(int),dd->index_gl,
               DDMASTER(dd) ? ma->ibuf : NULL,
               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
               DDMASTER(dd) ? ma->cg : NULL);

    dd->comm->master_cg_ddp_count = state_local->ddp_count;
}
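
/* Layout note (illustrative assumption): after dd_collect_cg, ma->ibuf holds
 * per-node byte counts in entries [0,nnodes) and byte displacements in
 * entries [nnodes,2*nnodes), which is exactly the (counts,disps) pair that
 * dd_gatherv expects:
 *
 *     counts = ma->ibuf;
 *     disps  = ma->ibuf + dd->nnodes;
 */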
static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
                                    rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;
    t_block *cgs_gl;

    ma = dd->ma;

    if (!DDMASTER(dd))
    {
#ifdef GMX_MPI
        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
                 dd->rank,dd->mpi_comm_all);
#endif
    }
    else
    {
        /* Copy the master coordinates to the global array */
        cgs_gl = &dd->comm->cgs_gl;

        n = DDMASTERRANK(dd);
        a = 0;
        for(i=ma->index[n]; i<ma->index[n+1]; i++)
        {
            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
            {
                copy_rvec(lv[a++],v[c]);
            }
        }

        for(n=0; n<dd->nnodes; n++)
        {
            if (n != dd->rank)
            {
                if (ma->nat[n] > nalloc)
                {
                    nalloc = over_alloc_dd(ma->nat[n]);
                    srenew(buf,nalloc);
                }
#ifdef GMX_MPI
                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
#endif
                a = 0;
                for(i=ma->index[n]; i<ma->index[n+1]; i++)
                {
                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
                    {
                        copy_rvec(buf[a++],v[c]);
                    }
                }
            }
        }
        sfree(buf);
    }
}
static void get_commbuffer_counts(gmx_domdec_t *dd,
                                  int **counts,int **disps)
{
    gmx_domdec_master_t *ma;
    int n;

    ma = dd->ma;

    /* Make the rvec count and displacement arrays */
    *counts = ma->ibuf;
    *disps  = ma->ibuf + dd->nnodes;
    for(n=0; n<dd->nnodes; n++)
    {
        (*counts)[n] = ma->nat[n]*sizeof(rvec);
        (*disps)[n]  = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
    }
}
static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
                                   rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int  *rcounts=NULL,*disps=NULL;
    int  n,i,c,a;
    rvec *buf=NULL;
    t_block *cgs_gl;

    ma = dd->ma;

    if (DDMASTER(dd))
    {
        get_commbuffer_counts(dd,&rcounts,&disps);

        buf = ma->vbuf;
    }

    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);

    if (DDMASTER(dd))
    {
        cgs_gl = &dd->comm->cgs_gl;

        a = 0;
        for(n=0; n<dd->nnodes; n++)
        {
            for(i=ma->index[n]; i<ma->index[n+1]; i++)
            {
                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
                {
                    copy_rvec(buf[a++],v[c]);
                }
            }
        }
    }
}
void dd_collect_vec(gmx_domdec_t *dd,
                    t_state *state_local,rvec *lv,rvec *v)
{
    gmx_domdec_master_t *ma;
    int  n,i,c,a,nalloc=0;

    dd_collect_cg(dd,state_local);

    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        dd_collect_vec_sendrecv(dd,lv,v);
    }
    else
    {
        dd_collect_vec_gatherv(dd,lv,v);
    }
}
void dd_collect_state(gmx_domdec_t *dd,
                      t_state *state_local,t_state *state)
{
    int est,i,j,nh;

    nh = state->nhchainlength;

    if (DDMASTER(dd))
    {
        state->lambda = state_local->lambda;
        state->veta   = state_local->veta;
        state->vol0   = state_local->vol0;
        copy_mat(state_local->box,state->box);
        copy_mat(state_local->boxv,state->boxv);
        copy_mat(state_local->svir_prev,state->svir_prev);
        copy_mat(state_local->fvir_prev,state->fvir_prev);
        copy_mat(state_local->pres_prev,state->pres_prev);

        for(i=0; i<state_local->ngtc; i++)
        {
            for(j=0; j<nh; j++) {
                state->nosehoover_xi[i*nh+j]  = state_local->nosehoover_xi[i*nh+j];
                state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
            }
            state->therm_integral[i] = state_local->therm_integral[i];
        }
        for(i=0; i<state_local->nnhpres; i++)
        {
            for(j=0; j<nh; j++) {
                state->nhpres_xi[i*nh+j]  = state_local->nhpres_xi[i*nh+j];
                state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
            }
        }
    }
    for(est=0; est<estNR; est++)
    {
        if (EST_DISTR(est) && state_local->flags & (1<<est))
        {
            switch (est) {
            case estX:
                dd_collect_vec(dd,state_local,state_local->x,state->x);
                break;
            case estV:
                dd_collect_vec(dd,state_local,state_local->v,state->v);
                break;
            case estSDX:
                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
                break;
            case estCGP:
                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
                break;
            case estLD_RNG:
                if (state->nrngi == 1)
                {
                    if (DDMASTER(dd))
                    {
                        for(i=0; i<state_local->nrng; i++)
                        {
                            state->ld_rng[i] = state_local->ld_rng[i];
                        }
                    }
                }
                else
                {
                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
                              state_local->ld_rng,state->ld_rng);
                }
                break;
            case estLD_RNGI:
                if (state->nrngi == 1)
                {
                    if (DDMASTER(dd))
                    {
                        state->ld_rngi[0] = state_local->ld_rngi[0];
                    }
                }
                else
                {
                    dd_gather(dd,sizeof(state->ld_rngi[0]),
                              state_local->ld_rngi,state->ld_rngi);
                }
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_collect_state");
            }
        }
    }
}
static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
{
    if (debug)
    {
        fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
    }
    fr->cg_nalloc = over_alloc_dd(nalloc);
    srenew(fr->cg_cm,fr->cg_nalloc);
    srenew(fr->cginfo,fr->cg_nalloc);
}
static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
{
    int est;

    if (debug)
    {
        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
    }

    state->nalloc = over_alloc_dd(nalloc);

    for(est=0; est<estNR; est++)
    {
        if (EST_DISTR(est) && state->flags & (1<<est))
        {
            switch(est) {
            case estX:
                srenew(state->x,state->nalloc);
                break;
            case estV:
                srenew(state->v,state->nalloc);
                break;
            case estSDX:
                srenew(state->sd_X,state->nalloc);
                break;
            case estCGP:
                srenew(state->cg_p,state->nalloc);
                break;
            case estLD_RNG:
            case estLD_RNGI:
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* No reallocation required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_realloc_state");
            }
        }
    }

    if (f != NULL)
    {
        srenew(*f,state->nalloc);
    }
}
static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
                                       rvec *v,rvec *lv)
{
    gmx_domdec_master_t *ma;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;

    if (DDMASTER(dd))
    {
        ma = dd->ma;

        for(n=0; n<dd->nnodes; n++)
        {
            if (n != dd->rank)
            {
                if (ma->nat[n] > nalloc)
                {
                    nalloc = over_alloc_dd(ma->nat[n]);
                    srenew(buf,nalloc);
                }
                /* Use lv as a temporary buffer */
                a = 0;
                for(i=ma->index[n]; i<ma->index[n+1]; i++)
                {
                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
                    {
                        copy_rvec(v[c],buf[a++]);
                    }
                }
                if (a != ma->nat[n])
                {
                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
                              a,ma->nat[n]);
                }

#ifdef GMX_MPI
                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
                         DDRANK(dd,n),n,dd->mpi_comm_all);
#endif
            }
        }
        sfree(buf);
        n = DDMASTERRANK(dd);
        a = 0;
        for(i=ma->index[n]; i<ma->index[n+1]; i++)
        {
            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
            {
                copy_rvec(v[c],lv[a++]);
            }
        }
    }
    else
    {
#ifdef GMX_MPI
        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
#endif
    }
}
static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
                                       rvec *v,rvec *lv)
{
    gmx_domdec_master_t *ma;
    int  *scounts=NULL,*disps=NULL;
    int  n,i,c,a,nalloc=0;
    rvec *buf=NULL;

    if (DDMASTER(dd))
    {
        ma = dd->ma;

        get_commbuffer_counts(dd,&scounts,&disps);

        buf = ma->vbuf;
        a = 0;
        for(n=0; n<dd->nnodes; n++)
        {
            for(i=ma->index[n]; i<ma->index[n+1]; i++)
            {
                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
                {
                    copy_rvec(v[c],buf[a++]);
                }
            }
        }
    }

    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
}
static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
{
    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        dd_distribute_vec_sendrecv(dd,cgs,v,lv);
    }
    else
    {
        dd_distribute_vec_scatterv(dd,cgs,v,lv);
    }
}
static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
                                t_state *state,t_state *state_local,
                                rvec **f)
{
    int  i,j,ngtch,ngtcp,nh;

    nh = state->nhchainlength;

    if (DDMASTER(dd))
    {
        state_local->lambda = state->lambda;
        state_local->veta   = state->veta;
        state_local->vol0   = state->vol0;
        copy_mat(state->box,state_local->box);
        copy_mat(state->box_rel,state_local->box_rel);
        copy_mat(state->boxv,state_local->boxv);
        copy_mat(state->svir_prev,state_local->svir_prev);
        copy_mat(state->fvir_prev,state_local->fvir_prev);
        for(i=0; i<state_local->ngtc; i++)
        {
            for(j=0; j<nh; j++) {
                state_local->nosehoover_xi[i*nh+j]  = state->nosehoover_xi[i*nh+j];
                state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
            }
            state_local->therm_integral[i] = state->therm_integral[i];
        }
        for(i=0; i<state_local->nnhpres; i++)
        {
            for(j=0; j<nh; j++) {
                state_local->nhpres_xi[i*nh+j]  = state->nhpres_xi[i*nh+j];
                state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
            }
        }
    }
    dd_bcast(dd,sizeof(real),&state_local->lambda);
    dd_bcast(dd,sizeof(real),&state_local->veta);
    dd_bcast(dd,sizeof(real),&state_local->vol0);
    dd_bcast(dd,sizeof(state_local->box),state_local->box);
    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);

    if (dd->nat_home > state_local->nalloc)
    {
        dd_realloc_state(state_local,f,dd->nat_home);
    }
    for(i=0; i<estNR; i++)
    {
        if (EST_DISTR(i) && state_local->flags & (1<<i))
        {
            switch (i) {
            case estX:
                dd_distribute_vec(dd,cgs,state->x,state_local->x);
                break;
            case estV:
                dd_distribute_vec(dd,cgs,state->v,state_local->v);
                break;
            case estSDX:
                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
                break;
            case estCGP:
                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
                break;
            case estLD_RNG:
                if (state->nrngi == 1)
                {
                    dd_bcastc(dd,
                              state_local->nrng*sizeof(state_local->ld_rng[0]),
                              state->ld_rng,state_local->ld_rng);
                }
                else
                {
                    dd_scatter(dd,
                               state_local->nrng*sizeof(state_local->ld_rng[0]),
                               state->ld_rng,state_local->ld_rng);
                }
                break;
            case estLD_RNGI:
                if (state->nrngi == 1)
                {
                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
                              state->ld_rngi,state_local->ld_rngi);
                }
                else
                {
                    dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
                               state->ld_rngi,state_local->ld_rngi);
                }
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* Not implemented yet */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_distribute_state");
            }
        }
    }
}
static char dim2char(int dim)
{
    char c='?';

    switch (dim)
    {
    case XX: c = 'X'; break;
    case YY: c = 'Y'; break;
    case ZZ: c = 'Z'; break;
    default: gmx_fatal(FARGS,"Unknown dim %d",dim);
    }

    return c;
}
static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
{
    rvec grid_s[2],*grid_r=NULL,cx,r;
    char fname[STRLEN],format[STRLEN],buf[22];
    FILE *out;
    int  a,i,d,z,y,x;
    matrix tric;
    real vol;

    copy_rvec(dd->comm->cell_x0,grid_s[0]);
    copy_rvec(dd->comm->cell_x1,grid_s[1]);

    if (DDMASTER(dd))
    {
        snew(grid_r,2*dd->nnodes);
    }

    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);

    if (DDMASTER(dd))
    {
        for(d=0; d<DIM; d++)
        {
            for(i=0; i<DIM; i++)
            {
                if (d == i)
                {
                    tric[d][i] = 1;
                }
                else
                {
                    if (dd->nc[d] > 1 && d < ddbox->npbcdim)
                    {
                        tric[d][i] = box[i][d]/box[i][i];
                    }
                    else
                    {
                        tric[d][i] = 0;
                    }
                }
            }
        }
        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
        sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
        out = gmx_fio_fopen(fname,"w");
        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
        a = 1;
        for(i=0; i<dd->nnodes; i++)
        {
            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
            for(d=0; d<DIM; d++)
            {
                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
            }
            for(z=0; z<2; z++)
            {
                for(y=0; y<2; y++)
                {
                    for(x=0; x<2; x++)
                    {
                        cx[XX] = grid_r[i*2+x][XX];
                        cx[YY] = grid_r[i*2+y][YY];
                        cx[ZZ] = grid_r[i*2+z][ZZ];
                        mvmul(tric,cx,r);
                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
                    }
                }
            }
            for(d=0; d<DIM; d++)
            {
                for(x=0; x<4; x++)
                {
                    switch(d)
                    {
                    case 0: y = 1 + i*8 + 2*x; break;
                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
                    case 2: y = 1 + i*8 + x; break;
                    }
                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
                }
            }
        }
        gmx_fio_fclose(out);
        sfree(grid_r);
    }
}
void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
                  gmx_mtop_t *mtop,t_commrec *cr,
                  int natoms,rvec x[],matrix box)
{
    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
    FILE *out;
    int  i,ii,resnr,c;
    char *atomname,*resname;
    real b;
    gmx_domdec_t *dd;

    dd = cr->dd;
    if (natoms == -1)
    {
        natoms = dd->comm->nat[ddnatVSITE];
    }

    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);

    sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
    sprintf(format4,"%s%s\n",pdbformat4,"%6.2f%6.2f");

    out = gmx_fio_fopen(fname,"w");

    fprintf(out,"TITLE     %s\n",title);
    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
    for(i=0; i<natoms; i++)
    {
        ii = dd->gatindex[i];
        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
        if (i < dd->comm->nat[ddnatZONE])
        {
            c = 0;
            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
            {
                c++;
            }
            b = c;
        }
        else if (i < dd->comm->nat[ddnatVSITE])
        {
            b = dd->comm->zones.n;
        }
        else
        {
            b = dd->comm->zones.n + 1;
        }
        fprintf(out,strlen(atomname)<4 ? format : format4,
                "ATOM",(ii+1)%100000,
                atomname,resname,' ',(resnr+1)%10000,' ',
                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
    }
    fprintf(out,"TER\n");

    gmx_fio_fclose(out);
}
real dd_cutoff_mbody(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int  di;
    real r;

    comm = dd->comm;

    r = -1;
    if (comm->bInterCGBondeds)
    {
        if (comm->cutoff_mbody > 0)
        {
            r = comm->cutoff_mbody;
        }
        else
        {
            /* cutoff_mbody=0 means we do not have DLB */
            r = comm->cellsize_min[dd->dim[0]];
            for(di=1; di<dd->ndim; di++)
            {
                r = min(r,comm->cellsize_min[dd->dim[di]]);
            }
            if (comm->bBondComm)
            {
                r = max(r,comm->cutoff_mbody);
            }
            else
            {
                r = min(r,comm->cutoff);
            }
        }
    }

    return r;
}

real dd_cutoff_twobody(gmx_domdec_t *dd)
{
    real r_mb;

    r_mb = dd_cutoff_mbody(dd);

    return max(dd->comm->cutoff,r_mb);
}
static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
{
    int nc,ntot;

    nc   = dd->nc[dd->comm->cartpmedim];
    ntot = dd->comm->ntot[dd->comm->cartpmedim];
    copy_ivec(coord,coord_pme);
    coord_pme[dd->comm->cartpmedim] =
        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
}
static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
{
    /* Here we assign a PME node to communicate with this DD node
     * by assuming that the major index of both is x.
     * We add cr->npmenodes/2 to obtain an even distribution.
     */
    return (ddindex*npme + npme/2)/ndd;
}
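
/* Worked example (illustrative): with ndd = 8 DD nodes and npme = 3 PME
 * nodes, low_ddindex2pmeindex maps dd indices 0..7 via (i*3+1)/8 to
 * 0,0,0,1,1,2,2,2, i.e. an even distribution with the half-slab offset
 * applied.
 */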
static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
{
    return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
}

static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
{
    return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
}
static int *dd_pmenodes(t_commrec *cr)
{
    int *pmenodes;
    int n,i,p0,p1;

    snew(pmenodes,cr->npmenodes);
    n = 0;
    for(i=0; i<cr->dd->nnodes; i++) {
        p0 = cr_ddindex2pmeindex(cr,i);
        p1 = cr_ddindex2pmeindex(cr,i+1);
        if (i+1 == cr->dd->nnodes || p1 > p0) {
            if (debug)
                fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
            pmenodes[n] = i + 1 + n;
            n++;
        }
    }

    return pmenodes;
}
static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
{
    gmx_domdec_t *dd;
    ivec coords,coords_pme,nc;
    int  slab;

    dd = cr->dd;
    /*
      if (dd->comm->bCartesian) {
          gmx_ddindex2xyz(dd->nc,ddindex,coords);
          dd_coords2pmecoords(dd,coords,coords_pme);
          copy_ivec(dd->ntot,nc);
          nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
          coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];

          slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
      } else {
          slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
      }
    */
    coords[XX] = x;
    coords[YY] = y;
    coords[ZZ] = z;
    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));

    return slab;
}
static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
{
    gmx_domdec_comm_t *comm;
    ivec coords;
    int  ddindex,nodeid=-1;

    comm = cr->dd->comm;

    coords[XX] = x;
    coords[YY] = y;
    coords[ZZ] = z;
    if (comm->bCartesianPP_PME)
    {
#ifdef GMX_MPI
        MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
#endif
    }
    else
    {
        ddindex = dd_index(cr->dd->nc,coords);
        if (comm->bCartesianPP)
        {
            nodeid = comm->ddindex2simnodeid[ddindex];
        }
        else
        {
            if (comm->pmenodes)
            {
                nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
            }
            else
            {
                nodeid = ddindex;
            }
        }
    }

    return nodeid;
}
static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    ivec coord,coord_pme;
    int  i;
    int  pmenode=-1;

    dd = cr->dd;
    comm = dd->comm;

    /* This assumes a uniform x domain decomposition grid cell size */
    if (comm->bCartesianPP_PME)
    {
#ifdef GMX_MPI
        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
        {
            /* This is a PP node */
            dd_cart_coord2pmecoord(dd,coord,coord_pme);
            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
        }
#endif
    }
    else if (comm->bCartesianPP)
    {
        if (sim_nodeid < dd->nnodes)
        {
            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
        }
    }
    else
    {
        /* This assumes DD cells with identical x coordinates
         * are numbered sequentially.
         */
        if (dd->comm->pmenodes == NULL)
        {
            if (sim_nodeid < dd->nnodes)
            {
                /* The DD index equals the nodeid */
                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
            }
        }
        else
        {
            i = 0;
            while (sim_nodeid > dd->comm->pmenodes[i])
            {
                i++;
            }
            if (sim_nodeid < dd->comm->pmenodes[i])
            {
                pmenode = dd->comm->pmenodes[i];
            }
        }
    }

    return pmenode;
}
bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
{
    bool bPMEOnlyNode;

    if (DOMAINDECOMP(cr))
    {
        bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
    }
    else
    {
        bPMEOnlyNode = FALSE;
    }

    return bPMEOnlyNode;
}
void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
{
    gmx_domdec_t *dd;
    int x,y,z;
    ivec coord,coord_pme;

    dd = cr->dd;

    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);

    *nmy_ddnodes = 0;
    for(x=0; x<dd->nc[XX]; x++)
    {
        for(y=0; y<dd->nc[YY]; y++)
        {
            for(z=0; z<dd->nc[ZZ]; z++)
            {
                if (dd->comm->bCartesianPP_PME)
                {
                    coord[XX] = x;
                    coord[YY] = y;
                    coord[ZZ] = z;
                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
                    if (dd->ci[XX] == coord_pme[XX] &&
                        dd->ci[YY] == coord_pme[YY] &&
                        dd->ci[ZZ] == coord_pme[ZZ])
                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
                }
                else
                {
                    /* The slab corresponds to the nodeid in the PME group */
                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
                    {
                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
                    }
                }
            }
        }
    }

    /* The last PP-only node is the peer node */
    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];

    if (debug)
    {
        fprintf(debug,"Receive coordinates from PP nodes:");
        for(x=0; x<*nmy_ddnodes; x++)
        {
            fprintf(debug," %d",(*my_ddnodes)[x]);
        }
        fprintf(debug,"\n");
    }
}
static bool receive_vir_ener(t_commrec *cr)
{
    gmx_domdec_comm_t *comm;
    int  pmenode,coords[DIM],rank;
    bool bReceive;

    bReceive = TRUE;
    if (cr->npmenodes < cr->dd->nnodes)
    {
        comm = cr->dd->comm;
        if (comm->bCartesianPP_PME)
        {
            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
#ifdef GMX_MPI
            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
            coords[comm->cartpmedim]++;
            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
            {
                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
                if (dd_simnode2pmenode(cr,rank) == pmenode)
                {
                    /* This is not the last PP node for pmenode */
                    bReceive = FALSE;
                }
            }
#endif
        }
        else
        {
            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
            if (cr->sim_nodeid+1 < cr->nnodes &&
                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
            {
                /* This is not the last PP node for pmenode */
                bReceive = FALSE;
            }
        }
    }

    return bReceive;
}
static void set_zones_ncg_home(gmx_domdec_t *dd)
{
    gmx_domdec_zones_t *zones;
    int i;

    zones = &dd->comm->zones;

    zones->cg_range[0] = 0;
    for(i=1; i<zones->n+1; i++)
    {
        zones->cg_range[i] = dd->ncg_home;
    }
}
static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
{
    int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;

    ind      = state->cg_gl;
    dd_cg_gl = dd->index_gl;
    cgindex  = dd->cgindex;
    nat = 0;
    for(i=0; i<state->ncg_gl; i++)
    {
        cgindex[i] = nat;
        cg_gl = ind[i];
        dd_cg_gl[i] = cg_gl;
        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
    }
    cgindex[i] = nat;

    dd->ncg_home = state->ncg_gl;
    dd->nat_home = nat;

    set_zones_ncg_home(dd);
}
static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
{
    while (cg >= cginfo_mb->cg_end)
    {
        cginfo_mb++;
    }

    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
}
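
/* Illustrative note (assumption): for topologies with repeated molecule
 * blocks, cginfo is stored once per block and reused modulo cg_mod, so a
 * charge group in any copy of a molecule maps back onto the first copy:
 *
 *     info = cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
 */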
static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
                          t_forcerec *fr,char *bLocalCG)
{
    cginfo_mb_t *cginfo_mb;
    int *cginfo;
    int cg;

    if (fr != NULL)
    {
        cginfo_mb = fr->cginfo_mb;
        cginfo    = fr->cginfo;

        for(cg=cg0; cg<cg1; cg++)
        {
            cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
        }
    }

    if (bLocalCG != NULL)
    {
        for(cg=cg0; cg<cg1; cg++)
        {
            bLocalCG[index_gl[cg]] = TRUE;
        }
    }
}
static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
{
    int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
    char *bLocalCG;

    bLocalCG = dd->comm->bLocalCG;

    if (dd->nat_tot > dd->gatindex_nalloc)
    {
        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
        srenew(dd->gatindex,dd->gatindex_nalloc);
    }

    nzone     = dd->comm->zones.n;
    zone2cg   = dd->comm->zones.cg_range;
    zone_ncg1 = dd->comm->zone_ncg1;
    index_gl  = dd->index_gl;
    gatindex  = dd->gatindex;

    if (zone2cg[1] != dd->ncg_home)
    {
        gmx_incons("dd->ncg_zone is not up to date");
    }

    /* Make the local to global and global to local atom index */
    a = dd->cgindex[cg_start];
    for(zone=0; zone<nzone; zone++)
    {
        if (zone == 0)
        {
            cg0 = cg_start;
        }
        else
        {
            cg0 = zone2cg[zone];
        }
        for(cg=cg0; cg<zone2cg[zone+1]; cg++)
        {
            zone1 = zone;
            if (cg - cg0 >= zone_ncg1[zone])
            {
                /* Signal that this cg is from more than one zone away */
                zone1 += nzone;
            }
            cg_gl = index_gl[cg];
            for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
            {
                gatindex[a] = a_gl;
                ga2la_set(dd->ga2la,a_gl,a,zone1);
                a++;
            }
        }
    }
}
static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
                          const char *where)
{
    int i,ngl,nerr;

    nerr = 0;
    if (bLocalCG == NULL)
    {
        return nerr;
    }
    for(i=0; i<dd->ncg_tot; i++)
    {
        if (!bLocalCG[dd->index_gl[i]])
        {
            fprintf(stderr,
                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
            nerr++;
        }
    }
    ngl = 0;
    for(i=0; i<ncg_sys; i++)
    {
        if (bLocalCG[i])
        {
            ngl++;
        }
    }
    if (ngl != dd->ncg_tot)
    {
        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
        nerr++;
    }

    return nerr;
}
static void check_index_consistency(gmx_domdec_t *dd,
                                    int natoms_sys,int ncg_sys,
                                    const char *where)
{
    int  nerr,ngl,i,a,cell;
    int  *have;

    nerr = 0;

    if (dd->comm->DD_debug > 1)
    {
        snew(have,natoms_sys);
        for(a=0; a<dd->nat_tot; a++)
        {
            if (have[dd->gatindex[a]] > 0)
            {
                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
            }
            else
            {
                have[dd->gatindex[a]] = a + 1;
            }
        }
        sfree(have);
    }

    snew(have,dd->nat_tot);

    ngl = 0;
    for(i=0; i<natoms_sys; i++)
    {
        if (ga2la_get(dd->ga2la,i,&a,&cell))
        {
            if (a >= dd->nat_tot)
            {
                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
                nerr++;
            }
            else
            {
                have[a] = 1;
                if (dd->gatindex[a] != i)
                {
                    fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
                    nerr++;
                }
            }
            ngl++;
        }
    }
    if (ngl != dd->nat_tot)
    {
        fprintf(stderr,
                "DD node %d, %s: %d global atom indices, %d local atoms\n",
                dd->rank,where,ngl,dd->nat_tot);
    }
    for(a=0; a<dd->nat_tot; a++)
    {
        if (have[a] == 0)
        {
            fprintf(stderr,
                    "DD node %d, %s: local atom %d, global %d has no global index\n",
                    dd->rank,where,a+1,dd->gatindex[a]+1);
        }
    }
    sfree(have);

    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);

    if (nerr > 0)
    {
        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
                  dd->rank,where,nerr);
    }
}
static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
{
    int  i;
    char *bLocalCG;

    if (a_start == 0)
    {
        /* Clear the whole list without searching */
        ga2la_clear(dd->ga2la);
    }
    else
    {
        for(i=a_start; i<dd->nat_tot; i++)
        {
            ga2la_del(dd->ga2la,dd->gatindex[i]);
        }
    }

    bLocalCG = dd->comm->bLocalCG;
    if (bLocalCG)
    {
        for(i=cg_start; i<dd->ncg_tot; i++)
        {
            bLocalCG[dd->index_gl[i]] = FALSE;
        }
    }

    dd_clear_local_vsite_indices(dd);

    if (dd->constraints)
    {
        dd_clear_local_constraint_indices(dd);
    }
}
static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
{
    real grid_jump_limit;

    /* The distance between the boundaries of cells at distance
     * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
     * and by the fact that cells should not be shifted by more than
     * half their size, such that cg's only shift by one cell
     * at redecomposition.
     */
    grid_jump_limit = comm->cellsize_limit;
    if (!comm->bVacDLBNoLimit)
    {
        grid_jump_limit = max(grid_jump_limit,
                              comm->cutoff/comm->cd[dim_ind].np);
    }

    return grid_jump_limit;
}
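
/* Worked example (illustrative): with cellsize_limit = 0.9 nm, a cut-off of
 * 1.2 nm and two pulses along dim_ind (np = 2), the jump limit becomes
 * max(0.9, 1.2/2) = 0.9 nm; with a single pulse it would be 1.2 nm.
 */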
static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int  d,dim;
    real limit,bfac;
    char buf[22];

    comm = dd->comm;

    for(d=1; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        limit = grid_jump_limit(comm,d);
        bfac = ddbox->box_size[dim];
        if (ddbox->tric_dir[dim])
        {
            bfac *= ddbox->skew_fac[dim];
        }
        if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac <  limit ||
            (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
        {
            gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
                      gmx_step_str(step,buf),
                      dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
        }
    }
}
static int dd_load_count(gmx_domdec_comm_t *comm)
{
    return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
}

static float dd_force_load(gmx_domdec_comm_t *comm)
{
    float load;

    if (comm->eFlop)
    {
        load = comm->flop;
        if (comm->eFlop > 1)
        {
            load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
        }
    }
    else
    {
        load = comm->cycl[ddCyclF];
        if (comm->cycl_n[ddCyclF] > 1)
        {
            /* Subtract the maximum of the last n cycle counts
             * to get rid of possible high counts due to other sources,
             * for instance system activity, that would otherwise
             * affect the dynamic load balancing.
             */
            load -= comm->cycl_max[ddCyclF];
        }
    }

    return load;
}
static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dimind,real **dim_f)
{
    gmx_domdec_comm_t *comm;
    int i;

    comm = dd->comm;

    if (dd->dim[dimind] != dimind)
    {
        *dim_f = NULL;
        return;
    }

    snew(*dim_f,dd->nc[dimind]+1);
    (*dim_f)[0] = 0;
    for(i=1; i<dd->nc[dimind]; i++)
    {
        if (comm->slb_frac[dimind])
        {
            (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dimind][i-1];
        }
        else
        {
            (*dim_f)[i] = (real)i/(real)dd->nc[dimind];
        }
    }
    (*dim_f)[dd->nc[dimind]] = 1;
}
static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
                       int dimind,int nslab)
{
    int  pmeindex,slab,nso,i;
    ivec xyz;

    ddpme->dimind = dimind;
    ddpme->nslab  = nslab;

    if (nslab <= 1)
    {
        return;
    }

    nso = dd->comm->npmenodes/nslab;
    /* Determine for each PME slab the PP location range for dimension dim */
    snew(ddpme->pp_min,nslab);
    snew(ddpme->pp_max,nslab);
    for(slab=0; slab<nslab; slab++) {
        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
        ddpme->pp_max[slab] = 0;
    }
    for(i=0; i<dd->nnodes; i++) {
        ddindex2xyz(dd->nc,i,xyz);
        /* For y only use our y/z slab.
         * This assumes that the PME x grid size matches the DD grid size.
         */
        if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
            pmeindex = ddindex2pmeindex(dd,i);
            if (dimind == 0) {
                slab = pmeindex/nso;
            } else {
                slab = pmeindex % nslab;
            }
            ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
            ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
        }
    }

    set_slb_pme_dim_f(dd,ddpme->dimind,&ddpme->slb_dim_f);
}
int dd_pme_maxshift0(gmx_domdec_t *dd)
{
    return dd->comm->ddpme[0].maxshift;
}

int dd_pme_maxshift1(gmx_domdec_t *dd)
{
    return dd->comm->ddpme[1].maxshift;
}
static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
                             bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
{
    gmx_domdec_comm_t *comm;
    int  dim,nc,ns,s;
    int  *xmin,*xmax;
    real range,pme_boundary;
    int  sh;

    comm = dd->comm;
    dim = ddpme->dimind;
    nc  = dd->nc[dim];
    ns  = ddpme->nslab;

    if (dd->dim[dim] != dim)
    {
        /* PP decomposition is not along dim: the worst situation */
        sh = ns/2;
    }
    else if (ns <= 3 || (bUniform && ns == nc))
    {
        /* The optimal situation */
        sh = 1;
    }
    else
    {
        /* We need to check for all pme nodes which nodes they
         * could possibly need to communicate with.
         */
        xmin = ddpme->pp_min;
        xmax = ddpme->pp_max;
        /* Allow for atoms to be maximally 2/3 times the cut-off
         * out of their DD cell. This is a reasonable balance between
         * performance and support for most charge-group/cut-off
         * combinations.
         */
        range = 2.0/3.0*comm->cutoff/ddbox->box_size[dim];
        /* Avoid extra communication when we are exactly at a boundary */
        range *= 0.999;

        sh = 1;
        for(s=0; s<ns; s++)
        {
            /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
            pme_boundary = (real)s/ns;
            while (sh+1 < ns &&
                   ((s-(sh+1) >= 0 &&
                     cell_f[xmax[s-(sh+1)   ]+1]     + range > pme_boundary) ||
                    (s-(sh+1) <  0 &&
                     cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
            {
                sh++;
            }
            pme_boundary = (real)(s+1)/ns;
            while (sh+1 < ns &&
                   ((s+(sh+1) <  ns &&
                     cell_f[xmin[s+(sh+1)   ]  ]     - range < pme_boundary) ||
                    (s+(sh+1) >= ns &&
                     cell_f[xmin[s+(sh+1)-ns]  ] + 1 - range < pme_boundary)))
            {
                sh++;
            }
        }
    }

    ddpme->maxshift = sh;

    if (debug)
    {
        fprintf(debug,"PME slab communication range for dimind %d is %d\n",
                ddpme->dimind,ddpme->maxshift);
    }
}
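
/* Illustrative note: maxshift bounds how many PME slabs away a PP cell's
 * atoms may fall; e.g. with ns = 8 uniform slabs and sh = 1, PME slab s only
 * needs x coordinates from PP cells overlapping slabs s-1, s and s+1.
 */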
static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
{
    int d,dim;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        if (dim < ddbox->nboundeddim &&
            ddbox->box_size[dim]*ddbox->skew_fac[dim] <
            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
        {
            gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
                      dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
                      dd->nc[dim],dd->comm->cellsize_limit);
        }
    }
}
static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
                                  bool bMaster,ivec npulse)
{
    gmx_domdec_comm_t *comm;
    int  d,j;
    rvec cellsize_min;
    real *cell_x,cell_dx,cellsize;

    comm = dd->comm;

    for(d=0; d<DIM; d++)
    {
        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
        npulse[d] = 1;
        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
        {
            /* Uniform grid */
            cell_dx = ddbox->box_size[d]/dd->nc[d];
            if (bMaster)
            {
                for(j=0; j<dd->nc[d]+1; j++)
                {
                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
                }
            }
            else
            {
                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
            }
            cellsize = cell_dx*ddbox->skew_fac[d];
            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
            {
                npulse[d]++;
            }
            cellsize_min[d] = cellsize;
        }
        else
        {
            /* Statically load balanced grid */
            /* Also when we are not doing a master distribution we determine
             * all cell borders in a loop to obtain identical values
             * to the master distribution case and to determine npulse.
             */
            if (bMaster)
            {
                cell_x = dd->ma->cell_x[d];
            }
            else
            {
                snew(cell_x,dd->nc[d]+1);
            }
            cell_x[0] = ddbox->box0[d];
            for(j=0; j<dd->nc[d]; j++)
            {
                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
                cell_x[j+1] = cell_x[j] + cell_dx;
                cellsize = cell_dx*ddbox->skew_fac[d];
                while (cellsize*npulse[d] < comm->cutoff &&
                       npulse[d] < dd->nc[d]-1)
                {
                    npulse[d]++;
                }
                cellsize_min[d] = min(cellsize_min[d],cellsize);
            }
            if (!bMaster)
            {
                comm->cell_x0[d] = cell_x[dd->ci[d]];
                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
                sfree(cell_x);
            }
        }
        /* The following limitation is to avoid that a cell would receive
         * some of its own home charge groups back over the periodic boundary.
         * Double charge groups cause trouble with the global indices.
         */
        if (d < ddbox->npbcdim &&
            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
        {
            gmx_fatal_collective(FARGS,DDMASTER(dd),
                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
                                 comm->cutoff,
                                 dd->nc[d],dd->nc[d],
                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
        }
    }

    if (!comm->bDynLoadBal)
    {
        copy_rvec(cellsize_min,comm->cellsize_min);
    }

    for(d=0; d<comm->npmedecompdim; d++)
    {
        set_pme_maxshift(dd,&comm->ddpme[d],
                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
                         comm->ddpme[d].slb_dim_f);
    }
}
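/*
 * Editor's illustration (not part of the original source): the statically
 * load balanced branch above simply accumulates cell boundaries from the
 * user-supplied relative cell sizes.  A self-contained sketch with plain
 * doubles, assuming a hypothetical slb_frac[] that sums to 1:
 */
#if 0   /* illustrative sketch only, never compiled */
static void sketch_slb_boundaries(double box0,double box_size,
                                  const double *slb_frac,int nc,
                                  double *cell_x) /* nc+1 entries */
{
    int j;

    cell_x[0] = box0;
    for(j=0; j<nc; j++)
    {
        /* each cell gets its fraction of the box length */
        cell_x[j+1] = cell_x[j] + box_size*slb_frac[j];
    }
}
#endif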
static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
                                                  int d,int dim,gmx_domdec_root_t *root,
                                                  gmx_ddbox_t *ddbox,
                                                  bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
{
    gmx_domdec_comm_t *comm;
    int  ncd,i,j,nmin,nmin_old;
    bool bLimLo,bLimHi;
    real *cell_size;
    real fac,halfway,cellsize_limit_f_i,region_size;
    bool bPBC,bLastHi=FALSE;
    int  nrange[]={range[0],range[1]};
    char buf[22];

    region_size = root->cell_f[range[1]]-root->cell_f[range[0]];

    comm = dd->comm;

    ncd = dd->nc[dim];

    bPBC = (dim < ddbox->npbcdim);

    cell_size = root->buf_ncd;

    if (debug)
    {
        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
    }

    /* First we need to check if the scaling does not make cells
     * smaller than the smallest allowed size.
     * We need to do this iteratively, since if a cell is too small,
     * it needs to be enlarged, which makes all the other cells smaller,
     * which could in turn make another cell smaller than allowed.
     */
    for(i=range[0]; i<range[1]; i++)
    {
        root->bCellMin[i] = FALSE;
    }
    nmin = 0;
    do
    {
        nmin_old = nmin;
        /* We need the total for normalization */
        fac = 0;
        for(i=range[0]; i<range[1]; i++)
        {
            if (root->bCellMin[i] == FALSE)
            {
                fac += cell_size[i];
            }
        }
        fac = (region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
        /* Determine the cell boundaries */
        for(i=range[0]; i<range[1]; i++)
        {
            if (root->bCellMin[i] == FALSE)
            {
                cell_size[i] *= fac;
                if (!bPBC && (i == 0 || i == dd->nc[dim]-1))
                {
                    cellsize_limit_f_i = 0;
                }
                else
                {
                    cellsize_limit_f_i = cellsize_limit_f;
                }
                if (cell_size[i] < cellsize_limit_f_i)
                {
                    root->bCellMin[i] = TRUE;
                    cell_size[i] = cellsize_limit_f_i;
                    nmin++;
                }
            }
            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
        }
    }
    while (nmin > nmin_old);

    i = range[1]-1;
    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
    /* For this check we should not use DD_CELL_MARGIN,
     * but a slightly smaller factor,
     * since rounding could get us below the limit.
     */
    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
    {
        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
                  gmx_step_str(step,buf),
                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
                  ncd,comm->cellsize_min[dim]);
    }

    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);

    if (!bUniform)
    {
        /* Check that the boundary did not displace more than halfway
         * into each of the cells it bounds, as this could cause problems,
         * especially when the differences between cell sizes are large.
         * If changes are applied, they will not make cells smaller
         * than the cut-off, as we check all the boundaries which
         * might be affected by a change and if the old state was ok,
         * the cells will at most be shrunk back to their old size.
         */
        for(i=range[0]+1; i<range[1]; i++)
        {
            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
            if (root->cell_f[i] < halfway)
            {
                root->cell_f[i] = halfway;
                /* Check if the change also causes shifts of the next boundaries */
                for(j=i+1; j<range[1]; j++)
                {
                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
                    {
                        root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
                    }
                }
            }
            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
            if (root->cell_f[i] > halfway)
            {
                root->cell_f[i] = halfway;
                /* Check if the change also causes shifts of the next boundaries */
                for(j=i-1; j>=range[0]+1; j--)
                {
                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
                    {
                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
                    }
                }
            }
        }
    }

    /* nrange is defined as [lower, upper) range for new call to enforce_limits */
    /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
     * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
     * for a and b nrange is used */
    if (d > 0)
    {
        /* Take care of the staggering of the cell boundaries */
        if (bUniform)
        {
            for(i=range[0]; i<range[1]; i++)
            {
                root->cell_f_max0[i] = root->cell_f[i];
                root->cell_f_min1[i] = root->cell_f[i+1];
            }
        }
        else
        {
            for(i=range[0]+1; i<range[1]; i++)
            {
                bLimLo = (root->cell_f[i] < root->bound_min[i]);
                bLimHi = (root->cell_f[i] > root->bound_max[i]);
                if (bLimLo && bLimHi)
                {
                    /* Both limits violated, try the best we can */
                    /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
                    nrange[0] = range[0];
                    nrange[1] = i;
                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);

                    nrange[0] = i;
                    nrange[1] = range[1];
                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);

                    return;
                }
                else if (bLimLo)
                {
                    /* root->cell_f[i] = root->bound_min[i]; */
                    nrange[1] = i;  /* only store violation location. There could be a LimLo violation following with a higher index */
                    bLastHi = FALSE;
                }
                else if (bLimHi && !bLastHi)
                {
                    bLastHi = TRUE;
                    if (nrange[1] < range[1])   /* found a LimLo before */
                    {
                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
                        nrange[0] = nrange[1];
                    }
                    root->cell_f[i] = root->bound_max[i];
                    nrange[1] = i;
                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
                    nrange[0] = i;
                    nrange[1] = range[1];
                }
            }
            if (nrange[1] < range[1])   /* found last a LimLo */
            {
                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
                nrange[0] = nrange[1];
                nrange[1] = range[1];
                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
            }
            else if (nrange[0] > range[0]) /* found at least one LimHi */
            {
                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
            }
        }
    }
}
static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
                                       int d,int dim,gmx_domdec_root_t *root,
                                       gmx_ddbox_t *ddbox,bool bDynamicBox,
                                       bool bUniform,gmx_large_int_t step)
{
    gmx_domdec_comm_t *comm;
    int  ncd,d1,i,pos;
    real *cell_size;
    real load_aver,load_i,imbalance,change,change_max,sc;
    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
    real change_limit = 0.1;
    real relax = 0.5;
    bool bPBC;
    int  range[] = { 0, 0 };
    char buf[22];

    comm = dd->comm;

    ncd = dd->nc[dim];

    bPBC = (dim < ddbox->npbcdim);

    cell_size = root->buf_ncd;

    /* Store the original boundaries */
    for(i=0; i<ncd+1; i++)
    {
        root->old_cell_f[i] = root->cell_f[i];
    }
    if (bUniform)
    {
        for(i=0; i<ncd; i++)
        {
            cell_size[i] = 1.0/ncd;
        }
    }
    else if (dd_load_count(comm))
    {
        load_aver = comm->load[d].sum_m/ncd;
        change_max = 0;
        for(i=0; i<ncd; i++)
        {
            /* Determine the relative imbalance of cell i */
            load_i = comm->load[d].load[i*comm->load[d].nload+2];
            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
            /* Determine the change of the cell size using underrelaxation */
            change = -relax*imbalance;
            change_max = max(change_max,max(change,-change));
        }
        /* Limit the amount of scaling.
         * We need to use the same rescaling for all cells in one row,
         * otherwise the load balancing might not converge.
         */
        sc = relax;
        if (change_max > change_limit)
        {
            sc *= change_limit/change_max;
        }
        for(i=0; i<ncd; i++)
        {
            /* Determine the relative imbalance of cell i */
            load_i = comm->load[d].load[i*comm->load[d].nload+2];
            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
            /* Determine the change of the cell size using underrelaxation */
            change = -sc*imbalance;
            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
        }
    }

    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
    cellsize_limit_f *= DD_CELL_MARGIN;
    dist_min_f_hard   = grid_jump_limit(comm,d)/ddbox->box_size[dim];
    dist_min_f        = dist_min_f_hard * DD_CELL_MARGIN;
    if (ddbox->tric_dir[dim])
    {
        cellsize_limit_f /= ddbox->skew_fac[dim];
        dist_min_f       /= ddbox->skew_fac[dim];
    }
    if (bDynamicBox && d > 0)
    {
        dist_min_f *= DD_PRES_SCALE_MARGIN;
    }
    if (d > 0 && !bUniform)
    {
        /* Make sure that the grid is not shifted too much */
        for(i=1; i<ncd; i++) {
            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
            {
                gmx_incons("Inconsistent DD boundary staggering limits!");
            }
            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
            if (space > 0)
            {
                root->bound_min[i] += 0.5*space;
            }
            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
            if (space < 0)
            {
                root->bound_max[i] += 0.5*space;
            }
            if (debug)
            {
                fprintf(debug,
                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
                        d,i,
                        root->cell_f_max0[i-1] + dist_min_f,
                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
                        root->cell_f_min1[i] - dist_min_f);
            }
        }
    }
    range[1] = ncd;
    root->cell_f[0]   = 0;
    root->cell_f[ncd] = 1;
    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);

    /* After the checks above, the cells should obey the cut-off
     * restrictions, but it does not hurt to check.
     */
    for(i=0; i<ncd; i++)
    {
        if (debug)
        {
            fprintf(debug,"Relative bounds dim %d cell %d: %f %f\n",
                    dim,i,root->cell_f[i],root->cell_f[i+1]);
        }

        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
            root->cell_f[i+1] - root->cell_f[i] <
            cellsize_limit_f/DD_CELL_MARGIN)
        {
            fprintf(stderr,
                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
                    gmx_step_str(step,buf),dim2char(dim),i,
                    (root->cell_f[i+1] - root->cell_f[i])
                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
        }
    }

    pos = ncd + 1;
    /* Store the cell boundaries of the lower dimensions at the end */
    for(d1=0; d1<d; d1++)
    {
        root->cell_f[pos++] = comm->cell_f0[d1];
        root->cell_f[pos++] = comm->cell_f1[d1];
    }

    if (d < comm->npmedecompdim)
    {
        /* The master determines the maximum shift for
         * the coordinate communication between separate PME nodes.
         */
        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
    }
    root->cell_f[pos++] = comm->ddpme[0].maxshift;
    if (d >= 1)
    {
        root->cell_f[pos++] = comm->ddpme[1].maxshift;
    }
}
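/*
 * Editor's illustration (not part of the original source): the load-driven
 * resizing above is plain underrelaxation: cell i is scaled by
 * (1 - relax*imbalance_i) with relax = 0.5, and the whole row shares one
 * clipped scaling factor so no cell changes by more than 10% per step.
 * A per-cell sketch of the same update (the real code clips through a
 * common row factor sc instead; names here are hypothetical):
 */
#if 0   /* illustrative sketch only, never compiled */
static double sketch_dlb_resize(double size,double load_i,double load_aver)
{
    const double relax = 0.5,change_limit = 0.1;
    double imbalance,change;

    imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
    change    = -relax*imbalance;
    if (change >  change_limit) { change =  change_limit; }
    if (change < -change_limit) { change = -change_limit; }

    /* e.g. a cell 20% over-loaded (imbalance 0.2) shrinks by 10% */
    return size*(1 + change);
}
#endif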
static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
                                             gmx_ddbox_t *ddbox,int dimind)
{
    gmx_domdec_comm_t *comm;
    int dim;

    comm = dd->comm;

    /* Set the cell dimensions */
    dim = dd->dim[dimind];
    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
    if (dim >= ddbox->nboundeddim)
    {
        comm->cell_x0[dim] += ddbox->box0[dim];
        comm->cell_x1[dim] += ddbox->box0[dim];
    }
}
static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
                                         int d,int dim,real *cell_f_row,
                                         gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int d1,pos;

    comm = dd->comm;

#ifdef GMX_MPI
    /* Each node would only need to know two fractions,
     * but it is probably cheaper to broadcast the whole array.
     */
    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
              0,comm->mpi_comm_load[d]);
#endif
    /* Copy the fractions for this dimension from the buffer */
    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
    /* The whole array was communicated, so set the buffer position */
    pos = dd->nc[dim] + 1;
    for(d1=0; d1<=d; d1++)
    {
        if (d1 < d)
        {
            /* Copy the cell fractions of the lower dimensions */
            comm->cell_f0[d1] = cell_f_row[pos++];
            comm->cell_f1[d1] = cell_f_row[pos++];
        }
        relative_to_absolute_cell_bounds(dd,ddbox,d1);
    }
    /* Convert the communicated shift from float to int */
    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
    if (d >= 1)
    {
        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
    }
}
static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
                                         gmx_ddbox_t *ddbox,bool bDynamicBox,
                                         bool bUniform,gmx_large_int_t step)
{
    gmx_domdec_comm_t *comm;
    int  d,dim,d1;
    bool bRowMember,bRowRoot;
    real *cell_f_row;

    comm = dd->comm;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        bRowMember = TRUE;
        bRowRoot   = TRUE;
        for(d1=d; d1<dd->ndim; d1++)
        {
            if (dd->ci[dd->dim[d1]] > 0)
            {
                if (d1 > d)
                {
                    bRowMember = FALSE;
                }
                bRowRoot = FALSE;
            }
        }
        if (bRowMember)
        {
            if (bRowRoot)
            {
                set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
                                           ddbox,bDynamicBox,bUniform,step);
                cell_f_row = comm->root[d]->cell_f;
            }
            else
            {
                cell_f_row = comm->cell_f_row;
            }
            distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
        }
    }
}
static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
{
    int d;

    /* This function assumes the box is static and should therefore
     * not be called when the box has changed since the last
     * call to dd_partition_system.
     */
    for(d=0; d<dd->ndim; d++)
    {
        relative_to_absolute_cell_bounds(dd,ddbox,d);
    }
}
static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
                                  gmx_ddbox_t *ddbox,bool bDynamicBox,
                                  bool bUniform,bool bDoDLB,gmx_large_int_t step,
                                  gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    int dim;

    comm = dd->comm;

    if (bDoDLB)
    {
        wallcycle_start(wcycle,ewcDDCOMMBOUND);
        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
    }
    else if (bDynamicBox)
    {
        set_dd_cell_sizes_dlb_nochange(dd,ddbox);
    }

    /* Set the dimensions for which no DD is used */
    for(dim=0; dim<DIM; dim++) {
        if (dd->nc[dim] == 1) {
            comm->cell_x0[dim] = 0;
            comm->cell_x1[dim] = ddbox->box_size[dim];
            if (dim >= ddbox->nboundeddim)
            {
                comm->cell_x0[dim] += ddbox->box0[dim];
                comm->cell_x1[dim] += ddbox->box0[dim];
            }
        }
    }
}
static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
{
    int d,np,i;
    gmx_domdec_comm_dim_t *cd;

    for(d=0; d<dd->ndim; d++)
    {
        cd = &dd->comm->cd[d];
        np = npulse[dd->dim[d]];
        if (np > cd->np_nalloc)
        {
            if (debug)
            {
                fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
                        dim2char(dd->dim[d]),np);
            }
            if (DDMASTER(dd) && cd->np_nalloc > 0)
            {
                fprintf(stderr,"\nIncreasing the number of cells to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
            }
            srenew(cd->ind,np);
            for(i=cd->np_nalloc; i<np; i++)
            {
                cd->ind[i].index  = NULL;
                cd->ind[i].nalloc = 0;
            }
            cd->np_nalloc = np;
        }
        cd->np = np;
    }
}
static void set_dd_cell_sizes(gmx_domdec_t *dd,
                              gmx_ddbox_t *ddbox,bool bDynamicBox,
                              bool bUniform,bool bDoDLB,gmx_large_int_t step,
                              gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    int  d;
    ivec npulse;

    comm = dd->comm;

    /* Copy the old cell boundaries for the cg displacement check */
    copy_rvec(comm->cell_x0,comm->old_cell_x0);
    copy_rvec(comm->cell_x1,comm->old_cell_x1);

    if (comm->bDynLoadBal)
    {
        if (DDMASTER(dd))
        {
            check_box_size(dd,ddbox);
        }
        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
    }
    else
    {
        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
        realloc_comm_ind(dd,npulse);
    }

    if (debug)
    {
        for(d=0; d<DIM; d++)
        {
            fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
                    d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
        }
    }
}
static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
                                  gmx_ddbox_t *ddbox,
                                  rvec cell_ns_x0,rvec cell_ns_x1,
                                  gmx_large_int_t step)
{
    gmx_domdec_comm_t *comm;
    int  dim_ind,dim;
    char buf[22];

    comm = dd->comm;

    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
    {
        dim = dd->dim[dim_ind];

        /* Without PBC we don't have restrictions on the outer cells */
        if (!(dim >= ddbox->npbcdim &&
              (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
            comm->bDynLoadBal &&
            (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
            comm->cellsize_min[dim])
        {
            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
                      gmx_step_str(step,buf),dim2char(dim),
                      comm->cell_x1[dim] - comm->cell_x0[dim],
                      ddbox->skew_fac[dim],
                      dd->comm->cellsize_min[dim],
                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
        }
    }

    if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
    {
        /* Communicate the boundaries and update cell_ns_x0/1 */
        dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
        if (dd->bGridJump && dd->ndim > 1)
        {
            check_grid_jump(step,dd,ddbox);
        }
    }
}
static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
{
    if (YY < npbcdim)
    {
        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
    }
    else
    {
        tcm[YY][XX] = 0;
    }
    if (ZZ < npbcdim)
    {
        tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
        tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
    }
    else
    {
        tcm[ZZ][XX] = 0;
        tcm[ZZ][YY] = 0;
    }
}
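/*
 * Editor's worked example (not part of the original source): tcm converts
 * a Cartesian position into the lattice coordinate of dimension d via
 * pos_d = x[d] + sum_{j>d} x[j]*tcm[j][d].  For a box with
 * box[YY][XX] = 2 and box[YY][YY] = 4, tcm[YY][XX] = -2/4 = -0.5, so an
 * atom at y = 4 contributes -2 to its effective x, exactly undoing the
 * skew of 2 accumulated over the full y extent of the cell.
 */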
static void check_screw_box(matrix box)
{
    /* Mathematical limitation */
    if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
    {
        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
    }

    /* Limitation due to the asymmetry of the eighth shell method */
    if (box[ZZ][YY] != 0)
    {
        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
    }
}
static void distribute_cg(FILE *fplog,gmx_large_int_t step,
                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
                          gmx_domdec_t *dd)
{
    gmx_domdec_master_t *ma;
    int  **tmp_ind=NULL,*tmp_nalloc=NULL;
    int  i,icg,j,k,k0,k1,d,npbcdim;
    matrix tcm;
    rvec box_size,cg_cm;
    ivec ind;
    real nrcg,inv_ncg,pos_d;
    int  *cgindex;
    bool bUnbounded,bScrew;
    char buf[22];

    ma = dd->ma;

    if (tmp_ind == NULL)
    {
        snew(tmp_nalloc,dd->nnodes);
        snew(tmp_ind,dd->nnodes);
        for(i=0; i<dd->nnodes; i++)
        {
            tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
            snew(tmp_ind[i],tmp_nalloc[i]);
        }
    }

    /* Clear the count */
    for(i=0; i<dd->nnodes; i++)
    {
        ma->ncg[i] = 0;
        ma->nat[i] = 0;
    }

    make_tric_corr_matrix(dd->npbcdim,box,tcm);

    cgindex = cgs->index;

    /* Compute the center of geometry for all charge groups */
    for(icg=0; icg<cgs->nr; icg++)
    {
        k0   = cgindex[icg];
        k1   = cgindex[icg+1];
        nrcg = k1 - k0;
        if (nrcg == 1)
        {
            copy_rvec(pos[k0],cg_cm);
        }
        else
        {
            inv_ncg = 1.0/nrcg;

            clear_rvec(cg_cm);
            for(k=k0; (k<k1); k++)
            {
                rvec_inc(cg_cm,pos[k]);
            }
            for(d=0; (d<DIM); d++)
            {
                cg_cm[d] *= inv_ncg;
            }
        }
        /* Put the charge group in the box and determine the cell index */
        for(d=DIM-1; d>=0; d--) {
            pos_d = cg_cm[d];
            if (d < dd->npbcdim)
            {
                bScrew = (dd->bScrewPBC && d == XX);
                if (tric_dir[d] && dd->nc[d] > 1)
                {
                    /* Use triclinic coordinates for this dimension */
                    for(j=d+1; j<DIM; j++)
                    {
                        pos_d += cg_cm[j]*tcm[j][d];
                    }
                }
                while(pos_d >= box[d][d])
                {
                    pos_d -= box[d][d];
                    rvec_dec(cg_cm,box[d]);
                    if (bScrew)
                    {
                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
                    }
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_dec(pos[k],box[d]);
                        if (bScrew)
                        {
                            pos[k][YY] = box[YY][YY] - pos[k][YY];
                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
                        }
                    }
                }
                while(pos_d < 0)
                {
                    pos_d += box[d][d];
                    rvec_inc(cg_cm,box[d]);
                    if (bScrew)
                    {
                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
                    }
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_inc(pos[k],box[d]);
                        if (bScrew)
                        {
                            pos[k][YY] = box[YY][YY] - pos[k][YY];
                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
                        }
                    }
                }
            }
            /* This could be done more efficiently */
            ind[d] = 0;
            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
            {
                ind[d]++;
            }
        }
        i = dd_index(dd->nc,ind);
        if (ma->ncg[i] == tmp_nalloc[i])
        {
            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
            srenew(tmp_ind[i],tmp_nalloc[i]);
        }
        tmp_ind[i][ma->ncg[i]] = icg;
        ma->ncg[i]++;
        ma->nat[i] += cgindex[icg+1] - cgindex[icg];
    }

    k1 = 0;
    for(i=0; i<dd->nnodes; i++)
    {
        ma->index[i] = k1;
        for(k=0; k<ma->ncg[i]; k++)
        {
            ma->cg[k1++] = tmp_ind[i][k];
        }
    }
    ma->index[dd->nnodes] = k1;

    for(i=0; i<dd->nnodes; i++)
    {
        sfree(tmp_ind[i]);
    }
    sfree(tmp_ind);
    sfree(tmp_nalloc);

    if (fplog)
    {
        fprintf(fplog,"Charge group distribution at step %s:",
                gmx_step_str(step,buf));
        for(i=0; i<dd->nnodes; i++)
        {
            fprintf(fplog," %d",ma->ncg[i]);
        }
        fprintf(fplog,"\n");
    }
}
static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
                                rvec pos[])
{
    gmx_domdec_master_t *ma=NULL;
    ivec npulse;
    int  i,cg_gl;
    int  *ibuf,buf2[2] = { 0, 0 };

    if (DDMASTER(dd))
    {
        ma = dd->ma;

        if (dd->bScrewPBC)
        {
            check_screw_box(box);
        }

        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);

        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ibuf[2*i]   = ma->ncg[i];
            ma->ibuf[2*i+1] = ma->nat[i];
        }
        ibuf = ma->ibuf;
    }
    else
    {
        ibuf = NULL;
    }
    dd_scatter(dd,2*sizeof(int),ibuf,buf2);

    dd->ncg_home = buf2[0];
    dd->nat_home = buf2[1];
    dd->ncg_tot  = dd->ncg_home;
    dd->nat_tot  = dd->nat_home;
    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
    {
        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
        srenew(dd->index_gl,dd->cg_nalloc);
        srenew(dd->cgindex,dd->cg_nalloc+1);
    }
    if (DDMASTER(dd))
    {
        for(i=0; i<dd->nnodes; i++)
        {
            ma->ibuf[i]            = ma->ncg[i]*sizeof(int);
            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
        }
    }

    dd_scatterv(dd,
                DDMASTER(dd) ? ma->ibuf : NULL,
                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
                DDMASTER(dd) ? ma->cg : NULL,
                dd->ncg_home*sizeof(int),dd->index_gl);

    /* Determine the home charge group sizes */
    dd->cgindex[0] = 0;
    for(i=0; i<dd->ncg_home; i++)
    {
        cg_gl = dd->index_gl[i];
        dd->cgindex[i+1] =
            dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
    }

    if (debug)
    {
        fprintf(debug,"Home charge groups:\n");
        for(i=0; i<dd->ncg_home; i++)
        {
            fprintf(debug," %d",dd->index_gl[i]);
            if (i % 10 == 9)
            {
                fprintf(debug,"\n");
            }
        }
        fprintf(debug,"\n");
    }
}
static int compact_and_copy_vec_at(int ncg,int *move,
                                   int *cgindex,
                                   int nvec,int vec,
                                   rvec *src,gmx_domdec_comm_t *comm,
                                   bool bCompact)
{
    int m,icg,i,i0,i1,nrcg;
    int home_pos;
    int pos_vec[DIM*2];

    home_pos = 0;

    for(m=0; m<DIM*2; m++)
    {
        pos_vec[m] = 0;
    }

    i0 = 0;
    for(icg=0; icg<ncg; icg++)
    {
        i1 = cgindex[icg+1];
        m  = move[icg];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                for(i=i0; i<i1; i++)
                {
                    copy_rvec(src[i],src[home_pos++]);
                }
            }
        }
        else
        {
            /* Copy to the communication buffer */
            nrcg = i1 - i0;
            pos_vec[m] += 1 + vec*nrcg;
            for(i=i0; i<i1; i++)
            {
                copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
            }
            pos_vec[m] += (nvec - vec - 1)*nrcg;
        }
        if (!bCompact)
        {
            home_pos += i1 - i0;
        }
        i0 = i1;
    }

    return home_pos;
}
static int compact_and_copy_vec_cg(int ncg,int *move,
                                   int *cgindex,
                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
                                   bool bCompact)
{
    int m,icg,i0,i1,nrcg;
    int home_pos;
    int pos_vec[DIM*2];

    home_pos = 0;

    for(m=0; m<DIM*2; m++)
    {
        pos_vec[m] = 0;
    }

    i0 = 0;
    for(icg=0; icg<ncg; icg++)
    {
        i1 = cgindex[icg+1];
        m  = move[icg];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                copy_rvec(src[icg],src[home_pos++]);
            }
        }
        else
        {
            nrcg = i1 - i0;
            /* Copy to the communication buffer */
            copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
            pos_vec[m] += 1 + nrcg*nvec;
        }
        i0 = i1;
    }
    if (!bCompact)
    {
        home_pos = ncg;
    }

    return home_pos;
}
static int compact_ind(int ncg,int *move,
                       int *index_gl,int *cgindex,
                       int *gatindex,
                       gmx_ga2la_t ga2la,char *bLocalCG,
                       int *cginfo)
{
    int cg,nat,a0,a1,a,a_gl;
    int home_pos;

    home_pos = 0;
    nat = 0;
    for(cg=0; cg<ncg; cg++)
    {
        a0 = cgindex[cg];
        a1 = cgindex[cg+1];
        if (move[cg] == -1)
        {
            /* Compact the home arrays in place.
             * Anything that can be done here avoids access to global arrays.
             */
            cgindex[home_pos] = nat;
            for(a=a0; a<a1; a++)
            {
                a_gl = gatindex[a];
                gatindex[nat] = a_gl;
                /* The cell number stays 0, so we don't need to set it */
                ga2la_change_la(ga2la,a_gl,nat);
                nat++;
            }
            index_gl[home_pos] = index_gl[cg];
            cginfo[home_pos]   = cginfo[cg];
            /* The charge group remains local, so bLocalCG does not change */
            home_pos++;
        }
        else
        {
            /* Clear the global indices */
            for(a=a0; a<a1; a++)
            {
                ga2la_del(ga2la,gatindex[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[index_gl[cg]] = FALSE;
            }
        }
    }
    cgindex[home_pos] = nat;

    return home_pos;
}
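/*
 * Editor's illustration (not part of the original source): stripped of the
 * global-index bookkeeping, the compaction above is the classic stable
 * two-pointer filter; kept entries slide down to the write position:
 */
#if 0   /* illustrative sketch only, never compiled */
static int sketch_compact(int *a,int n,const int *keep)
{
    int i,home_pos = 0;

    for(i=0; i<n; i++)
    {
        if (keep[i])
        {
            a[home_pos++] = a[i];
        }
    }

    return home_pos; /* the new length */
}
#endif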
static void clear_and_mark_ind(int ncg,int *move,
                               int *index_gl,int *cgindex,int *gatindex,
                               gmx_ga2la_t ga2la,char *bLocalCG,
                               int *cell_index)
{
    int cg,a0,a1,a;

    for(cg=0; cg<ncg; cg++)
    {
        if (move[cg] >= 0)
        {
            a0 = cgindex[cg];
            a1 = cgindex[cg+1];
            /* Clear the global indices */
            for(a=a0; a<a1; a++)
            {
                ga2la_del(ga2la,gatindex[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[index_gl[cg]] = FALSE;
            }
            /* Signal that this cg has moved using the ns cell index.
             * Here we set it to -1.
             * fill_grid will change it from -1 to 4*grid->ncells.
             */
            cell_index[cg] = -1;
        }
    }
}
static void print_cg_move(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_large_int_t step,int cg,int dim,int dir,
                          real limitd,
                          rvec cm_old,rvec cm_new,real pos_d)
{
    gmx_domdec_comm_t *comm;
    char buf[22];

    comm = dd->comm;

    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
    fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
            ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
    fprintf(fplog,"distance out of cell %f\n",
            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
    fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
            cm_old[XX],cm_old[YY],cm_old[ZZ]);
    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
            cm_new[XX],cm_new[YY],cm_new[ZZ]);
    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->cell_x0[dim],comm->cell_x1[dim]);
}
static void cg_move_error(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_large_int_t step,int cg,int dim,int dir,
                          real limitd,
                          rvec cm_old,rvec cm_new,real pos_d)
{
    if (fplog)
    {
        print_cg_move(fplog, dd,step,cg,dim,dir,limitd,cm_old,cm_new,pos_d);
    }
    print_cg_move(stderr,dd,step,cg,dim,dir,limitd,cm_old,cm_new,pos_d);
    gmx_fatal(FARGS,
              "A charge group moved too far between two domain decomposition steps\n"
              "This usually means that your system is not well equilibrated");
}
static void rotate_state_atom(t_state *state,int a)
{
    int est;

    for(est=0; est<estNR; est++)
    {
        if (EST_DISTR(est) && state->flags & (1<<est)) {
            switch (est) {
            case estX:
                /* Rotate the complete state; for a rectangular box only */
                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
                break;
            case estV:
                state->v[a][YY] = -state->v[a][YY];
                state->v[a][ZZ] = -state->v[a][ZZ];
                break;
            case estSDX:
                state->sd_X[a][YY] = -state->sd_X[a][YY];
                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
                break;
            case estCGP:
                state->cg_p[a][YY] = -state->cg_p[a][YY];
                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* These are distances, so not affected by rotation */
                break;
            default:
                gmx_incons("Unknown state entry encountered in rotate_state_atom");
            }
        }
    }
}
static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
                              gmx_domdec_t *dd,ivec tric_dir,
                              t_state *state,rvec **f,
                              t_forcerec *fr,t_mdatoms *md,
                              bool bCompact,
                              t_nrnb *nrnb)
{
    int  *move;
    int  npbcdim;
    int  ncg[DIM*2],nat[DIM*2];
    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
    int  sbuf[2],rbuf[2];
    int  home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
    int  flag;
    bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
    bool bScrew;
    ivec dev;
    real inv_ncg,pos_d;
    matrix tcm;
    rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
    int  *cgindex;
    cginfo_mb_t *cginfo_mb;
    gmx_domdec_comm_t *comm;

    if (dd->bScrewPBC)
    {
        check_screw_box(state->box);
    }

    comm  = dd->comm;
    cg_cm = fr->cg_cm;

    for(i=0; i<estNR; i++)
    {
        if (EST_DISTR(i))
        {
            switch (i)
            {
            case estX:   /* Always present */            break;
            case estV:   bV   = (state->flags & (1<<i)); break;
            case estSDX: bSDX = (state->flags & (1<<i)); break;
            case estCGP: bCGP = (state->flags & (1<<i)); break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
            case estORIRE_DTAV:
                /* No processing required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
            }
        }
    }

    if (dd->ncg_tot > comm->nalloc_int)
    {
        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
        srenew(comm->buf_int,comm->nalloc_int);
    }
    move = comm->buf_int;

    /* Clear the count */
    for(c=0; c<dd->ndim*2; c++)
    {
        ncg[c] = 0;
        nat[c] = 0;
    }

    npbcdim = dd->npbcdim;

    for(d=0; (d<DIM); d++)
    {
        limitd[d] = dd->comm->cellsize_min[d];
        if (d >= npbcdim && dd->ci[d] == 0)
        {
            cell_x0[d] = -GMX_FLOAT_MAX;
        }
        else
        {
            cell_x0[d] = comm->cell_x0[d];
        }
        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
        {
            cell_x1[d] = GMX_FLOAT_MAX;
        }
        else
        {
            cell_x1[d] = comm->cell_x1[d];
        }
        limit0[d] = comm->old_cell_x0[d] - limitd[d];
        limit1[d] = comm->old_cell_x1[d] + limitd[d];
    }

    make_tric_corr_matrix(npbcdim,state->box,tcm);

    cgindex = dd->cgindex;

    /* Compute the center of geometry for all home charge groups
     * and put them in the box and determine where they should go.
     */
    for(cg=0; cg<dd->ncg_home; cg++)
    {
        k0   = cgindex[cg];
        k1   = cgindex[cg+1];
        nrcg = k1 - k0;
        if (nrcg == 1)
        {
            copy_rvec(state->x[k0],cm_new);
        }
        else
        {
            inv_ncg = 1.0/nrcg;

            clear_rvec(cm_new);
            for(k=k0; (k<k1); k++)
            {
                rvec_inc(cm_new,state->x[k]);
            }
            for(d=0; (d<DIM); d++)
            {
                cm_new[d] = inv_ncg*cm_new[d];
            }
        }

        clear_ivec(dev);
        /* Do pbc and check DD cell boundary crossings */
        for(d=DIM-1; d>=0; d--)
        {
            if (dd->nc[d] > 1)
            {
                bScrew = (dd->bScrewPBC && d == XX);
                /* Determine the location of this cg in lattice coordinates */
                pos_d = cm_new[d];
                if (tric_dir[d])
                {
                    for(d2=d+1; d2<DIM; d2++)
                    {
                        pos_d += cm_new[d2]*tcm[d2][d];
                    }
                }
                /* Put the charge group in the triclinic unit-cell */
                if (pos_d >= cell_x1[d])
                {
                    if (pos_d >= limit1[d])
                    {
                        cg_move_error(fplog,dd,step,cg,d,1,limitd[d],
                                      cg_cm[cg],cm_new,pos_d);
                    }
                    dev[d] = 1;
                    if (dd->ci[d] == dd->nc[d] - 1)
                    {
                        rvec_dec(cm_new,state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for(k=k0; (k<k1); k++)
                        {
                            rvec_dec(state->x[k],state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state,k);
                            }
                        }
                    }
                }
                else if (pos_d < cell_x0[d])
                {
                    if (pos_d < limit0[d])
                    {
                        cg_move_error(fplog,dd,step,cg,d,-1,limitd[d],
                                      cg_cm[cg],cm_new,pos_d);
                    }
                    dev[d] = -1;
                    if (dd->ci[d] == 0)
                    {
                        rvec_inc(cm_new,state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for(k=k0; (k<k1); k++)
                        {
                            rvec_inc(state->x[k],state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state,k);
                            }
                        }
                    }
                }
            }
            else if (d < npbcdim)
            {
                /* Put the charge group in the rectangular unit-cell */
                while (cm_new[d] >= state->box[d][d])
                {
                    rvec_dec(cm_new,state->box[d]);
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_dec(state->x[k],state->box[d]);
                    }
                }
                while (cm_new[d] < 0)
                {
                    rvec_inc(cm_new,state->box[d]);
                    for(k=k0; (k<k1); k++)
                    {
                        rvec_inc(state->x[k],state->box[d]);
                    }
                }
            }
        }

        copy_rvec(cm_new,cg_cm[cg]);

        /* Determine where this cg should go */
        flag = 0;
        mc   = -1;
        for(d=0; d<dd->ndim; d++)
        {
            dim = dd->dim[d];
            if (dev[dim] == 1)
            {
                flag |= DD_FLAG_FW(d);
                if (mc == -1)
                {
                    mc = d*2;
                }
            }
            else if (dev[dim] == -1)
            {
                flag |= DD_FLAG_BW(d);
                if (mc == -1)
                {
                    if (dd->nc[dim] > 2)
                    {
                        mc = d*2 + 1;
                    }
                    else
                    {
                        mc = d*2;
                    }
                }
            }
        }
        move[cg] = mc;
        if (mc >= 0)
        {
            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
            {
                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
            }
            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
            /* We store the cg size in the lower 16 bits
             * and the place where the charge group should go
             * in the next 6 bits. This saves some communication volume.
             */
            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
            ncg[mc] += 1;
            nat[mc] += nrcg;
        }
    }

    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);

    nvec = 1;
    if (bV)
    {
        nvec++;
    }
    if (bSDX)
    {
        nvec++;
    }
    if (bCGP)
    {
        nvec++;
    }

    /* Make sure the communication buffers are large enough */
    for(mc=0; mc<dd->ndim*2; mc++)
    {
        nvr = ncg[mc] + nat[mc]*nvec;
        if (nvr > comm->cgcm_state_nalloc[mc])
        {
            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
        }
    }

    /* Recalculating cg_cm might be cheaper than communicating,
     * but that could give rise to rounding issues.
     */
    home_pos_cg =
        compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
                                nvec,cg_cm,comm,bCompact);

    vec = 0;
    home_pos_at =
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->x,comm,bCompact);
    if (bV)
    {
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->v,comm,bCompact);
    }
    if (bSDX)
    {
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->sd_X,comm,bCompact);
    }
    if (bCGP)
    {
        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
                                nvec,vec++,state->cg_p,comm,bCompact);
    }

    if (bCompact)
    {
        compact_ind(dd->ncg_home,move,
                    dd->index_gl,dd->cgindex,dd->gatindex,
                    dd->ga2la,comm->bLocalCG,
                    fr->cginfo);
    }
    else
    {
        clear_and_mark_ind(dd->ncg_home,move,
                           dd->index_gl,dd->cgindex,dd->gatindex,
                           dd->ga2la,comm->bLocalCG,
                           fr->ns.grid->cell_index);
    }

    cginfo_mb = fr->cginfo_mb;

    ncg_stay_home = home_pos_cg;
    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        ncg_recv = 0;
        nat_recv = 0;
        nvr      = 0;
        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
        {
            cdd = d*2 + dir;
            /* Communicate the cg and atom counts */
            sbuf[0] = ncg[cdd];
            sbuf[1] = nat[cdd];
            if (debug)
            {
                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
                        d,dir,sbuf[0],sbuf[1]);
            }
            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);

            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
            {
                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
                srenew(comm->buf_int,comm->nalloc_int);
            }

            /* Communicate the charge group indices, sizes and flags */
            dd_sendrecv_int(dd, d, dir,
                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);

            nvs = ncg[cdd] + nat[cdd]*nvec;
            i   = rbuf[0]  + rbuf[1] *nvec;
            vec_rvec_check_alloc(&comm->vbuf,nvr+i);

            /* Communicate cgcm and state */
            dd_sendrecv_rvec(dd, d, dir,
                             comm->cgcm_state[cdd], nvs,
                             comm->vbuf.v+nvr, i);
            ncg_recv += rbuf[0];
            nat_recv += rbuf[1];
            nvr      += i;
        }

        /* Process the received charge groups */
        buf_pos = 0;
        for(cg=0; cg<ncg_recv; cg++)
        {
            flag = comm->buf_int[cg*DD_CGIBS+1];
            mc = -1;
            if (d < dd->ndim-1)
            {
                /* Check which direction this cg should go */
                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
                {
                    if (dd->bGridJump)
                    {
                        /* The cell boundaries for dimension d2 are not equal
                         * for each cell row of the lower dimension(s),
                         * therefore we might need to redetermine where
                         * this cg should go.
                         */
                        dim2 = dd->dim[d2];
                        /* If this cg crosses the box boundary in dimension d2
                         * we can use the communicated flag, so we do not
                         * have to worry about pbc.
                         */
                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
                               (flag & DD_FLAG_FW(d2))) ||
                              (dd->ci[dim2] == 0 &&
                               (flag & DD_FLAG_BW(d2)))))
                        {
                            /* Clear the two flags for this dimension */
                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
                            /* Determine the location of this cg
                             * in lattice coordinates
                             */
                            pos_d = comm->vbuf.v[buf_pos][dim2];
                            if (tric_dir[dim2])
                            {
                                for(d3=dim2+1; d3<DIM; d3++)
                                {
                                    pos_d +=
                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
                                }
                            }
                            /* Check if we are not at the box edge.
                             * pbc is only handled in the first step above,
                             * but this check could move over pbc while
                             * the first step did not due to different rounding.
                             */
                            if (pos_d >= cell_x1[dim2] &&
                                dd->ci[dim2] != dd->nc[dim2]-1)
                            {
                                flag |= DD_FLAG_FW(d2);
                            }
                            else if (pos_d < cell_x0[dim2] &&
                                     dd->ci[dim2] != 0)
                            {
                                flag |= DD_FLAG_BW(d2);
                            }
                            comm->buf_int[cg*DD_CGIBS+1] = flag;
                        }
                    }
                    /* Set to which neighboring cell this cg should go */
                    if (flag & DD_FLAG_FW(d2))
                    {
                        mc = d2*2;
                    }
                    else if (flag & DD_FLAG_BW(d2))
                    {
                        if (dd->nc[dd->dim[d2]] > 2)
                        {
                            mc = d2*2+1;
                        }
                        else
                        {
                            mc = d2*2;
                        }
                    }
                }
            }

            nrcg = flag & DD_FLAG_NRCG;
            if (mc == -1)
            {
                if (home_pos_cg+1 > dd->cg_nalloc)
                {
                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
                    srenew(dd->index_gl,dd->cg_nalloc);
                    srenew(dd->cgindex,dd->cg_nalloc+1);
                }
                /* Set the global charge group index and size */
                dd->index_gl[home_pos_cg]  = comm->buf_int[cg*DD_CGIBS];
                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
                /* Copy the state from the buffer */
                if (home_pos_cg >= fr->cg_nalloc)
                {
                    dd_realloc_fr_cg(fr,home_pos_cg+1);
                    cg_cm = fr->cg_cm;
                }
                copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
                /* Set the cginfo */
                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
                                                   dd->index_gl[home_pos_cg]);
                if (comm->bLocalCG)
                {
                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
                }

                if (home_pos_at+nrcg > state->nalloc)
                {
                    dd_realloc_state(state,f,home_pos_at+nrcg);
                }
                for(i=0; i<nrcg; i++)
                {
                    copy_rvec(comm->vbuf.v[buf_pos++],
                              state->x[home_pos_at+i]);
                }
                if (bV)
                {
                    for(i=0; i<nrcg; i++)
                    {
                        copy_rvec(comm->vbuf.v[buf_pos++],
                                  state->v[home_pos_at+i]);
                    }
                }
                if (bSDX)
                {
                    for(i=0; i<nrcg; i++)
                    {
                        copy_rvec(comm->vbuf.v[buf_pos++],
                                  state->sd_X[home_pos_at+i]);
                    }
                }
                if (bCGP)
                {
                    for(i=0; i<nrcg; i++)
                    {
                        copy_rvec(comm->vbuf.v[buf_pos++],
                                  state->cg_p[home_pos_at+i]);
                    }
                }
                home_pos_cg += 1;
                home_pos_at += nrcg;
            }
            else
            {
                /* Reallocate the buffers if necessary */
                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
                {
                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
                }
                nvr = ncg[mc] + nat[mc]*nvec;
                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
                {
                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
                }
                /* Copy from the receive to the send buffers */
                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
                       comm->buf_int + cg*DD_CGIBS,
                       DD_CGIBS*sizeof(int));
                memcpy(comm->cgcm_state[mc][nvr],
                       comm->vbuf.v[buf_pos],
                       (1+nrcg*nvec)*sizeof(rvec));
                buf_pos += 1 + nrcg*nvec;
                ncg[mc] += 1;
                nat[mc] += nrcg;
            }
        }
    }

    /* With sorting (!bCompact) the indices are now only partially up to date
     * and ncg_home and nat_home are not the real count, since there are
     * "holes" in the arrays for the charge groups that moved to neighbors.
     */
    dd->ncg_home = home_pos_cg;
    dd->nat_home = home_pos_at;

    if (debug)
    {
        fprintf(debug,"Finished repartitioning\n");
    }

    return ncg_stay_home;
}
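/*
 * Editor's illustration (not part of the original source): the second int
 * per charge group in cggl_flag packs the group size in the low 16 bits
 * and the move flags above them, as described at the packing site above.
 * Decoding with the DD_FLAG_* macros used in this file:
 */
#if 0   /* illustrative sketch only, never compiled */
static void sketch_decode_cggl_flag(int word,int d,
                                    int *nrcg,int *fw,int *bw)
{
    *nrcg = word & DD_FLAG_NRCG;         /* low 16 bits: #atoms in the cg */
    *fw   = (word & DD_FLAG_FW(d)) != 0; /* move forward in DD dim d?  */
    *bw   = (word & DD_FLAG_BW(d)) != 0; /* move backward in DD dim d? */
}
#endif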
void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
{
    dd->comm->cycl[ddCycl] += cycles;
    dd->comm->cycl_n[ddCycl]++;
    if (cycles > dd->comm->cycl_max[ddCycl])
    {
        dd->comm->cycl_max[ddCycl] = cycles;
    }
}
static double force_flop_count(t_nrnb *nrnb)
{
    int         i;
    double      sum;
    const char *name;

    sum = 0;
    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
    {
        /* To get closer to the real timings, we halve the count
         * for the normal loops and again halve it for water loops.
         */
        name = nrnb_str(i);
        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
        {
            sum += nrnb->n[i]*0.25*cost_nrnb(i);
        }
        else
        {
            sum += nrnb->n[i]*0.50*cost_nrnb(i);
        }
    }
    for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
    {
        name = nrnb_str(i);
        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
        {
            sum += nrnb->n[i]*cost_nrnb(i);
        }
    }
    for(i=eNR_BONDS; i<=eNR_WALLS; i++)
    {
        sum += nrnb->n[i]*cost_nrnb(i);
    }

    return sum;
}
void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
{
    if (dd->comm->eFlop)
    {
        dd->comm->flop -= force_flop_count(nrnb);
    }
}

void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
{
    if (dd->comm->eFlop)
    {
        dd->comm->flop += force_flop_count(nrnb);
        dd->comm->flop_n++;
    }
}
static void clear_dd_cycle_counts(gmx_domdec_t *dd)
{
    int i;

    for(i=0; i<ddCyclNr; i++)
    {
        dd->comm->cycl[i]     = 0;
        dd->comm->cycl_n[i]   = 0;
        dd->comm->cycl_max[i] = 0;
    }
    dd->comm->flop   = 0;
    dd->comm->flop_n = 0;
}
static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    gmx_domdec_load_t *load;
    gmx_domdec_root_t *root=NULL;
    int  d,dim,cid,i,pos;
    float cell_frac=0,sbuf[DD_NLOAD_MAX];
    bool bSepPME;

    if (debug)
    {
        fprintf(debug,"get_load_distribution start\n");
    }

    wallcycle_start(wcycle,ewcDDCOMMLOAD);

    comm = dd->comm;

    bSepPME = (dd->pme_nodeid >= 0);

    for(d=dd->ndim-1; d>=0; d--)
    {
        dim = dd->dim[d];
        /* Check if we participate in the communication in this dimension */
        if (d == dd->ndim-1 ||
            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
        {
            load = &comm->load[d];
            if (dd->bGridJump)
            {
                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
            }
            pos = 0;
            if (d == dd->ndim-1)
            {
                sbuf[pos++] = dd_force_load(comm);
                sbuf[pos++] = sbuf[0];
                if (dd->bGridJump)
                {
                    sbuf[pos++] = sbuf[0];
                    sbuf[pos++] = cell_frac;
                    if (d > 0)
                    {
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    }
                }
                if (bSepPME)
                {
                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
                    sbuf[pos++] = comm->cycl[ddCyclPME];
                }
            }
            else
            {
                sbuf[pos++] = comm->load[d+1].sum;
                sbuf[pos++] = comm->load[d+1].max;
                if (dd->bGridJump)
                {
                    sbuf[pos++] = comm->load[d+1].sum_m;
                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
                    sbuf[pos++] = comm->load[d+1].flags;
                    if (d > 0)
                    {
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    }
                }
                if (bSepPME)
                {
                    sbuf[pos++] = comm->load[d+1].mdf;
                    sbuf[pos++] = comm->load[d+1].pme;
                }
            }
            load->nload = pos;
            /* Communicate a row in DD direction d.
             * The communicators are setup such that the root always has rank 0.
             */
#ifdef GMX_MPI
            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
                       load->load,load->nload*sizeof(float),MPI_BYTE,
                       0,comm->mpi_comm_load[d]);
#endif
            if (dd->ci[dim] == dd->master_ci[dim])
            {
                /* We are the root, process this row */
                if (comm->bDynLoadBal)
                {
                    root = comm->root[d];
                }
                load->sum      = 0;
                load->max      = 0;
                load->sum_m    = 0;
                load->cvol_min = 1;
                load->flags    = 0;
                load->mdf      = 0;
                load->pme      = 0;
                pos = 0;
                for(i=0; i<dd->nc[dim]; i++)
                {
                    load->sum += load->load[pos++];
                    load->max = max(load->max,load->load[pos]);
                    pos++;
                    if (dd->bGridJump)
                    {
                        if (root->bLimited)
                        {
                            /* This direction could not be load balanced properly,
                             * therefore we need to use the maximum instead of
                             * the average load.
                             */
                            load->sum_m = max(load->sum_m,load->load[pos]);
                        }
                        else
                        {
                            load->sum_m += load->load[pos];
                        }
                        pos++;
                        load->cvol_min = min(load->cvol_min,load->load[pos]);
                        pos++;
                        if (d < dd->ndim-1)
                        {
                            load->flags = (int)(load->load[pos++] + 0.5);
                        }
                        if (d > 0)
                        {
                            root->cell_f_max0[i] = load->load[pos++];
                            root->cell_f_min1[i] = load->load[pos++];
                        }
                    }
                    if (bSepPME)
                    {
                        load->mdf = max(load->mdf,load->load[pos]);
                        pos++;
                        load->pme = max(load->pme,load->load[pos]);
                        pos++;
                    }
                }
                if (comm->bDynLoadBal && root->bLimited)
                {
                    load->sum_m *= dd->nc[dim];
                    load->flags |= (1<<d);
                }
            }
        }
    }

    if (DDMASTER(dd))
    {
        comm->nload     += dd_load_count(comm);
        comm->load_step += comm->cycl[ddCyclStep];
        comm->load_sum  += comm->load[0].sum;
        comm->load_max  += comm->load[0].max;
        if (comm->bDynLoadBal)
        {
            for(d=0; d<dd->ndim; d++)
            {
                if (comm->load[0].flags & (1<<d))
                {
                    comm->load_lim[d]++;
                }
            }
        }
        if (bSepPME)
        {
            comm->load_mdf += comm->load[0].mdf;
            comm->load_pme += comm->load[0].pme;
        }
    }

    wallcycle_stop(wcycle,ewcDDCOMMLOAD);

    if (debug)
    {
        fprintf(debug,"get_load_distribution finished\n");
    }
}
static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
{
    /* Return the relative performance loss on the total run time
     * due to the force calculation load imbalance.
     */
    if (dd->comm->nload > 0)
    {
        return
            (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
            (dd->comm->load_step*dd->nnodes);
    }
    else
    {
        return 0;
    }
}
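/*
 * Editor's worked example (not part of the original source): with 4 PP
 * nodes, accumulated per-step maximum load load_max = 1.2, accumulated
 * total load load_sum = 4.0 and accumulated step time load_step = 1.25,
 * the loss is (1.2*4 - 4.0)/(1.25*4) = 0.16, i.e. 16% of the run time is
 * spent waiting for the most loaded node.
 */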
static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
{
    char  buf[STRLEN];
    int   npp,npme,nnodes,d,limp;
    float imbal,pme_f_ratio,lossf,lossp=0;
    bool  bLim;
    gmx_domdec_comm_t *comm;

    comm = dd->comm;
    if (DDMASTER(dd) && comm->nload > 0)
    {
        npp    = dd->nnodes;
        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
        nnodes = npp + npme;
        imbal  = comm->load_max*npp/comm->load_sum - 1;
        lossf  = dd_force_imb_perf_loss(dd);
        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
        fprintf(fplog,"%s",buf);
        fprintf(stderr,"\n");
        fprintf(stderr,"%s",buf);
        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
        fprintf(fplog,"%s",buf);
        fprintf(stderr,"%s",buf);
        bLim = FALSE;
        if (comm->bDynLoadBal)
        {
            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
            for(d=0; d<dd->ndim; d++)
            {
                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
                if (limp >= 50)
                {
                    bLim = TRUE;
                }
            }
            sprintf(buf+strlen(buf),"\n");
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
        }
        if (npme > 0)
        {
            pme_f_ratio = comm->load_pme/comm->load_mdf;
            lossp = (comm->load_pme - comm->load_mdf)/comm->load_step;
            if (lossp <= 0)
            {
                lossp *= (float)npme/(float)nnodes;
            }
            else
            {
                lossp *= (float)npp/(float)nnodes;
            }
            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
        }
        fprintf(fplog,"\n");
        fprintf(stderr,"\n");

        if (lossf >= DD_PERF_LOSS)
        {
            sprintf(buf,
                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
                    "      in the domain decomposition.\n",lossf*100);
            if (!comm->bDynLoadBal)
            {
                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb).\n");
            }
            else if (bLim)
            {
                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
            }
            fprintf(fplog,"%s\n",buf);
            fprintf(stderr,"%s\n",buf);
        }
        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
        {
            sprintf(buf,
                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
                    "      had %s work to do than the PP nodes.\n"
                    "      You might want to %s the number of PME nodes\n"
                    "      or %s the cut-off and the grid spacing.\n",
                    fabs(lossp*100),
                    (lossp < 0) ? "less" : "more",
                    (lossp < 0) ? "decrease" : "increase",
                    (lossp < 0) ? "decrease" : "increase");
            fprintf(fplog,"%s\n",buf);
            fprintf(stderr,"%s\n",buf);
        }
    }
}
static float dd_vol_min(gmx_domdec_t *dd)
{
    return dd->comm->load[0].cvol_min*dd->nnodes;
}

static bool dd_load_flags(gmx_domdec_t *dd)
{
    return dd->comm->load[0].flags;
}

static float dd_f_imbal(gmx_domdec_t *dd)
{
    return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
}

static float dd_pme_f_ratio(gmx_domdec_t *dd)
{
    return dd->comm->load[0].pme/dd->comm->load[0].mdf;
}
static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
{
    int  flags,d;
    char buf[22];

    flags = dd_load_flags(dd);
    if (flags)
    {
        fprintf(fplog,
                "DD load balancing is limited by minimum cell size in dimension");
        for(d=0; d<dd->ndim; d++)
        {
            if (flags & (1<<d))
            {
                fprintf(fplog," %c",dim2char(dd->dim[d]));
            }
        }
        fprintf(fplog,"\n");
    }
    fprintf(fplog,"DD step %s",gmx_step_str(step,buf));
    if (dd->comm->bDynLoadBal)
    {
        fprintf(fplog," vol min/aver %5.3f%c",
                dd_vol_min(dd),flags ? '!' : ' ');
    }
    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
    if (dd->comm->cycl_n[ddCyclPME])
    {
        fprintf(fplog," pme mesh/force %5.3f",dd_pme_f_ratio(dd));
    }
    fprintf(fplog,"\n\n");
}
static void dd_print_load_verbose(gmx_domdec_t *dd)
{
    if (dd->comm->bDynLoadBal)
    {
        fprintf(stderr,"vol %4.2f%c ",
                dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
    }
    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
    if (dd->comm->cycl_n[ddCyclPME])
    {
        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
    }
}
#ifdef GMX_MPI
static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
                                   int dim_ind,ivec loc)
{
    MPI_Group g_row;
    MPI_Comm  c_row;
    int  dim,i,*rank;
    ivec loc_c;
    gmx_domdec_root_t *root;

    dim = dd->dim[dim_ind];
    copy_ivec(loc,loc_c);
    snew(rank,dd->nc[dim]);
    for(i=0; i<dd->nc[dim]; i++)
    {
        loc_c[dim] = i;
        rank[i] = dd_index(dd->nc,loc_c);
    }
    /* Here we create a new group, that does not necessarily
     * include our process. But MPI_Comm_create needs to be
     * called by all the processes in the original communicator.
     * Calling MPI_Group_free afterwards gives errors, so I assume
     * also the group is needed by all processes. (B. Hess)
     */
    MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
    MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
    if (c_row != MPI_COMM_NULL)
    {
        /* This process is part of the group */
        dd->comm->mpi_comm_load[dim_ind] = c_row;
        if (dd->comm->eDLB != edlbNO)
        {
            if (dd->ci[dim] == dd->master_ci[dim])
            {
                /* This is the root process of this row */
                snew(dd->comm->root[dim_ind],1);
                root = dd->comm->root[dim_ind];
                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
                snew(root->old_cell_f,dd->nc[dim]+1);
                snew(root->bCellMin,dd->nc[dim]);
                if (dim_ind > 0)
                {
                    snew(root->cell_f_max0,dd->nc[dim]);
                    snew(root->cell_f_min1,dd->nc[dim]);
                    snew(root->bound_min,dd->nc[dim]);
                    snew(root->bound_max,dd->nc[dim]);
                }
                snew(root->buf_ncd,dd->nc[dim]);
            }
            else
            {
                /* This is not a root process, we only need to receive cell_f */
                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
            }
        }
        if (dd->ci[dim] == dd->master_ci[dim])
        {
            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
        }
    }
    sfree(rank);
}
#endif
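/*
 * Editor's illustration (not part of the original source): the
 * group-then-communicator pattern above, in its minimal MPI form.
 * MPI_Comm_create is collective over the parent communicator and returns
 * MPI_COMM_NULL on ranks that are not in the group:
 */
#if 0   /* illustrative sketch only, never compiled */
static MPI_Comm sketch_row_comm(MPI_Comm parent,const int *ranks,int n)
{
    MPI_Group g_all,g_row;
    MPI_Comm  c_row;

    MPI_Comm_group(parent,&g_all);
    MPI_Group_incl(g_all,n,ranks,&g_row);
    MPI_Comm_create(parent,g_row,&c_row);

    return c_row;
}
#endif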
static void make_load_communicators(gmx_domdec_t *dd)
{
#ifdef GMX_MPI
    MPI_Group g_all;
    int  dim0,dim1,i,j;
    ivec loc;

    if (debug)
    {
        fprintf(debug,"Making load communicators\n");
    }

    MPI_Comm_group(dd->mpi_comm_all,&g_all);

    snew(dd->comm->load,dd->ndim);
    snew(dd->comm->mpi_comm_load,dd->ndim);

    clear_ivec(loc);
    make_load_communicator(dd,g_all,0,loc);
    if (dd->ndim > 1) {
        dim0 = dd->dim[0];
        for(i=0; i<dd->nc[dim0]; i++) {
            loc[dim0] = i;
            make_load_communicator(dd,g_all,1,loc);
        }
    }
    if (dd->ndim > 2) {
        dim0 = dd->dim[0];
        for(i=0; i<dd->nc[dim0]; i++) {
            loc[dim0] = i;
            dim1 = dd->dim[1];
            for(j=0; j<dd->nc[dim1]; j++) {
                loc[dim1] = j;
                make_load_communicator(dd,g_all,2,loc);
            }
        }
    }

    MPI_Group_free(&g_all);
#endif

    if (debug)
    {
        fprintf(debug,"Finished making load communicators\n");
    }
}
void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
{
    int  d,dim,i,j,m;
    ivec tmp,s;
    int  nzone,nzonep;
    ivec dd_zp[DD_MAXIZONE];
    gmx_domdec_zones_t *zones;
    gmx_domdec_ns_ranges_t *izone;
    int  shift_diff;

    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        copy_ivec(dd->ci,tmp);
        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
        copy_ivec(dd->ci,tmp);
        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
        if (debug)
        {
            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
                    dd->rank,d,
                    dd->neighbor[d][0],
                    dd->neighbor[d][1]);
        }
    }

    if (DDMASTER(dd))
    {
        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
                dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
    }
    if (fplog)
    {
        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
                dd->ndim,
                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
    switch (dd->ndim)
    {
    case 3:
        nzone  = dd_z3n;
        nzonep = dd_zp3n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp3[i],dd_zp[i]);
        }
        break;
    case 2:
        nzone  = dd_z2n;
        nzonep = dd_zp2n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp2[i],dd_zp[i]);
        }
        break;
    case 1:
        nzone  = dd_z1n;
        nzonep = dd_zp1n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp1[i],dd_zp[i]);
        }
        break;
    default:
        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
        nzone  = 0;
        nzonep = 0;
    }

    zones = &dd->comm->zones;

    for(i=0; i<nzone; i++)
    {
        m = 0;
        clear_ivec(zones->shift[i]);
        for(d=0; d<dd->ndim; d++)
        {
            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
        }
    }

    zones->n = nzone;
    for(i=0; i<nzone; i++)
    {
        for(d=0; d<DIM; d++)
        {
            s[d] = dd->ci[d] - zones->shift[i][d];
            if (s[d] < 0)
            {
                s[d] += dd->nc[d];
            }
            else if (s[d] >= dd->nc[d])
            {
                s[d] -= dd->nc[d];
            }
        }
    }
    zones->nizone = nzonep;
    for(i=0; i<zones->nizone; i++)
    {
        if (dd_zp[i][0] != i)
        {
            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
        }
        izone = &zones->izone[i];
        izone->j0 = dd_zp[i][1];
        izone->j1 = dd_zp[i][2];
        for(dim=0; dim<DIM; dim++)
        {
            if (dd->nc[dim] == 1)
            {
                /* All shifts should be allowed */
                izone->shift0[dim] = -1;
                izone->shift1[dim] = 1;
            }
            else
            {
                /*
                  izone->shift0[d] = 0;
                  izone->shift1[d] = 0;
                  for(j=izone->j0; j<izone->j1; j++) {
                      if (dd->shift[j][d] > dd->shift[i][d])
                          izone->shift0[d] = -1;
                      if (dd->shift[j][d] < dd->shift[i][d])
                          izone->shift1[d] = 1;
                  }
                */

                /* Assume the shifts are not more than 1 cell */
                izone->shift0[dim] = 1;
                izone->shift1[dim] = -1;
                for(j=izone->j0; j<izone->j1; j++)
                {
                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
                    if (shift_diff < izone->shift0[dim])
                    {
                        izone->shift0[dim] = shift_diff;
                    }
                    if (shift_diff > izone->shift1[dim])
                    {
                        izone->shift1[dim] = shift_diff;
                    }
                }
            }
        }
    }

    if (dd->comm->eDLB != edlbNO)
    {
        snew(dd->comm->root,dd->ndim);
    }

    if (dd->comm->bRecordLoad)
    {
        make_load_communicators(dd);
    }
}
static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  i,rank,*buf;
    ivec periods;
#ifdef GMX_MPI
    MPI_Comm comm_cart;
#endif

    dd = cr->dd;
    comm = dd->comm;

#ifdef GMX_MPI
    if (comm->bCartesianPP)
    {
        /* Set up cartesian communication for the particle-particle part */
        if (fplog)
        {
            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
        }

        for(i=0; i<DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
                        &comm_cart);
        /* We overwrite the old communicator with the new cartesian one */
        cr->mpi_comm_mygroup = comm_cart;
    }

    dd->mpi_comm_all = cr->mpi_comm_mygroup;
    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);

    if (comm->bCartesianPP_PME)
    {
        /* Since we want to use the original cartesian setup for sim,
         * and not the one after split, we need to make an index.
         */
        snew(comm->ddindex2ddnodeid,dd->nnodes);
        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
        /* Get the rank of the DD master,
         * above we made sure that the master node is a PP node.
         */
        if (MASTER(cr))
        {
            rank = dd->rank;
        }
        else
        {
            rank = 0;
        }
        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
    }
    else if (comm->bCartesianPP)
    {
        if (cr->npmenodes == 0)
        {
            /* The PP communicator is also
             * the communicator for this simulation
             */
            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
        }
        cr->nodeid = dd->rank;

        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);

        /* We need to make an index to go from the coordinates
         * to the nodeid of this simulation.
         */
        snew(comm->ddindex2simnodeid,dd->nnodes);
        snew(buf,dd->nnodes);
        if (cr->duty & DUTY_PP)
        {
            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
        }
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
                      cr->mpi_comm_mysim);
        sfree(buf);

        /* Determine the master coordinates and rank.
         * The DD master should be the same node as the master of this sim.
         */
        for(i=0; i<dd->nnodes; i++)
        {
            if (comm->ddindex2simnodeid[i] == 0)
            {
                ddindex2xyz(dd->nc,i,dd->master_ci);
                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
            }
        }
        if (debug)
        {
            fprintf(debug,"The master rank is %d\n",dd->masterrank);
        }
    }
    else
    {
        /* No Cartesian communicators */
        /* We use the rank in dd->comm->all as DD index */
        ddindex2xyz(dd->nc,dd->rank,dd->ci);
        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
        dd->masterrank = 0;
        clear_ivec(dd->master_ci);
    }
#endif

    if (fplog)
    {
        fprintf(fplog,
                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
    if (debug)
    {
        fprintf(debug,
                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
}
static void receive_ddindex2simnodeid(t_commrec *cr)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  *buf;

    dd = cr->dd;
    comm = dd->comm;

#ifdef GMX_MPI
    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
    {
        snew(comm->ddindex2simnodeid,dd->nnodes);
        snew(buf,dd->nnodes);
        if (cr->duty & DUTY_PP)
        {
            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
        }
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
                      cr->mpi_comm_mysim);
        sfree(buf);
    }
#endif
}
static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
                                                     int ncg,int natoms)
{
    gmx_domdec_master_t *ma;
    int i;

    snew(ma,1);

    snew(ma->ncg,dd->nnodes);
    snew(ma->index,dd->nnodes+1);
    snew(ma->cg,ncg);
    snew(ma->nat,dd->nnodes);
    snew(ma->ibuf,dd->nnodes*2);
    snew(ma->cell_x,DIM);
    for(i=0; i<DIM; i++)
    {
        snew(ma->cell_x[i],dd->nc[i]+1);
    }

    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
    {
        ma->vbuf = NULL;
    }
    else
    {
        snew(ma->vbuf,natoms);
    }

    return ma;
}
static void split_communicator(FILE *fplog, t_commrec *cr, int dd_node_order,
                               bool reorder)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int i, rank;
    bool bDiv[DIM];
    ivec periods;
    MPI_Comm comm_cart;

    dd   = cr->dd;
    comm = dd->comm;

    if (comm->bCartesianPP)
    {
        for (i = 1; i < DIM; i++)
        {
            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
        }
        if (bDiv[YY] || bDiv[ZZ])
        {
            comm->bCartesianPP_PME = TRUE;
            /* If we have 2D PME decomposition, which is always in x+y,
             * we stack the PME only nodes in z.
             * Otherwise we choose the direction that provides the thinnest slab
             * of PME only nodes as this will have the least effect
             * on the PP communication.
             * But for the PME communication the opposite might be better.
             */
            if (bDiv[ZZ] && (comm->npmenodes_minor > 1 ||
                             dd->nc[YY] > dd->nc[ZZ]))
            {
                comm->cartpmedim = ZZ;
            }
            else
            {
                comm->cartpmedim = YY;
            }
            comm->ntot[comm->cartpmedim]
                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
        }
        else if (fplog)
        {
            fprintf(fplog, "#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
            fprintf(fplog,
                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
        }
    }

    if (comm->bCartesianPP_PME)
    {
        if (fplog)
        {
            fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
        }

        for (i = 0; i < DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
                        &comm_cart);

        MPI_Comm_rank(comm_cart, &rank);
        if (MASTERNODE(cr) && rank != 0)
        {
            gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
        }

        /* With this assignment we lose the link to the original communicator,
         * which will usually be MPI_COMM_WORLD, unless we have multisim.
         */
        cr->mpi_comm_mysim = comm_cart;
        cr->sim_nodeid     = rank;

        MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);

        if (fplog)
        {
            fprintf(fplog, "Cartesian nodeid %d, coordinates %d %d %d\n\n",
                    cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
        }

        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
        {
            cr->duty = DUTY_PP;
        }
        if (cr->npmenodes == 0 ||
            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
        {
            cr->duty = DUTY_PME;
        }

        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       cr->duty,
                       dd_index(comm->ntot, dd->ci),
                       &cr->mpi_comm_mygroup);
    }
    else
    {
        switch (dd_node_order)
        {
        case ddnoPP_PME:
            if (fplog)
            {
                fprintf(fplog, "Order of the nodes: PP first, PME last\n");
            }
            break;
        case ddnoINTERLEAVE:
            /* Interleave the PP-only and PME-only nodes,
             * as on clusters with dual-core machines this will double
             * the communication bandwidth of the PME processes
             * and thus speed up the PP <-> PME and inter PME communication.
             */
            if (fplog)
            {
                fprintf(fplog, "Interleaving PP and PME nodes\n");
            }
            comm->pmenodes = dd_pmenodes(cr);
            break;
        default:
            gmx_fatal(FARGS, "Unknown dd_node_order=%d", dd_node_order);
        }

        if (dd_simnode2pmenode(cr, cr->sim_nodeid) == -1)
        {
            cr->duty = DUTY_PME;
        }
        else
        {
            cr->duty = DUTY_PP;
        }

        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       cr->duty,
                       cr->nodeid,
                       &cr->mpi_comm_mygroup);
        MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
    }

    if (fplog)
    {
        fprintf(fplog, "This is a %s only node\n\n",
                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
    }
}
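
/* Worked example for the PME-node stacking above (illustrative numbers):
 * with dd->nc = {4,4,2} (32 PP nodes) and cr->npmenodes = 8,
 * bDiv[YY] = (8*4 % 32 == 0) = TRUE and bDiv[ZZ] = (8*2 % 32 == 0) = FALSE,
 * so cartpmedim = YY and ntot[YY] += 8*4/32 = 1, giving a 4 x 5 x 2
 * Cartesian grid in which the extra y-slab holds the PME-only nodes.
 */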
void make_dd_communicators(FILE *fplog, t_commrec *cr, int dd_node_order)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    bool CartReorder;

    dd   = cr->dd;
    comm = dd->comm;

    copy_ivec(dd->nc, comm->ntot);

    comm->bCartesianPP     = (dd_node_order == ddnoCARTESIAN);
    comm->bCartesianPP_PME = FALSE;

    /* Reorder the nodes by default. This might change the MPI ranks.
     * Real reordering is only supported on very few architectures,
     * Blue Gene is one of them.
     */
    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);

    if (cr->npmenodes > 0)
    {
        /* Split the communicator into a PP and PME part */
        split_communicator(fplog, cr, dd_node_order, CartReorder);
        if (comm->bCartesianPP_PME)
        {
            /* We (possibly) reordered the nodes in split_communicator,
             * so it is no longer required in make_pp_communicator.
             */
            CartReorder = FALSE;
        }
    }
    else
    {
        /* All nodes do PP and PME */
        /* We do not require separate communicators */
        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
    }

    if (cr->duty & DUTY_PP)
    {
        /* Copy or make a new PP communicator */
        make_pp_communicator(fplog, cr, CartReorder);
    }
    else
    {
        receive_ddindex2simnodeid(cr);
    }

    if (!(cr->duty & DUTY_PME))
    {
        /* Set up the communication to our PME node */
        dd->pme_nodeid           = dd_simnode2pmenode(cr, cr->sim_nodeid);
        dd->pme_receive_vir_ener = receive_vir_ener(cr);
        if (debug)
        {
            fprintf(debug, "My pme_nodeid %d receive ener %d\n",
                    dd->pme_nodeid, dd->pme_receive_vir_ener);
        }
    }
    else
    {
        dd->pme_nodeid = -1;
    }

    dd->ma = init_gmx_domdec_master_t(dd,
                                      comm->cgs_gl.nr,
                                      comm->cgs_gl.index[comm->cgs_gl.nr]);
}
static real *get_slb_frac(FILE *fplog, const char *dir, int nc,
                          const char *size_string)
{
    real *slb_frac, tot;
    int i, n;
    double dbl;

    slb_frac = NULL;
    if (nc > 1 && size_string != NULL)
    {
        if (fplog)
        {
            fprintf(fplog, "Using static load balancing for the %s direction\n",
                    dir);
        }
        snew(slb_frac, nc);
        tot = 0;
        for (i = 0; i < nc; i++)
        {
            dbl = 0;
            sscanf(size_string, "%lf%n", &dbl, &n);
            if (dbl == 0)
            {
                gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
            }
            slb_frac[i]  = dbl;
            size_string += n;
            tot         += slb_frac[i];
        }
        /* Normalize */
        if (fplog)
        {
            fprintf(fplog, "Relative cell sizes:");
        }
        for (i = 0; i < nc; i++)
        {
            slb_frac[i] /= tot;
            if (fplog)
            {
                fprintf(fplog, " %5.3f", slb_frac[i]);
            }
        }
        if (fplog)
        {
            fprintf(fplog, "\n");
        }
    }

    return slb_frac;
}
static int multi_body_bondeds_count(gmx_mtop_t *mtop)
{
    int n, nmol, ftype;
    gmx_mtop_ilistloop_t iloop;
    t_ilist *il;

    n = 0;
    iloop = gmx_mtop_ilistloop_init(mtop);
    while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
    {
        for (ftype = 0; ftype < F_NRE; ftype++)
        {
            if ((interaction_function[ftype].flags & IF_BOND) &&
                NRAL(ftype) > 2)
            {
                n += nmol*il[ftype].nr/(1 + NRAL(ftype));
            }
        }
    }

    return n;
}
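
/* Each t_ilist entry stores one interaction type index followed by the
 * NRAL(ftype) atom indices of that interaction, so il[ftype].nr/(1+NRAL(ftype))
 * is the number of interactions of this type per molecule; multiplying by
 * nmol gives the system-wide count of multi-body (more than two atoms)
 * bonded interactions.
 */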
static int dd_nst_env(FILE *fplog, const char *env_var, int def)
{
    char *val;
    int nst;

    nst = def;
    val = getenv(env_var);
    if (val)
    {
        if (sscanf(val, "%d", &nst) <= 0)
        {
            nst = 1;
        }
        if (fplog)
        {
            fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
                    env_var, val, nst);
        }
    }

    return nst;
}
static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
{
    if (MASTER(cr))
    {
        fprintf(stderr, "\n%s\n", warn_string);
    }
    if (fplog)
    {
        fprintf(fplog, "\n%s\n", warn_string);
    }
}
static void check_dd_restrictions(t_commrec *cr, gmx_domdec_t *dd,
                                  t_inputrec *ir, FILE *fplog)
{
    if (ir->ePBC == epbcSCREW &&
        (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
    {
        gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
    }

    if (ir->ns_type == ensSIMPLE)
    {
        gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
    }

    if (ir->nstlist == 0)
    {
        gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
    }

    if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
    {
        dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
    }
}
static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
{
    int di, d;
    real r;

    r = ddbox->box_size[XX];
    for (di = 0; di < dd->ndim; di++)
    {
        d = dd->dim[di];
        /* Check using the initial average cell size */
        r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
    }

    return r;
}
static int check_dlb_support(FILE *fplog, t_commrec *cr,
                             const char *dlb_opt, bool bRecordLoad,
                             unsigned long Flags, t_inputrec *ir)
{
    int eDLB = -1;
    char buf[STRLEN];

    switch (dlb_opt[0])
    {
    case 'a': eDLB = edlbAUTO; break;
    case 'n': eDLB = edlbNO;   break;
    case 'y': eDLB = edlbYES;  break;
    default:  gmx_incons("Unknown dlb_opt");
    }

    if (Flags & MD_RERUN)
    {
        return edlbNO;
    }

    if (!EI_DYNAMICS(ir->eI))
    {
        if (eDLB == edlbYES)
        {
            sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
            dd_warning(cr, fplog, buf);
        }
        return edlbNO;
    }

    if (!bRecordLoad)
    {
        dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
        return edlbNO;
    }

    if (Flags & MD_REPRODUCIBLE)
    {
        switch (eDLB)
        {
        case edlbNO:
            break;
        case edlbAUTO:
            dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
            eDLB = edlbNO;
            break;
        case edlbYES:
            dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
            break;
        default:
            gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
        }
    }

    return eDLB;
}
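
/* Summary of the checks above: DLB is disabled for reruns, for non-dynamical
 * integrators, and when cycle counting is unavailable; with -reprod it is
 * downgraded from "auto" to "no", and merely warned about when forced on
 * with "yes", since dynamic re-partitioning breaks binary reproducibility.
 */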
static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
{
    int dim;

    dd->ndim = 0;
    if (getenv("GMX_DD_ORDER_ZYX"))
    {
        /* Decomposition order z,y,x */
        if (fplog)
        {
            fprintf(fplog, "Using domain decomposition order z, y, x\n");
        }
        for (dim = DIM-1; dim >= 0; dim--)
        {
            if (dd->nc[dim] > 1)
            {
                dd->dim[dd->ndim++] = dim;
            }
        }
    }
    else
    {
        /* Decomposition order x,y,z */
        for (dim = 0; dim < DIM; dim++)
        {
            if (dd->nc[dim] > 1)
            {
                dd->dim[dd->ndim++] = dim;
            }
        }
    }
}
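
/* Example: dd->nc = {1,4,2} gives ndim = 2 with dim = {YY,ZZ} in the default
 * x,y,z order, or dim = {ZZ,YY} when GMX_DD_ORDER_ZYX is set; dimensions
 * with a single cell never appear in dd->dim.
 */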
static gmx_domdec_comm_t *init_dd_comm()
{
    gmx_domdec_comm_t *comm;
    int i;

    snew(comm, 1);
    snew(comm->cggl_flag, DIM*2);
    snew(comm->cgcm_state, DIM*2);
    for (i = 0; i < DIM*2; i++)
    {
        comm->cggl_flag_nalloc[i]  = 0;
        comm->cgcm_state_nalloc[i] = 0;
    }

    comm->nalloc_int = 0;
    comm->buf_int    = NULL;

    vec_rvec_init(&comm->vbuf);

    comm->n_load_have    = 0;
    comm->n_load_collect = 0;

    for (i = 0; i < ddnatNR-ddnatZONE; i++)
    {
        comm->sum_nat[i] = 0;
    }
    comm->load_step = 0;
    clear_ivec(comm->load_lim);

    return comm;
}
gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
                                        unsigned long Flags,
                                        ivec nc,
                                        real comm_distance_min, real rconstr,
                                        const char *dlb_opt, real dlb_scale,
                                        const char *sizex, const char *sizey, const char *sizez,
                                        gmx_mtop_t *mtop, t_inputrec *ir,
                                        matrix box, rvec *x,
                                        gmx_ddbox_t *ddbox,
                                        int *npme_major, int *npme_minor)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int recload;
    int d, i, j;
    real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
    bool bC;
    char buf[STRLEN];

    if (fplog)
    {
        fprintf(fplog,
                "\nInitializing Domain Decomposition on %d nodes\n", cr->nnodes);
    }

    snew(dd, 1);
    dd->comm = init_dd_comm();
    comm = dd->comm;
    snew(comm->cggl_flag, DIM*2);
    snew(comm->cgcm_state, DIM*2);

    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
    dd->bScrewPBC = (ir->ePBC == epbcSCREW);

    dd->bSendRecv2      = dd_nst_env(fplog, "GMX_DD_SENDRECV2", 0);
    comm->eFlop         = dd_nst_env(fplog, "GMX_DLB_FLOP", 0);
    recload             = dd_nst_env(fplog, "GMX_DD_LOAD", 1);
    comm->nstSortCG     = dd_nst_env(fplog, "GMX_DD_SORT", 1);
    comm->nstDDDump     = dd_nst_env(fplog, "GMX_DD_DUMP", 0);
    comm->nstDDDumpGrid = dd_nst_env(fplog, "GMX_DD_DUMP_GRID", 0);
    comm->DD_debug      = dd_nst_env(fplog, "GMX_DD_DEBUG", 0);

    dd->pme_recv_f_alloc = 0;
    dd->pme_recv_f_buf   = NULL;

    if (dd->bSendRecv2 && fplog)
    {
        fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
    }
    if (comm->eFlop)
    {
        if (fplog)
        {
            fprintf(fplog, "Will load balance based on FLOP count\n");
        }
        if (comm->eFlop > 1)
        {
            srand(1+cr->nodeid);
        }
        comm->bRecordLoad = TRUE;
    }
    else
    {
        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
    }

    comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);

    comm->bDynLoadBal = (comm->eDLB == edlbYES);
    if (fplog)
    {
        fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
    }
    dd->bGridJump = comm->bDynLoadBal;

    if (comm->nstSortCG)
    {
        if (fplog)
        {
            if (comm->nstSortCG == 1)
            {
                fprintf(fplog, "Will sort the charge groups at every domain (re)decomposition\n");
            }
            else
            {
                fprintf(fplog, "Will sort the charge groups every %d steps\n",
                        comm->nstSortCG);
            }
        }
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog, "Will not sort the charge groups\n");
        }
    }

    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
    if (comm->bInterCGBondeds)
    {
        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
    }
    else
    {
        comm->bInterCGMultiBody = FALSE;
    }

    dd->bInterCGcons = inter_charge_group_constraints(mtop);

    if (ir->rlistlong == 0)
    {
        /* Set the cut-off to some very large value,
         * so we don't need if statements everywhere in the code.
         * We use sqrt, since the cut-off is squared in some places.
         */
        comm->cutoff = GMX_CUTOFF_INF;
    }
    else
    {
        comm->cutoff = ir->rlistlong;
    }
    comm->cutoff_mbody = 0;

    comm->cellsize_limit = 0;
    comm->bBondComm      = FALSE;

    if (comm->bInterCGBondeds)
    {
        if (comm_distance_min > 0)
        {
            comm->cutoff_mbody = comm_distance_min;
            if (Flags & MD_DDBONDCOMM)
            {
                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
            }
            else
            {
                comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
            }
            r_bonded_limit = comm->cutoff_mbody;
        }
        else if (ir->bPeriodicMols)
        {
            /* Cannot easily determine the required cut-off */
            dd_warning(cr, fplog, "NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
            comm->cutoff_mbody = comm->cutoff/2;
            r_bonded_limit     = comm->cutoff_mbody;
        }
        else
        {
            dd_bonded_cg_distance(fplog, dd, mtop, ir, x, box,
                                  Flags & MD_DDBONDCHECK, &r_2b, &r_mb);
            gmx_bcast(sizeof(r_2b), &r_2b, cr);
            gmx_bcast(sizeof(r_mb), &r_mb, cr);

            /* We use an initial margin of 10% for the minimum cell size,
             * except when we are just below the non-bonded cut-off.
             */
            if (Flags & MD_DDBONDCOMM)
            {
                if (max(r_2b, r_mb) > comm->cutoff)
                {
                    r_bonded        = max(r_2b, r_mb);
                    r_bonded_limit  = 1.1*r_bonded;
                    comm->bBondComm = TRUE;
                }
                else
                {
                    r_bonded       = r_mb;
                    r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
                }
                /* We determine cutoff_mbody later */
            }
            else
            {
                /* No special bonded communication,
                 * simply increase the DD cut-off.
                 */
                r_bonded_limit     = 1.1*max(r_2b, r_mb);
                comm->cutoff_mbody = r_bonded_limit;
                comm->cutoff       = max(comm->cutoff, comm->cutoff_mbody);
            }
        }
        comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
        if (fplog)
        {
            fprintf(fplog,
                    "Minimum cell size due to bonded interactions: %.3f nm\n",
                    comm->cellsize_limit);
        }
    }
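
    /* Worked example of the 10% margin logic above (illustrative numbers):
     * with r_2b = 1.0 nm, r_mb = 1.3 nm and a 1.2 nm non-bonded cut-off under
     * -ddbondcomm, max(r_2b,r_mb) exceeds the cut-off, so bBondComm is
     * enabled and r_bonded_limit = 1.1*1.3 = 1.43 nm becomes the minimum
     * cell size due to bonded interactions.
     */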
    if (dd->bInterCGcons && rconstr <= 0)
    {
        /* There is a cell size limit due to the constraints (P-LINCS) */
        rconstr = constr_r_max(fplog, mtop, ir);
        if (fplog)
        {
            fprintf(fplog,
                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
                    rconstr);
            if (rconstr > comm->cellsize_limit)
            {
                fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
            }
        }
    }
    else if (rconstr > 0 && fplog)
    {
        /* Here we do not check for dd->bInterCGcons,
         * because one can also set a cell size limit for virtual sites only
         * and at this point we don't know yet if there are intercg v-sites.
         */
        fprintf(fplog,
                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
                rconstr);
    }
    comm->cellsize_limit = max(comm->cellsize_limit, rconstr);

    comm->cgs_gl = gmx_mtop_global_cgs(mtop);

    if (nc[XX] > 0)
    {
        copy_ivec(nc, dd->nc);
        set_dd_dim(fplog, dd);
        set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, x, ddbox);

        if (cr->npmenodes == -1)
        {
            cr->npmenodes = 0;
        }
        acs = average_cellsize_min(dd, ddbox);
        if (acs < comm->cellsize_limit)
        {
            if (fplog)
            {
                fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
            }
            gmx_fatal_collective(FARGS, MASTER(cr),
                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
                                 acs, comm->cellsize_limit);
        }
    }
    else
    {
        set_ddbox_cr(cr, NULL, ir, box, &comm->cgs_gl, x, ddbox);

        /* We need to choose the optimal DD grid and possibly PME nodes */
        limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
                               comm->eDLB != edlbNO, dlb_scale,
                               comm->cellsize_limit, comm->cutoff,
                               comm->bInterCGBondeds, comm->bInterCGMultiBody);

        if (dd->nc[XX] == 0)
        {
            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
            sprintf(buf, "Change the number of nodes or mdrun option %s%s%s",
                    !bC ? "-rdd" : "-rcon",
                    comm->eDLB != edlbNO ? " or -dds" : "",
                    bC ? " or your LINCS settings" : "");
            gmx_fatal_collective(FARGS, MASTER(cr),
                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
                                 "%s\n"
                                 "Look in the log file for details on the domain decomposition",
                                 cr->nnodes - cr->npmenodes, limit, buf);
        }
        set_dd_dim(fplog, dd);
    }

    if (fplog)
    {
        fprintf(fplog,
                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
                dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
    }

    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
    if (cr->nnodes - dd->nnodes != cr->npmenodes)
    {
        gmx_fatal(FARGS, "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
                  dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
    }
    if (cr->npmenodes > dd->nnodes)
    {
        gmx_fatal(FARGS, "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.", cr->npmenodes, dd->nnodes);
    }
    if (cr->npmenodes > 0)
    {
        comm->npmenodes = cr->npmenodes;
    }
    else
    {
        comm->npmenodes = dd->nnodes;
    }

    if (EEL_PME(ir->coulombtype))
    {
        if (comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
            getenv("GMX_PMEONEDD") == NULL)
        {
            comm->npmedecompdim   = 2;
            comm->npmenodes_major = dd->nc[XX];
            comm->npmenodes_minor = comm->npmenodes/comm->npmenodes_major;
        }
        else
        {
            comm->npmedecompdim   = 1;
            comm->npmenodes_major = comm->npmenodes;
            comm->npmenodes_minor = comm->npmenodes/comm->npmenodes_major;
        }
        if (fplog)
        {
            fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
                    comm->npmenodes_major, comm->npmenodes_minor, 1);
        }
    }
    else
    {
        comm->npmedecompdim   = 0;
        comm->npmenodes_major = 0;
        comm->npmenodes_minor = 0;
    }

    /* Technically we don't need both of these, but it simplifies code not
     * having to recalculate it.
     */
    *npme_major = comm->npmenodes_major;
    *npme_minor = comm->npmenodes_minor;

    snew(comm->slb_frac, DIM);
    if (comm->eDLB == edlbNO)
    {
        comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
        comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
        comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], sizez);
    }

    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
    {
        if (comm->bBondComm || comm->eDLB != edlbNO)
        {
            /* Set the bonded communication distance to halfway
             * the minimum and the maximum,
             * since the extra communication cost is nearly zero.
             */
            acs = average_cellsize_min(dd, ddbox);
            comm->cutoff_mbody = 0.5*(r_bonded + acs);
            if (comm->eDLB != edlbNO)
            {
                /* Check if this does not limit the scaling */
                comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
            }
            if (!comm->bBondComm)
            {
                /* Without bBondComm do not go beyond the n.b. cut-off */
                comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
                if (comm->cellsize_limit >= comm->cutoff)
                {
                    /* We don't lose much efficiency
                     * when increasing it to the n.b. cut-off.
                     * It can even be slightly faster, because we need
                     * fewer checks for the communication setup.
                     */
                    comm->cutoff_mbody = comm->cutoff;
                }
            }
            /* Check if we did not end up below our original limit */
            comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);

            if (comm->cutoff_mbody > comm->cellsize_limit)
            {
                comm->cellsize_limit = comm->cutoff_mbody;
            }
        }
        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
    }
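
    /* Worked example of the halfway rule above (illustrative numbers): with
     * r_bonded = 1.0 nm and an average minimum cell size acs = 2.0 nm, the
     * bonded communication distance becomes 0.5*(1.0 + 2.0) = 1.5 nm; it is
     * then clipped by dlb_scale*acs (e.g. 0.8*2.0 = 1.6 nm, no change here)
     * and, without bBondComm, by the non-bonded cut-off.
     */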
    if (debug)
    {
        fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
                "cellsize limit %f\n",
                comm->bBondComm, comm->cellsize_limit);
    }

    if (MASTER(cr))
    {
        check_dd_restrictions(cr, dd, ir, fplog);
    }

    comm->partition_step = INT_MIN;
    dd->ddp_count = 0;

    clear_dd_cycle_counts(dd);

    return dd;
}
static void set_dlb_limits(gmx_domdec_t *dd)
{
    int d;

    for (d = 0; d < dd->ndim; d++)
    {
        dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
        dd->comm->cellsize_min[dd->dim[d]] =
            dd->comm->cellsize_min_dlb[dd->dim[d]];
    }
}
static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_large_int_t step)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    real cellsize_min;
    int d, nc, i;
    char buf[STRLEN];

    dd   = cr->dd;
    comm = dd->comm;

    if (fplog)
    {
        fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
    }

    cellsize_min = comm->cellsize_min[dd->dim[0]];
    for (d = 1; d < dd->ndim; d++)
    {
        cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
    }

    if (cellsize_min < comm->cellsize_limit*1.05)
    {
        dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
        /* Change DLB from "auto" to "no". */
        comm->eDLB = edlbNO;
        return;
    }

    dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
    comm->bDynLoadBal = TRUE;
    dd->bGridJump     = TRUE;

    set_dlb_limits(dd);

    /* We can set the required cell size info here,
     * so we do not need to communicate this.
     * The grid is completely uniform.
     */
    for (d = 0; d < dd->ndim; d++)
    {
        comm->load[d].sum_m = comm->load[d].sum;

        nc = dd->nc[dd->dim[d]];
        for (i = 0; i < nc; i++)
        {
            comm->root[d]->cell_f[i] = i/(real)nc;
            if (d > 0)
            {
                comm->root[d]->cell_f_max0[i] = i/(real)nc;
                comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
            }
        }
        comm->root[d]->cell_f[nc] = 1.0;
    }
}
static char *init_bLocalCG(gmx_mtop_t *mtop)
{
    int ncg, cg;
    char *bLocalCG;

    ncg = ncg_mtop(mtop);
    snew(bLocalCG, ncg);
    for (cg = 0; cg < ncg; cg++)
    {
        bLocalCG[cg] = FALSE;
    }

    return bLocalCG;
}
void dd_init_bondeds(FILE *fplog,
                     gmx_domdec_t *dd, gmx_mtop_t *mtop,
                     gmx_vsite_t *vsite, gmx_constr_t constr,
                     t_inputrec *ir, bool bBCheck, cginfo_mb_t *cginfo_mb)
{
    gmx_domdec_comm_t *comm;

    dd_make_reverse_top(fplog, dd, mtop, vsite, constr, ir, bBCheck);

    comm = dd->comm;

    if (comm->bBondComm)
    {
        /* Communicate atoms beyond the cut-off for bonded interactions */
        comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);

        comm->bLocalCG = init_bLocalCG(mtop);
    }
    else
    {
        /* Only communicate atoms based on cut-off */
        comm->cglink   = NULL;
        comm->bLocalCG = NULL;
    }
}
static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
                              t_inputrec *ir,
                              bool bDynLoadBal, real dlb_scale,
                              gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int d;
    ivec np;
    real limit, shrink;
    char buf[64];

    if (fplog == NULL)
    {
        return;
    }

    comm = dd->comm;

    if (bDynLoadBal)
    {
        fprintf(fplog, "The maximum number of communication pulses is:");
        for (d = 0; d < dd->ndim; d++)
        {
            fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
        }
        fprintf(fplog, "\n");
        fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
        fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
        fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
        for (d = 0; d < DIM; d++)
        {
            if (dd->nc[d] > 1)
            {
                if (d >= ddbox->npbcdim && dd->nc[d] == 2)
                {
                    shrink = 0;
                }
                else
                {
                    shrink =
                        comm->cellsize_min_dlb[d]/
                        (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
                }
                fprintf(fplog, " %c %.2f", dim2char(d), shrink);
            }
        }
        fprintf(fplog, "\n");
    }
    else
    {
        set_dd_cell_sizes_slb(dd, ddbox, FALSE, np);
        fprintf(fplog, "The initial number of communication pulses is:");
        for (d = 0; d < dd->ndim; d++)
        {
            fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
        }
        fprintf(fplog, "\n");
        fprintf(fplog, "The initial domain decomposition cell size is:");
        for (d = 0; d < DIM; d++)
        {
            if (dd->nc[d] > 1)
            {
                fprintf(fplog, " %c %.2f nm",
                        dim2char(d), dd->comm->cellsize_min[d]);
            }
        }
        fprintf(fplog, "\n\n");
    }

    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
    {
        fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
        fprintf(fplog, "%40s %-7s %6.3f nm\n",
                "non-bonded interactions", "", comm->cutoff);

        if (bDynLoadBal)
        {
            limit = dd->comm->cellsize_limit;
        }
        else
        {
            if (dynamic_dd_box(ddbox, ir))
            {
                fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
            }
            limit = dd->comm->cellsize_min[XX];
            for (d = 1; d < DIM; d++)
            {
                limit = min(limit, dd->comm->cellsize_min[d]);
            }
        }

        if (comm->bInterCGBondeds)
        {
            fprintf(fplog, "%40s %-7s %6.3f nm\n",
                    "two-body bonded interactions", "(-rdd)",
                    max(comm->cutoff, comm->cutoff_mbody));
            fprintf(fplog, "%40s %-7s %6.3f nm\n",
                    "multi-body bonded interactions", "(-rdd)",
                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
        }
        if (dd->vsite_comm)
        {
            fprintf(fplog, "%40s %-7s %6.3f nm\n",
                    "virtual site constructions", "(-rcon)", limit);
        }
        if (dd->constraint_comm)
        {
            sprintf(buf, "atoms separated by up to %d constraints",
                    1+ir->nProjOrder);
            fprintf(fplog, "%40s %-7s %6.3f nm\n",
                    buf, "(-rcon)", limit);
        }
        fprintf(fplog, "\n");
    }
}
void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
                       t_inputrec *ir, t_forcerec *fr,
                       gmx_ddbox_t *ddbox)
{
    gmx_domdec_comm_t *comm;
    int d, dim, npulse, npulse_d_max, npulse_d;
    bool bNoCutOff;
    real vol_frac;
    int natoms_tot;

    comm = dd->comm;

    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);

    if (EEL_PME(ir->coulombtype))
    {
        init_ddpme(dd, &comm->ddpme[0], 0, comm->npmenodes_major);
        if (comm->npmedecompdim >= 2)
        {
            init_ddpme(dd, &comm->ddpme[1], 1,
                       comm->npmenodes/comm->npmenodes_major);
        }
    }
    else
    {
        comm->npmenodes = 0;
        if (dd->pme_nodeid >= 0)
        {
            gmx_fatal(FARGS, "Can not have separate PME nodes without PME electrostatics");
        }
    }

    /* If each molecule is a single charge group
     * or we use domain decomposition for each periodic dimension,
     * we do not need to take pbc into account for the bonded interactions.
     */
    if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
        (dd->nc[XX] > 1 && dd->nc[YY] > 1 && (dd->nc[ZZ] > 1 || fr->ePBC == epbcXY)))
    {
        fr->bMolPBC = FALSE;
    }

    if (debug)
    {
        fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
    }
    if (comm->eDLB != edlbNO)
    {
        /* Determine the maximum number of comm. pulses in one dimension */

        comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);

        /* Determine the maximum required number of grid pulses */
        if (comm->cellsize_limit >= comm->cutoff)
        {
            /* Only a single pulse is required */
            npulse = 1;
        }
        else if (!bNoCutOff && comm->cellsize_limit > 0)
        {
            /* We round down slightly here to avoid overhead due to the latency
             * of extra communication calls when the cut-off
             * would be only slightly longer than the cell size.
             * Later cellsize_limit is redetermined,
             * so we can not miss interactions due to this rounding.
             */
            npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
        }
        else
        {
            /* There is no cell size limit */
            npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
        }

        if (!bNoCutOff && npulse > 1)
        {
            /* See if we can do with less pulses, based on dlb_scale */
            npulse_d_max = 0;
            for (d = 0; d < dd->ndim; d++)
            {
                dim = dd->dim[d];
                npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
                                 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
                npulse_d_max = max(npulse_d_max, npulse_d);
            }
            npulse = min(npulse, npulse_d_max);
        }

        /* This env var can override npulse */
        d = dd_nst_env(fplog, "GMX_DD_NPULSE", 0);
        if (d > 0)
        {
            npulse = d;
        }

        comm->maxpulse = 1;
        comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
        for (d = 0; d < dd->ndim; d++)
        {
            comm->cd[d].np_dlb    = min(npulse, dd->nc[dd->dim[d]]-1);
            comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
            snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
            comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
            if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
            {
                comm->bVacDLBNoLimit = FALSE;
            }
        }

        /* cellsize_limit is set for LINCS in init_domain_decomposition */
        if (!comm->bVacDLBNoLimit)
        {
            comm->cellsize_limit = max(comm->cellsize_limit,
                                       comm->cutoff/comm->maxpulse);
        }
        comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
        /* Set the minimum cell size for each DD dimension */
        for (d = 0; d < dd->ndim; d++)
        {
            if (comm->bVacDLBNoLimit ||
                comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
            {
                comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
            }
            else
            {
                comm->cellsize_min_dlb[dd->dim[d]] =
                    comm->cutoff/comm->cd[d].np_dlb;
            }
        }
        if (comm->cutoff_mbody <= 0)
        {
            comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
        }
        if (comm->bDynLoadBal)
        {
            set_dlb_limits(dd);
        }
    }

    print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
    if (comm->eDLB == edlbAUTO)
    {
        if (fplog)
        {
            fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
        }
        print_dd_settings(fplog, dd, ir, TRUE, dlb_scale, ddbox);
    }

    vol_frac = 1/(real)dd->nnodes + comm_box_frac(dd->nc, comm->cutoff, ddbox);
    if (debug)
    {
        fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
    }
    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];

    dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
}
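
/* Sizing note: ga2la_init() is given the global atom count and an estimate
 * of the local+communicated atom count, vol_frac*natoms_tot, where vol_frac
 * is the home cell volume fraction plus the estimated communication volume
 * from comm_box_frac(); this presumably sizes the global-to-local lookup so
 * that frequent rehashing is avoided.
 */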
static void merge_cg_buffers(int ncell,
                             gmx_domdec_comm_dim_t *cd, int pulse,
                             int *ncg_cell,
                             int *index_gl, int *recv_i,
                             rvec *cg_cm, rvec *recv_vr,
                             int *cgindex,
                             cginfo_mb_t *cginfo_mb, int *cginfo)
{
    gmx_domdec_ind_t *ind, *ind_p;
    int p, cell, c, cg, cg0, cg1, cg_gl, nat;
    int shift, shift_at;

    ind = &cd->ind[pulse];

    /* First correct the already stored data */
    shift = ind->nrecv[ncell];
    for (cell = ncell-1; cell >= 0; cell--)
    {
        shift -= ind->nrecv[cell];
        if (shift > 0)
        {
            /* Move the cg's present from previous grid pulses */
            cg0 = ncg_cell[ncell+cell];
            cg1 = ncg_cell[ncell+cell+1];
            cgindex[cg1+shift] = cgindex[cg1];
            for (cg = cg1-1; cg >= cg0; cg--)
            {
                index_gl[cg+shift] = index_gl[cg];
                copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
                cgindex[cg+shift] = cgindex[cg];
                cginfo[cg+shift]  = cginfo[cg];
            }
            /* Correct the already stored send indices for the shift */
            for (p = 1; p <= pulse; p++)
            {
                ind_p = &cd->ind[p];
                cg0 = 0;
                for (c = 0; c < cell; c++)
                {
                    cg0 += ind_p->nsend[c];
                }
                cg1 = cg0 + ind_p->nsend[cell];
                for (cg = cg0; cg < cg1; cg++)
                {
                    ind_p->index[cg] += shift;
                }
            }
        }
    }

    /* Merge in the communicated buffers */
    shift    = 0;
    shift_at = 0;
    cg0      = 0;
    for (cell = 0; cell < ncell; cell++)
    {
        cg1 = ncg_cell[ncell+cell+1] + shift;
        if (shift_at > 0)
        {
            /* Correct the old cg indices */
            for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
            {
                cgindex[cg+1] += shift_at;
            }
        }
        for (cg = 0; cg < ind->nrecv[cell]; cg++)
        {
            /* Copy this charge group from the buffer */
            index_gl[cg1] = recv_i[cg0];
            copy_rvec(recv_vr[cg0], cg_cm[cg1]);
            /* Add it to the cgindex */
            cg_gl = index_gl[cg1];
            cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
            nat = GET_CGINFO_NATOMS(cginfo[cg1]);
            cgindex[cg1+1] = cgindex[cg1] + nat;
            cg0++;
            cg1++;
            shift_at += nat;
        }
        shift += ind->nrecv[cell];
        ncg_cell[ncell+cell+1] = cg1;
    }
}
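
/* The two phases above: first, charge groups already received in earlier
 * pulses are shifted towards the end of each zone range to open a gap equal
 * to the newly received count, and previously stored send indices are
 * corrected by the same shift; then the freshly communicated buffers are
 * copied into the gaps and the zone ranges are updated.
 */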
static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
                               int nzone, int cg0, const int *cgindex)
{
    int cg, zone, p;

    /* Store the atom block boundaries for easy copying of communication
     * buffers
     */
    cg = cg0;
    for (zone = 0; zone < nzone; zone++)
    {
        for (p = 0; p < cd->np; p++)
        {
            cd->ind[p].cell2at0[zone] = cgindex[cg];
            cg += cd->ind[p].nrecv[zone];
            cd->ind[p].cell2at1[zone] = cgindex[cg];
        }
    }
}
static bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
{
    int i;
    bool bMiss;

    bMiss = FALSE;
    for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
    {
        if (!bLocalCG[link->a[i]])
        {
            bMiss = TRUE;
        }
    }

    return bMiss;
}
static void setup_dd_communication(gmx_domdec_t *dd,
                                   matrix box, gmx_ddbox_t *ddbox, t_forcerec *fr)
{
    int dim_ind, dim, dim0, dim1 = -1, dim2 = -1, dimd, p, nat_tot;
    int nzone, nzone_send, zone, zonei, cg0, cg1;
    int c, i, j, cg, cg_gl, nrcg;
    int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
    gmx_domdec_comm_t *comm;
    gmx_domdec_zones_t *zones;
    gmx_domdec_comm_dim_t *cd;
    gmx_domdec_ind_t *ind;
    cginfo_mb_t *cginfo_mb;
    bool bBondComm, bDist2B, bDistMB, bDistMB_pulse, bDistBonded, bScrew;
    real r_mb, r_comm2, r_scomm2, r_bcomm2, r, r_0, r_1, r2, rb2, r2inc, inv_ncg, tric_sh;
    real corner[DIM][4], corner_round_0 = 0, corner_round_1[4];
    real bcorner[DIM], bcorner_round_1 = 0;
    ivec tric_dist;
    rvec rn, rb;
    rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
    real skew_fac2_d, skew_fac_01;
    real sf2_round[DIM];
    int nsend, nat;

    if (debug)
    {
        fprintf(debug, "Setting up DD communication\n");
    }

    comm  = dd->comm;
    cg_cm = fr->cg_cm;

    for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
    {
        dim = dd->dim[dim_ind];

        /* Check if we need to use triclinic distances */
        tric_dist[dim_ind] = 0;
        for (i = 0; i <= dim_ind; i++)
        {
            if (ddbox->tric_dir[dd->dim[i]])
            {
                tric_dist[dim_ind] = 1;
            }
        }
    }

    bBondComm = comm->bBondComm;

    /* Do we need to determine extra distances for multi-body bondeds? */
    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);

    /* Do we need to determine extra distances for only two-body bondeds? */
    bDist2B = (bBondComm && !bDistMB);

    r_comm2  = sqr(comm->cutoff);
    r_bcomm2 = sqr(comm->cutoff_mbody);

    if (debug)
    {
        fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, sqrt(r_bcomm2));
    }

    zones = &comm->zones;

    dim0 = dd->dim[0];
    /* The first dimension is equal for all cells */
    corner[0][0] = comm->cell_x0[dim0];
    if (bDistMB)
    {
        bcorner[0] = corner[0][0];
    }
    if (dd->ndim >= 2)
    {
        dim1 = dd->dim[1];
        /* This cell row is only seen from the first row */
        corner[1][0] = comm->cell_x0[dim1];
        /* All rows can see this row */
        corner[1][1] = comm->cell_x0[dim1];
        if (dd->bGridJump)
        {
            corner[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
            if (bDistMB)
            {
                /* For the multi-body distance we need the maximum */
                bcorner[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
            }
        }
        /* Set the upper-right corner for rounding */
        corner_round_0 = comm->cell_x1[dim0];

        if (dd->ndim >= 3)
        {
            dim2 = dd->dim[2];
            for (j = 0; j < 4; j++)
            {
                corner[2][j] = comm->cell_x0[dim2];
            }
            if (dd->bGridJump)
            {
                /* Use the maximum of the i-cells that see a j-cell */
                for (i = 0; i < zones->nizone; i++)
                {
                    for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
                    {
                        if (j >= 4)
                        {
                            corner[2][j-4] =
                                max(corner[2][j-4],
                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
                        }
                    }
                }
                if (bDistMB)
                {
                    /* For the multi-body distance we need the maximum */
                    bcorner[2] = comm->cell_x0[dim2];
                    for (i = 0; i < 2; i++)
                    {
                        for (j = 0; j < 2; j++)
                        {
                            bcorner[2] = max(bcorner[2],
                                             comm->zone_d2[i][j].p1_0);
                        }
                    }
                }
            }

            /* Set the upper-right corner for rounding */
            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
             * Only cell (0,0,0) can see cell 7 (1,1,1)
             */
            corner_round_1[0] = comm->cell_x1[dim1];
            corner_round_1[3] = comm->cell_x1[dim1];
            if (dd->bGridJump)
            {
                corner_round_1[0] = max(comm->cell_x1[dim1],
                                        comm->zone_d1[1].mch1);
                if (bDistMB)
                {
                    /* For the multi-body distance we need the maximum */
                    bcorner_round_1 = max(comm->cell_x1[dim1],
                                          comm->zone_d1[1].p1_1);
                }
            }
        }
    }
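
    /* Summary of the corner bookkeeping above: corner[dim_ind][zone] holds
     * the lower cell boundary against which communication distances are
     * measured for each zone, bcorner[] the corresponding (maximal)
     * boundaries for multi-body bondeds, and corner_round_0/corner_round_1
     * the upper boundaries used for the rounding optimization between
     * decomposition dimensions.
     */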
    /* Triclinic stuff */
    normal = ddbox->normal;
    skew_fac_01 = 0;
    if (dd->ndim >= 2)
    {
        v_0 = ddbox->v[dim0];
        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
        {
            /* Determine the coupling coefficient for the distances
             * to the cell planes along dim0 and dim1 through dim2.
             * This is required for correct rounding.
             */
            skew_fac_01 =
                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
            if (debug)
            {
                fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
            }
        }
    }
    if (dd->ndim >= 3)
    {
        v_1 = ddbox->v[dim1];
    }

    zone_cg_range = zones->cg_range;
    index_gl  = dd->index_gl;
    cgindex   = dd->cgindex;
    cginfo_mb = fr->cginfo_mb;

    zone_cg_range[0]   = 0;
    zone_cg_range[1]   = dd->ncg_home;
    comm->zone_ncg1[0] = dd->ncg_home;
    pos_cg             = dd->ncg_home;

    nat_tot = dd->nat_home;
    nzone = 1;
    for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
    {
        dim = dd->dim[dim_ind];
        cd  = &comm->cd[dim_ind];

        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
        {
            /* No pbc in this dimension, the first node should not comm. */
            nzone_send = 0;
        }
        else
        {
            nzone_send = nzone;
        }

        bScrew = (dd->bScrewPBC && dim == XX);

        v_d = ddbox->v[dim];
        skew_fac2_d = sqr(ddbox->skew_fac[dim]);

        cd->bInPlace = TRUE;
        for (p = 0; p < cd->np; p++)
        {
            /* Only atoms communicated in the first pulse are used
             * for multi-body bonded interactions or for bBondComm.
             */
            bDistBonded   = ((bDistMB || bDist2B) && p == 0);
            bDistMB_pulse = (bDistMB && bDistBonded);

            ind   = &cd->ind[p];
            nsend = 0;
            nat   = 0;
            for (zone = 0; zone < nzone_send; zone++)
            {
                if (tric_dist[dim_ind] && dim_ind > 0)
                {
                    /* Determine slightly more optimized skew_fac's
                     * for the communication.
                     * This reduces the number of communicated atoms
                     * by about 10% for 3D DD of rhombic dodecahedra.
                     */
                    for (dimd = 0; dimd < dim; dimd++)
                    {
                        sf2_round[dimd] = 1;
                        if (ddbox->tric_dir[dimd])
                        {
                            for (i = dd->dim[dimd]+1; i < DIM; i++)
                            {
                                /* If we are shifted in dimension i
                                 * and the cell plane is tilted forward
                                 * in dimension i, skip this coupling.
                                 */
                                if (!(zones->shift[nzone+zone][i] &&
                                      ddbox->v[dimd][i][dimd] >= 0))
                                {
                                    sf2_round[dimd] +=
                                        sqr(ddbox->v[dimd][i][dimd]);
                                }
                            }
                            sf2_round[dimd] = 1/sf2_round[dimd];
                        }
                    }
                }

                zonei = zone_perm[dim_ind][zone];
                if (p == 0)
                {
                    /* Here we permute the zones to obtain a convenient order
                     * for neighbor searching
                     */
                    cg0 = zone_cg_range[zonei];
                    cg1 = zone_cg_range[zonei+1];
                }
                else
                {
                    /* Look only at the cg's received in the previous grid
                     * pulse
                     */
                    cg1 = zone_cg_range[nzone+zone+1];
                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
                }
                ind->nsend[zone] = 0;
                for (cg = cg0; cg < cg1; cg++)
                {
                    r2  = 0;
                    rb2 = 0;
                    if (tric_dist[dim_ind] == 0)
                    {
                        /* Rectangular direction, easy */
                        r = cg_cm[cg][dim] - corner[dim_ind][zone];
                        if (r > 0)
                        {
                            r2 += r*r;
                        }
                        if (bDistMB_pulse)
                        {
                            r = cg_cm[cg][dim] - bcorner[dim_ind];
                            if (r > 0)
                            {
                                rb2 += r*r;
                            }
                        }
                        /* Rounding gives at most a 16% reduction
                         * in communicated atoms
                         */
                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
                        {
                            r = cg_cm[cg][dim0] - corner_round_0;
                            /* This is the first dimension, so always r >= 0 */
                            r2 += r*r;
                            if (bDistMB_pulse)
                            {
                                rb2 += r*r;
                            }
                        }
                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
                        {
                            r = cg_cm[cg][dim1] - corner_round_1[zone];
                            if (r > 0)
                            {
                                r2 += r*r;
                            }
                            if (bDistMB_pulse)
                            {
                                r = cg_cm[cg][dim1] - bcorner_round_1;
                                if (r > 0)
                                {
                                    rb2 += r*r;
                                }
                            }
                        }
                    }
                    else
                    {
                        /* Triclinic direction, more complicated */
                        clear_rvec(rn);
                        clear_rvec(rb);
                        /* Rounding, conservative as the skew_fac multiplication
                         * will slightly underestimate the distance.
                         */
                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
                        {
                            rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
                            for (i = dim0+1; i < DIM; i++)
                            {
                                rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
                            }
                            r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
                            if (bDistMB_pulse)
                            {
                                rb[dim0] = rn[dim0];
                                rb2      = r2;
                            }
                            /* Take care that the cell planes along dim0 might not
                             * be orthogonal to those along dim1 and dim2.
                             */
                            for (i = 1; i <= dim_ind; i++)
                            {
                                dimd = dd->dim[i];
                                if (normal[dim0][dimd] > 0)
                                {
                                    rn[dimd] -= rn[dim0]*normal[dim0][dimd];
                                    if (bDistMB_pulse)
                                    {
                                        rb[dimd] -= rb[dim0]*normal[dim0][dimd];
                                    }
                                }
                            }
                        }
                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
                        {
                            rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
                            tric_sh = 0;
                            for (i = dim1+1; i < DIM; i++)
                            {
                                tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
                            }
                            rn[dim1] += tric_sh;
                            if (rn[dim1] > 0)
                            {
                                r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
                                /* Take care of coupling of the distances
                                 * to the planes along dim0 and dim1 through dim2.
                                 */
                                r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
                                /* Take care that the cell planes along dim1
                                 * might not be orthogonal to that along dim2.
                                 */
                                if (normal[dim1][dim2] > 0)
                                {
                                    rn[dim2] -= rn[dim1]*normal[dim1][dim2];
                                }
                            }
                            if (bDistMB_pulse)
                            {
                                rb[dim1] +=
                                    cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
                                if (rb[dim1] > 0)
                                {
                                    rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
                                    /* Take care of coupling of the distances
                                     * to the planes along dim0 and dim1 through dim2.
                                     */
                                    rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
                                    /* Take care that the cell planes along dim1
                                     * might not be orthogonal to that along dim2.
                                     */
                                    if (normal[dim1][dim2] > 0)
                                    {
                                        rb[dim2] -= rb[dim1]*normal[dim1][dim2];
                                    }
                                }
                            }
                        }
                        /* The distance along the communication direction */
                        rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
                        tric_sh = 0;
                        for (i = dim+1; i < DIM; i++)
                        {
                            tric_sh -= cg_cm[cg][i]*v_d[i][dim];
                        }
                        rn[dim] += tric_sh;
                        if (rn[dim] > 0)
                        {
                            r2 += rn[dim]*rn[dim]*skew_fac2_d;
                            /* Take care of coupling of the distances
                             * to the planes along dim0 and dim1 through dim2.
                             */
                            if (dim_ind == 1 && zonei == 1)
                            {
                                r2 -= rn[dim0]*rn[dim]*skew_fac_01;
                            }
                        }
                        if (bDistMB_pulse)
                        {
                            rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
                            if (rb[dim] > 0)
                            {
                                rb2 += rb[dim]*rb[dim]*skew_fac2_d;
                                /* Take care of coupling of the distances
                                 * to the planes along dim0 and dim1 through dim2.
                                 */
                                if (dim_ind == 1 && zonei == 1)
                                {
                                    rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
                                }
                            }
                        }
                    }

                    if (r2 < r_comm2 ||
                        (bDistBonded &&
                         ((bDistMB && rb2 < r_bcomm2) ||
                          (bDist2B && r2 < r_bcomm2)) &&
                         (!bBondComm ||
                          (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
                           missing_link(comm->cglink, index_gl[cg],
                                        comm->bLocalCG)))))
                    {
                        /* Make an index to the local charge groups */
                        if (nsend+1 > ind->nalloc)
                        {
                            ind->nalloc = over_alloc_large(nsend+1);
                            srenew(ind->index, ind->nalloc);
                        }
                        if (nsend+1 > comm->nalloc_int)
                        {
                            comm->nalloc_int = over_alloc_large(nsend+1);
                            srenew(comm->buf_int, comm->nalloc_int);
                        }
                        ind->index[nsend]    = cg;
                        comm->buf_int[nsend] = index_gl[cg];
                        ind->nsend[zone]++;
                        vec_rvec_check_alloc(&comm->vbuf, nsend+1);

                        if (dd->ci[dim] == 0)
                        {
                            /* Correct cg_cm for pbc */
                            rvec_add(cg_cm[cg], box[dim], comm->vbuf.v[nsend]);
                            if (bScrew)
                            {
                                comm->vbuf.v[nsend][YY] =
                                    box[YY][YY]-comm->vbuf.v[nsend][YY];
                                comm->vbuf.v[nsend][ZZ] =
                                    box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
                            }
                        }
                        else
                        {
                            copy_rvec(cg_cm[cg], comm->vbuf.v[nsend]);
                        }
                        nsend++;
                        nat += cgindex[cg+1] - cgindex[cg];
                    }
                }
            }
7499 for(zone
=nzone_send
; zone
<nzone
; zone
++)
7501 ind
->nsend
[zone
] = 0;
7503 ind
->nsend
[nzone
] = nsend
;
7504 ind
->nsend
[nzone
+1] = nat
;
7505 /* Communicate the number of cg's and atoms to receive */
7506 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7507 ind
->nsend
, nzone
+2,
7508 ind
->nrecv
, nzone
+2);
7510 /* The rvec buffer is also required for atom buffers of size nsend
7511 * in dd_move_x and dd_move_f.
7513 vec_rvec_check_alloc(&comm
->vbuf
,ind
->nsend
[nzone
+1]);
7517 /* We can receive in place if only the last zone is not empty */
7518 for(zone
=0; zone
<nzone
-1; zone
++)
7520 if (ind
->nrecv
[zone
] > 0)
7522 cd
->bInPlace
= FALSE
;
7527 /* The int buffer is only required here for the cg indices */
7528 if (ind
->nrecv
[nzone
] > comm
->nalloc_int2
)
7530 comm
->nalloc_int2
= over_alloc_dd(ind
->nrecv
[nzone
]);
7531 srenew(comm
->buf_int2
,comm
->nalloc_int2
);
7533 /* The rvec buffer is also required for atom buffers
7534 * of size nrecv in dd_move_x and dd_move_f.
7536 i
= max(cd
->ind
[0].nrecv
[nzone
+1],ind
->nrecv
[nzone
+1]);
7537 vec_rvec_check_alloc(&comm
->vbuf2
,i
);
7541 /* Make space for the global cg indices */
7542 if (pos_cg
+ ind
->nrecv
[nzone
] > dd
->cg_nalloc
7543 || dd
->cg_nalloc
== 0)
7545 dd
->cg_nalloc
= over_alloc_dd(pos_cg
+ ind
->nrecv
[nzone
]);
7546 srenew(index_gl
,dd
->cg_nalloc
);
7547 srenew(cgindex
,dd
->cg_nalloc
+1);
7549 /* Communicate the global cg indices */
7552 recv_i
= index_gl
+ pos_cg
;
7556 recv_i
= comm
->buf_int2
;
7558 dd_sendrecv_int(dd
, dim_ind
, dddirBackward
,
7559 comm
->buf_int
, nsend
,
7560 recv_i
, ind
->nrecv
[nzone
]);
7562 /* Make space for cg_cm */
7563 if (pos_cg
+ ind
->nrecv
[nzone
] > fr
->cg_nalloc
)
7565 dd_realloc_fr_cg(fr
,pos_cg
+ ind
->nrecv
[nzone
]);
7568 /* Communicate cg_cm */
7571 recv_vr
= cg_cm
+ pos_cg
;
7575 recv_vr
= comm
->vbuf2
.v
;
7577 dd_sendrecv_rvec(dd
, dim_ind
, dddirBackward
,
7578 comm
->vbuf
.v
, nsend
,
7579 recv_vr
, ind
->nrecv
[nzone
]);
7581 /* Make the charge group index */
7584 zone
= (p
== 0 ? 0 : nzone
- 1);
7585 while (zone
< nzone
)
7587 for(cg
=0; cg
<ind
->nrecv
[zone
]; cg
++)
7589 cg_gl
= index_gl
[pos_cg
];
7590 fr
->cginfo
[pos_cg
] = ddcginfo(cginfo_mb
,cg_gl
);
7591 nrcg
= GET_CGINFO_NATOMS(fr
->cginfo
[pos_cg
]);
7592 cgindex
[pos_cg
+1] = cgindex
[pos_cg
] + nrcg
;
7595 /* Update the charge group presence,
7596 * so we can use it in the next pass of the loop.
7598 comm
->bLocalCG
[cg_gl
] = TRUE
;
7604 comm
->zone_ncg1
[nzone
+zone
] = ind
->nrecv
[zone
];
7607 zone_cg_range
[nzone
+zone
] = pos_cg
;
7612 /* This part of the code is never executed with bBondComm. */
7613 merge_cg_buffers(nzone
,cd
,p
,zone_cg_range
,
7614 index_gl
,recv_i
,cg_cm
,recv_vr
,
7615 cgindex
,fr
->cginfo_mb
,fr
->cginfo
);
7616 pos_cg
+= ind
->nrecv
[nzone
];
7618 nat_tot
+= ind
->nrecv
[nzone
+1];
7622 /* Store the atom block for easy copying of communication buffers */
7623 make_cell2at_index(cd
,nzone
,zone_cg_range
[nzone
],cgindex
);
7627 dd
->index_gl
= index_gl
;
7628 dd
->cgindex
= cgindex
;
7630 dd
->ncg_tot
= zone_cg_range
[zones
->n
];
7631 dd
->nat_tot
= nat_tot
;
7632 comm
->nat
[ddnatHOME
] = dd
->nat_home
;
7633 for(i
=ddnatZONE
; i
<ddnatNR
; i
++)
7635 comm
->nat
[i
] = dd
->nat_tot
;
7640 /* We don't need to update cginfo, since that was alrady done above.
7641 * So we pass NULL for the forcerec.
7643 dd_set_cginfo(dd
->index_gl
,dd
->ncg_home
,dd
->ncg_tot
,
7644 NULL
,comm
->bLocalCG
);
7649 fprintf(debug
,"Finished setting up DD communication, zones:");
7650 for(c
=0; c
<zones
->n
; c
++)
7652 fprintf(debug
," %d",zones
->cg_range
[c
+1]-zones
->cg_range
[c
]);
7654 fprintf(debug
,"\n");
static void set_cg_boundaries(gmx_domdec_zones_t *zones)
{
    int c;

    for (c = 0; c < zones->nizone; c++)
    {
        zones->izone[c].cg1  = zones->cg_range[c+1];
        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
    }
}
static int comp_cgsort(const void *a, const void *b)
{
    int comp;

    gmx_cgsort_t *cga, *cgb;
    cga = (gmx_cgsort_t *)a;
    cgb = (gmx_cgsort_t *)b;

    comp = cga->nsc - cgb->nsc;
    if (comp == 0)
    {
        comp = cga->ind_gl - cgb->ind_gl;
    }

    return comp;
}
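
/* The comparator sorts primarily on the ns grid cell index and breaks ties
 * with the global charge group index, so the resulting order is unique and
 * independent of the previous ordering, which keeps runs reproducible.
 */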
static void order_int_cg(int n, gmx_cgsort_t *sort,
                         int *a, int *buf)
{
    int i;

    /* Order the data */
    for (i = 0; i < n; i++)
    {
        buf[i] = a[sort[i].ind];
    }

    /* Copy back to the original array */
    for (i = 0; i < n; i++)
    {
        a[i] = buf[i];
    }
}
static void order_vec_cg(int n, gmx_cgsort_t *sort,
                         rvec *v, rvec *buf)
{
    int i;

    /* Order the data */
    for (i = 0; i < n; i++)
    {
        copy_rvec(v[sort[i].ind], buf[i]);
    }

    /* Copy back to the original array */
    for (i = 0; i < n; i++)
    {
        copy_rvec(buf[i], v[i]);
    }
}
static void order_vec_atom(int ncg, int *cgindex, gmx_cgsort_t *sort,
                           rvec *v, rvec *buf)
{
    int a, atot, cg, cg0, cg1, i;

    /* Order the data */
    a = 0;
    for (cg = 0; cg < ncg; cg++)
    {
        cg0 = cgindex[sort[cg].ind];
        cg1 = cgindex[sort[cg].ind+1];
        for (i = cg0; i < cg1; i++)
        {
            copy_rvec(v[i], buf[a]);
            a++;
        }
    }
    atot = a;

    /* Copy back to the original array */
    for (a = 0; a < atot; a++)
    {
        copy_rvec(buf[a], v[a]);
    }
}
static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
                         int nsort_new, gmx_cgsort_t *sort_new,
                         gmx_cgsort_t *sort1)
{
    int i1, i2, i_new;

    /* The new indices are not very ordered, so we qsort them */
    qsort(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);

    /* sort2 is already ordered, so now we can merge the two arrays */
    i1    = 0;
    i2    = 0;
    i_new = 0;
    while (i2 < nsort2 || i_new < nsort_new)
    {
        if (i2 == nsort2)
        {
            sort1[i1++] = sort_new[i_new++];
        }
        else if (i_new == nsort_new)
        {
            sort1[i1++] = sort2[i2++];
        }
        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
                 (sort2[i2].nsc == sort_new[i_new].nsc &&
                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
        {
            sort1[i1++] = sort2[i2++];
        }
        else
        {
            sort1[i1++] = sort_new[i_new++];
        }
    }
}
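
/* Complexity note: only the k newly moved entries are qsorted (O(k log k));
 * the merge with the already ordered list is linear in the total number of
 * charge groups, which is why this path is cheaper than re-sorting everything
 * when most charge groups stayed in their ns grid cell.
 */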
static void dd_sort_state(gmx_domdec_t *dd, int ePBC,
                          rvec *cgcm, t_forcerec *fr, t_state *state,
                          int ncg_home_old)
{
    gmx_domdec_sort_t *sort;
    gmx_cgsort_t *cgsort, *sort_i;
    int ncg_new, nsort2, nsort_new, i, cell_index, *ibuf, cgsize;
    rvec *vbuf;

    sort = dd->comm->sort;

    if (dd->ncg_home > sort->sort_nalloc)
    {
        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
        srenew(sort->sort1, sort->sort_nalloc);
        srenew(sort->sort2, sort->sort_nalloc);
    }

    if (ncg_home_old >= 0)
    {
        /* The charge groups that remained in the same ns grid cell
         * are completely ordered. So we can sort efficiently by sorting
         * the charge groups that did move into the stationary list.
         */
        ncg_new   = 0;
        nsort2    = 0;
        nsort_new = 0;
        for (i = 0; i < dd->ncg_home; i++)
        {
            /* Check if this cg did not move to another node */
            cell_index = fr->ns.grid->cell_index[i];
            if (cell_index != 4*fr->ns.grid->ncells)
            {
                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
                {
                    /* This cg is new on this node or moved ns grid cell */
                    if (nsort_new >= sort->sort_new_nalloc)
                    {
                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
                        srenew(sort->sort_new, sort->sort_new_nalloc);
                    }
                    sort_i = &(sort->sort_new[nsort_new++]);
                }
                else
                {
                    /* This cg did not move */
                    sort_i = &(sort->sort2[nsort2++]);
                }
                /* Sort on the ns grid cell indices
                 * and the global topology index
                 */
                sort_i->nsc    = cell_index;
                sort_i->ind_gl = dd->index_gl[i];
                sort_i->ind    = i;
                ncg_new++;
            }
        }
        if (debug)
        {
            fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
                    nsort2, nsort_new);
        }
        /* Sort efficiently */
        ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
                     sort->sort1);
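
        /* The incremental path above keeps two lists: sort2 for charge groups
         * whose ns cell index is unchanged (already in sorted order) and
         * sort_new for the ones that moved or just arrived; ordered_sort()
         * then only has to qsort the short sort_new list before merging.
         */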
    }
    else
    {
        cgsort = sort->sort1;
        ncg_new = 0;
        for (i = 0; i < dd->ncg_home; i++)
        {
            /* Sort on the ns grid cell indices
             * and the global topology index
             */
            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
            cgsort[i].ind_gl = dd->index_gl[i];
            cgsort[i].ind    = i;
            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
            {
                ncg_new++;
            }
        }
        if (debug)
        {
            fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
        }
        /* Determine the order of the charge groups using qsort */
        qsort(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
    }
    cgsort = sort->sort1;

    /* We alloc with the old size, since cgindex is still old */
    vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
    vbuf = dd->comm->vbuf.v;

    /* Remove the charge groups which are no longer at home here */
    dd->ncg_home = ncg_new;

    /* Reorder the state */
    for (i = 0; i < estNR; i++)
    {
        if (EST_DISTR(i) && state->flags & (1<<i))
        {
            switch (i)
            {
            case estX:
                order_vec_atom(dd->ncg_home, dd->cgindex, cgsort, state->x, vbuf);
                break;
            case estV:
                order_vec_atom(dd->ncg_home, dd->cgindex, cgsort, state->v, vbuf);
                break;
            case estSDX:
                order_vec_atom(dd->ncg_home, dd->cgindex, cgsort, state->sd_X, vbuf);
                break;
            case estCGP:
                order_vec_atom(dd->ncg_home, dd->cgindex, cgsort, state->cg_p, vbuf);
                break;
            case estDISRE_INITF:
            case estDISRE_RM3TAV:
            case estORIRE_INITF:
                /* No ordering required */
                break;
            default:
                gmx_incons("Unknown state entry encountered in dd_sort_state");
                break;
            }
        }
    }

    /* Reorder cgcm */
    order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);

    if (dd->ncg_home+1 > sort->ibuf_nalloc)
    {
        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
        srenew(sort->ibuf, sort->ibuf_nalloc);
    }
    ibuf = sort->ibuf;
    /* Reorder the global cg index */
    order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
    /* Reorder the cginfo */
    order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
    /* Rebuild the local cg index */
    ibuf[0] = 0;
    for (i = 0; i < dd->ncg_home; i++)
    {
        cgsize    = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
        ibuf[i+1] = ibuf[i] + cgsize;
    }
    for (i = 0; i < dd->ncg_home+1; i++)
    {
        dd->cgindex[i] = ibuf[i];
    }
    /* Set the home atom number */
    dd->nat_home = dd->cgindex[dd->ncg_home];

    /* Copy the sorted ns cell indices back to the ns grid struct */
    for (i = 0; i < dd->ncg_home; i++)
    {
        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
    }
    fr->ns.grid->nr = dd->ncg_home;
}
static void add_dd_statistics(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int ddnat;

    comm = dd->comm;

    for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
    {
        comm->sum_nat[ddnat-ddnatZONE] +=
            comm->nat[ddnat] - comm->nat[ddnat-1];
    }
    comm->ndecomp++;
}
void reset_dd_statistics_counters(gmx_domdec_t *dd)
{
    gmx_domdec_comm_t *comm;
    int ddnat;

    comm = dd->comm;

    /* Reset all the statistics and counters for total run counting */
    for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
    {
        comm->sum_nat[ddnat-ddnatZONE] = 0;
    }
    comm->ndecomp   = 0;
    comm->load_step = 0;
    clear_ivec(comm->load_lim);
}
void print_dd_statistics(t_commrec *cr, t_inputrec *ir, FILE *fplog)
{
    gmx_domdec_comm_t *comm;
    int ddnat;
    double av;

    comm = cr->dd->comm;

    gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);

    if (fplog == NULL)
    {
        return;
    }

    fprintf(fplog, "\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");

    for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
    {
        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
        switch (ddnat)
        {
        case ddnatZONE:
            fprintf(fplog,
                    " av. #atoms communicated per step for force:  %d x %.1f\n",
                    2, av);
            break;
        case ddnatVSITE:
            if (cr->dd->vsite_comm)
            {
                fprintf(fplog,
                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
                        (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
                        av);
            }
            break;
        case ddnatCON:
            if (cr->dd->constraint_comm)
            {
                fprintf(fplog,
                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
                        1 + ir->nLincsIter, av);
            }
            break;
        default:
            gmx_incons(" Unknown type for DD statistics");
        }
    }
    fprintf(fplog, "\n");

    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
    {
        print_dd_load_av(fplog, cr->dd);
    }
}
void dd_partition_system(FILE *fplog,
                         gmx_large_int_t step,
                         t_commrec *cr,
                         bool bMasterState,
                         int nstglobalcomm,
                         t_state *state_global,
                         gmx_mtop_t *top_global,
                         t_inputrec *ir,
                         t_state *state_local,
                         rvec **f,
                         t_mdatoms *mdatoms,
                         gmx_localtop_t *top_local,
                         t_forcerec *fr,
                         gmx_vsite_t *vsite,
                         gmx_shellfc_t shellfc,
                         gmx_constr_t constr,
                         t_nrnb *nrnb,
                         gmx_wallcycle_t wcycle,
                         bool bVerbose)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    gmx_ddbox_t ddbox;
    t_block *cgs_gl;
    gmx_large_int_t step_pcoupl;
    rvec cell_ns_x0, cell_ns_x1;
    int i, j, n, cg0 = 0, ncg_home_old = -1, nat_f_novirsum;
    bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
    bool bRedist, bSortCG, bResortAll;
    ivec ncells_old, np;
    real grid_density;
    char sbuf[STRLEN];

    dd   = cr->dd;
    comm = dd->comm;

    bBoxChanged = (bMasterState || DEFORM(*ir));
    if (ir->epc != epcNO)
    {
        /* With nstcalcenergy > 1, pressure coupling happens
         * one step after calculating the energies.
         * Box scaling happens at the end of the MD step,
         * after the DD partitioning.
         * We therefore have to do DLB in the first partitioning
         * after an MD step where P-coupling occurred.
         * We need to determine the last step in which p-coupling occurred.
         * MRS -- need to validate this for vv?
         */
        n = ir->nstcalcenergy;
        if (n == 1)
        {
            step_pcoupl = step - 1;
        }
        else
        {
            step_pcoupl = ((step - 1)/n)*n + 1;
        }
        if (step_pcoupl >= comm->partition_step)
        {
            bBoxChanged = TRUE;
        }
    }
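
    /* Worked example of the last-P-coupling-step formula above (illustrative
     * numbers): with nstcalcenergy n = 10 and step = 25,
     * step_pcoupl = ((25-1)/10)*10 + 1 = 21, the most recent step at which
     * pressure coupling scaled the box, so a repartitioning after step 21
     * must still treat the box as changed.
     */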

    bNStGlobalComm = (step >= comm->partition_step + nstglobalcomm);

    if (!comm->bDynLoadBal)
    {
        bDoDLB = FALSE;
    }
    else
    {
        /* Should we do dynamic load balancing this step?
         * Since it requires (possibly expensive) global communication,
         * we might want to do DLB less frequently.
         */
        if (bBoxChanged || ir->epc != epcNO)
        {
            bDoDLB = bBoxChanged;
        }
        else
        {
            bDoDLB = bNStGlobalComm;
        }
    }

    /* Check if we have recorded loads on the nodes */
    if (comm->bRecordLoad && dd_load_count(comm))
    {
        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
        {
            /* Check if we should use DLB at the second partitioning
             * and every 100 partitionings,
             * so the extra communication cost is negligible.
             */
            n = max(100, nstglobalcomm);
            bCheckDLB = (comm->n_load_collect == 0 ||
                         comm->n_load_have % n == n-1);
        }
        else
        {
            bCheckDLB = FALSE;
        }

        /* Print load every nstlog, first and last step to the log file */
        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
                    comm->n_load_collect == 0 ||
                    (step + ir->nstlist > ir->init_step + ir->nsteps));

        /* Avoid extra communication due to verbose screen output
         * when nstglobalcomm is set.
         */
        if (bDoDLB || bLogLoad || bCheckDLB ||
            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
        {
            get_load_distribution(dd, wcycle);
            if (DDMASTER(dd))
            {
                if (bLogLoad)
                {
                    dd_print_load(fplog, dd, step-1);
                }
                if (bVerbose)
                {
                    dd_print_load_verbose(dd);
                }
            }
            comm->n_load_collect++;

            if (bCheckDLB)
            {
                /* Since the timings are node dependent, the master decides */
                if (DDMASTER(dd))
                {
                    bTurnOnDLB =
                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
                    if (debug)
                    {
                        fprintf(debug, "step %s, imb loss %f\n",
                                gmx_step_str(step, sbuf),
                                dd_force_imb_perf_loss(dd));
                    }
                }
                dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
                if (bTurnOnDLB)
                {
                    turn_on_dlb(fplog, cr, step);
                }
            }
        }
        comm->n_load_have++;
    }
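
    /* Cadence note for the check above: with eDLB == edlbAUTO the imbalance
     * test runs at the second partitioning (n_load_collect == 0) and then
     * once every max(100, nstglobalcomm) recorded loads, so the extra global
     * communication stays negligible; DD_PERF_LOSS is the force-imbalance
     * loss fraction that triggers turning DLB on.
     */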
8196 cgs_gl
= &comm
->cgs_gl
;
8201 /* Clear the old state */
8202 clear_dd_indices(dd
,0,0);
8204 set_ddbox(dd
,bMasterState
,cr
,ir
,state_global
->box
,
8205 TRUE
,cgs_gl
,state_global
->x
,&ddbox
);
8207 get_cg_distribution(fplog
,step
,dd
,cgs_gl
,
8208 state_global
->box
,&ddbox
,state_global
->x
);
8210 dd_distribute_state(dd
,cgs_gl
,
8211 state_global
,state_local
,f
);
8213 dd_make_local_cgs(dd
,&top_local
->cgs
);
8215 if (dd
->ncg_home
> fr
->cg_nalloc
)
8217 dd_realloc_fr_cg(fr
,dd
->ncg_home
);
8219 calc_cgcm(fplog
,0,dd
->ncg_home
,
8220 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8222 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8224 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8228 else if (state_local
->ddp_count
!= dd
->ddp_count
)
8230 if (state_local
->ddp_count
> dd
->ddp_count
)
8232 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local
->ddp_count
,dd
->ddp_count
);
8235 if (state_local
->ddp_count_cg_gl
!= state_local
->ddp_count
)
8237 gmx_fatal(FARGS
,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local
->ddp_count_cg_gl
,state_local
->ddp_count
);
8240 /* Clear the old state */
8241 clear_dd_indices(dd
,0,0);
8243 /* Build the new indices */
8244 rebuild_cgindex(dd
,cgs_gl
->index
,state_local
);
8245 make_dd_indices(dd
,cgs_gl
->index
,0);
8247 /* Redetermine the cg COMs */
8248 calc_cgcm(fplog
,0,dd
->ncg_home
,
8249 &top_local
->cgs
,state_local
->x
,fr
->cg_cm
);
8251 inc_nrnb(nrnb
,eNR_CGCM
,dd
->nat_home
);
8253 dd_set_cginfo(dd
->index_gl
,0,dd
->ncg_home
,fr
,comm
->bLocalCG
);
8255 set_ddbox(dd
,bMasterState
,cr
,ir
,state_local
->box
,
8256 TRUE
,&top_local
->cgs
,state_local
->x
,&ddbox
);
8258 bRedist
= comm
->bDynLoadBal
;
    else
    {
        /* We have the full state, only redistribute the cgs */

        /* Clear the non-home indices */
        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);

        /* Avoid global communication for dim's without pbc and -gcom */
        if (!bNStGlobalComm)
        {
            copy_rvec(comm->box0,ddbox.box0);
            copy_rvec(comm->box_size,ddbox.box_size);
        }
        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);

        bRedist = TRUE;
    }
    /* For dim's without pbc and -gcom */
    copy_rvec(ddbox.box0,comm->box0);
    copy_rvec(ddbox.box_size,comm->box_size);

    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
                      step,wcycle);

    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
    {
        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
    }

    /* Check if we should sort the charge groups */
    if (comm->nstSortCG > 0)
    {
        bSortCG = (bMasterState ||
                   (bRedist && (step % comm->nstSortCG == 0)));
    }
    else
    {
        bSortCG = FALSE;
    }

    ncg_home_old = dd->ncg_home;
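
    /* Redistribution sends the charge groups that have left the home
     * cell to their new home node; the returned cg0 is the first home
     * charge group for which the local indices still need rebuilding.
     */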

    if (bRedist)
    {
        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
                                 state_local,f,fr,mdatoms,
                                 !bSortCG,nrnb);
    }

    get_nsgrid_boundaries(fr->ns.grid,dd,
                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
                          dd->ncg_home,fr->cg_cm,
                          cell_ns_x0,cell_ns_x1,&grid_density);
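
    /* When the box has changed, the neighbor-search cell sizes are
     * redetermined and communicated along the decomposition dimensions
     * before the grid is set up.
     */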
    if (bBoxChanged)
    {
        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
    }

    copy_ivec(fr->ns.grid->n,ncells_old);
    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
               state_local->box,cell_ns_x0,cell_ns_x1,
               fr->rlistlong,grid_density);
    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
    copy_ivec(ddbox.tric_dir,comm->tric_dir);

    if (bSortCG)
    {
        /* Sort the state on charge group position.
         * This enables exact restarts from this step.
         * It also improves performance by about 15% with larger numbers
         * of atoms per node.
         */

        /* Fill the ns grid with the home cell,
         * so we can sort with the indices.
         */
        set_zones_ncg_home(dd);
        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
                  0,dd->ncg_home,fr->cg_cm);

        /* Check if we can use the old order and ns grid cell indices
         * of the charge groups to sort the charge groups efficiently.
         */
        bResortAll = (bMasterState ||
                      fr->ns.grid->n[XX] != ncells_old[XX] ||
                      fr->ns.grid->n[YY] != ncells_old[YY] ||
                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);

        if (debug)
        {
            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
                    gmx_step_str(step,sbuf),dd->ncg_home);
        }
        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
                      bResortAll ? -1 : ncg_home_old);
        /* Rebuild all the indices */
        cg0 = 0;
        ga2la_clear(dd->ga2la);
    }
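
    /* After sorting, the old global-to-local lookup is stale, so
     * cg0 = 0 and ga2la_clear above force a complete index rebuild
     * in make_dd_indices below.
     */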

    /* Set up the communication and communicate the coordinates */
    setup_dd_communication(dd,state_local->box,&ddbox,fr);

    /* Set the indices */
    make_dd_indices(dd,cgs_gl->index,cg0);

    /* Set the charge group boundaries for neighbor searching */
    set_cg_boundaries(&comm->zones);

    /*
    write_dd_pdb("dd_home",step,"dump",top_global,cr,
                 -1,state_local->x,state_local->box);
    */
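
    /* The commented-out write_dd_pdb call above is a debugging hook:
     * when enabled it dumps the home atoms to a PDB file at every
     * partitioning.
     */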

    /* Extract a local topology from the global topology */
    for(i=0; i<dd->ndim; i++)
    {
        np[dd->dim[i]] = comm->cd[i].np;
    }
    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
                      comm->cellsize_min,np,
                      fr,vsite,top_global,top_local);
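
    /* np holds the number of communication pulses per decomposition
     * dimension; together with comm->cellsize_min this presumably
     * allows dd_make_local_top to verify that bonded interactions do
     * not extend beyond the communicated zones.
     */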

    /* Set up the special atom communication */
    n = comm->nat[ddnatZONE];
    for(i=ddnatZONE+1; i<ddnatNR; i++)
    {
        switch(i)
        {
        case ddnatVSITE:
            if (vsite && vsite->n_intercg_vsite)
            {
                n = dd_make_local_vsites(dd,n,top_local->idef.il);
            }
            break;
        case ddnatCON:
            if (dd->bInterCGcons)
            {
                /* Special code is only needed for inter-cg constraints */
                n = dd_make_local_constraints(dd,n,top_global,
                                              constr,ir->nProjOrder,
                                              &top_local->idef.il[F_CONSTR]);
            }
            break;
        default:
            gmx_incons("Unknown special atom type setup");
        }
        comm->nat[i] = n;
    }
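
    /* comm->nat[] thus holds cumulative atom counts: each special atom
     * type appends the extra atoms it communicates on top of the zone
     * atoms, and comm->nat[ddnatNR-1] is the grand total.
     */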

    /* Make space for the extra coordinates for virtual site
     * or constraint communication.
     */
    state_local->natoms = comm->nat[ddnatNR-1];
    if (state_local->natoms > state_local->nalloc)
    {
        dd_realloc_state(state_local,f,state_local->natoms);
    }

    if (fr->bF_NoVirSum)
    {
        if (vsite && vsite->n_intercg_vsite)
        {
            nat_f_novirsum = comm->nat[ddnatVSITE];
        }
        else
        {
            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
            {
                nat_f_novirsum = dd->nat_tot;
            }
            else
            {
                nat_f_novirsum = dd->nat_home;
            }
        }
    }
    else
    {
        nat_f_novirsum = 0;
    }
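
    /* nat_f_novirsum sizes the separate force buffer for contributions
     * that must not enter the virial summation; how far it extends
     * depends on whether inter-cg virtual sites or full electrostatics
     * with inter-cg exclusions are present.
     */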

    /* Set the number of atoms required for the force calculation.
     * Forces need to be constrained when using a twin-range setup
     * or with energy minimization. For simple simulations we could
     * avoid some allocation, zeroing and copying, but this is
     * probably not worth the complications and checking.
     */
    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
                        comm->nat[ddnatCON],nat_f_novirsum);

    /* We make all mdatoms up to nat_tot_con.
     * We could save some work by only setting invmass
     * between nat_tot and nat_tot_con.
     */
    /* This call also sets the new number of home particles to dd->nat_home */
    atoms2md(top_global,ir,
             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);

    /* Now that we have the charges, we can sort the FE interactions */
    dd_sort_local_top(dd,mdatoms,top_local);

    if (shellfc)
    {
        /* Make the local shell stuff, currently no communication is done */
        make_local_shells(cr,mdatoms,shellfc);
    }

    if (ir->implicit_solvent)
    {
        make_local_gb(cr,fr->born,ir->gb_algorithm);
    }

    if (!(cr->duty & DUTY_PME))
    {
        /* Send the charges to our PME only node */
        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
                       mdatoms->chargeA,mdatoms->chargeB,
                       comm->ddpme[0].maxshift,comm->ddpme[1].maxshift);
    }
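
    /* With separate PME nodes, each particle-particle node forwards
     * its (possibly perturbed) charges plus the ddpme maxshift values,
     * so the PME node can redistribute the charges consistently with
     * the new decomposition.
     */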

    if (constr)
    {
        set_constraints(constr,top_local,ir,mdatoms,cr);
    }

    if (ir->ePull != epullNO)
    {
        /* Update the local pull groups */
        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
    }

    add_dd_statistics(dd);

    /* Make sure we only count the cycles for this DD partitioning */
    clear_dd_cycle_counts(dd);

    /* Because the order of the atoms might have changed since
     * the last vsite construction, we need to communicate the constructing
     * atom coordinates again (for spreading the forces this MD step).
     */
    dd_move_x_vsites(dd,state_local->box,state_local->x);

    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
    {
        dd_move_x(dd,state_local->box,state_local->x);
        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
                     -1,state_local->x,state_local->box);
    }

    /* Store the partitioning step */
    comm->partition_step = step;

    /* Increase the DD partitioning counter */
    dd->ddp_count++;
    /* The state currently matches this DD partitioning count, store it */
    state_local->ddp_count = dd->ddp_count;

    if (bMasterState)
    {
        /* The DD master node knows the complete cg distribution,
         * store the count so we can possibly skip the cg info communication.
         */
        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
    }

    if (comm->DD_debug > 0)
    {
        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
                                "after partitioning");
    }