/*
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 *                        VERSION 3.2.0
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * GROwing Monsters And Cloning Shrimps
 */
/* This file is completely threadsafe - keep it that way! */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <math.h>

#include "sysstuff.h"
#include "typedefs.h"
#include "smalloc.h"
#include "invblock.h"
#include "macros.h"
#include "main.h"
#include "ns.h"
#include "partdec.h"
#include "splitter.h"
#include "gmx_random.h"
#include "mtop_util.h"
#include "mvdata.h"
#include "vec.h"

typedef struct gmx_partdec_constraint
{
    int    left_range_receive;
    int    right_range_receive;
    int    left_range_send;
    int    right_range_send;
    int    nconstraints;
    int   *nlocalatoms;
    rvec  *sendbuf;
    rvec  *recvbuf;
} gmx_partdec_constraint_t;

typedef struct gmx_partdec {
    int  neighbor[2];           /* The nodeids of the left and right neighbors */
    int *cgindex;               /* The charge group boundaries,                */
                                /* size nnodes+1,                              */
                                /* only allocated with particle decomposition. */
    int *index;                 /* The home particle boundaries,               */
                                /* size nnodes+1,                              */
                                /* only allocated with particle decomposition. */
    int  shift,bshift;          /* Coordinates are shifted left for            */
                                /* 'shift' systolic pulses, and right          */
                                /* for 'bshift' pulses. Forces are             */
                                /* shifted right for 'shift' pulses            */
                                /* and left for 'bshift' pulses.               */
                                /* This way it is not necessary to shift       */
                                /* the coordinates over the entire ring.       */
    rvec *vbuf;                 /* Buffer for summing the forces               */
#ifdef GMX_MPI
    MPI_Request mpi_req_rx;     /* MPI requests for asynchronous transfers     */
    MPI_Request mpi_req_tx;
#endif
    gmx_partdec_constraint_t *constraints;
} gmx_partdec_t;
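
/* Note: pd->shift and pd->bshift are filled in by calc_nsbshift() below.
 * The forward shift defaults to half the ring, (nnodes+1)/2, and is only
 * enlarged when a neighbor-search target or a bonded interaction spans
 * more node boundaries than that.
 */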

void gmx_tx(const t_commrec *cr,int dir,void *buf,int bufsize)
{
#ifndef GMX_MPI
    gmx_call("gmx_tx");
#else
    int        nodeid;
    int        tag,flag;
    MPI_Status status;

    nodeid = cr->pd->neighbor[dir];

#ifdef DEBUG
    fprintf(stderr,"gmx_tx: nodeid=%d, buf=%x, bufsize=%d\n",
            nodeid,buf,bufsize);
#endif
#ifdef MPI_TEST
    /* workaround for crashes encountered with MPI on IRIX 6.5 */
    if (cr->pd->mpi_req_tx != MPI_REQUEST_NULL) {
        MPI_Test(&cr->pd->mpi_req_tx,&flag,&status);
        if (flag==FALSE) {
            fprintf(stderr,"gmx_tx called before previous send was complete: nodeid=%d, buf=%x, bufsize=%d\n",
                    nodeid,buf,bufsize);
            gmx_tx_wait(cr,dir);
        }
    }
#endif
    tag = 0;
    if (MPI_Isend(buf,bufsize,MPI_BYTE,RANK(cr,nodeid),tag,cr->mpi_comm_mygroup,&cr->pd->mpi_req_tx) != 0)
        gmx_comm("MPI_Isend Failed");
#endif
}

void gmx_tx_wait(const t_commrec *cr, int dir)
{
#ifndef GMX_MPI
    gmx_call("gmx_tx_wait");
#else
    MPI_Status status;
    int        mpi_result;

    if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_tx,&status)) != 0)
        gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
#endif
}

void gmx_rx(const t_commrec *cr,int dir,void *buf,int bufsize)
{
#ifndef GMX_MPI
    gmx_call("gmx_rx");
#else
    int nodeid;
    int tag;

    nodeid = cr->pd->neighbor[dir];
#ifdef DEBUG
    fprintf(stderr,"gmx_rx: nodeid=%d, buf=%x, bufsize=%d\n",
            nodeid,buf,bufsize);
#endif
    tag = 0;
    if (MPI_Irecv( buf, bufsize, MPI_BYTE, RANK(cr,nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_rx) != 0 )
        gmx_comm("MPI_Irecv Failed");
#endif
}

void gmx_rx_wait(const t_commrec *cr, int nodeid)
{
#ifndef GMX_MPI
    gmx_call("gmx_rx_wait");
#else
    MPI_Status status;
    int        mpi_result;

    if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_rx,&status)) != 0)
        gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
#endif
}

void gmx_tx_rx_real(const t_commrec *cr,
                    int send_dir,real *send_buf,int send_bufsize,
                    int recv_dir,real *recv_buf,int recv_bufsize)
{
#ifndef GMX_MPI
    gmx_call("gmx_tx_rx_real");
#else
    int send_nodeid,recv_nodeid;
    int tx_tag = 0,rx_tag = 0;
    MPI_Status stat;

    send_nodeid = cr->pd->neighbor[send_dir];
    recv_nodeid = cr->pd->neighbor[recv_dir];

#ifdef GMX_DOUBLE
#define mpi_type MPI_DOUBLE
#else
#define mpi_type MPI_FLOAT
#endif
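
    /* Only post the transfer(s) that actually carry data; callers may pass a
     * zero send or receive size, e.g. at the edge of a communication range.
     */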
    if (send_bufsize > 0 && recv_bufsize > 0) {
        MPI_Sendrecv(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
                     recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
                     cr->mpi_comm_mygroup,&stat);
    } else if (send_bufsize > 0) {
        MPI_Send(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
                 cr->mpi_comm_mygroup);
    } else if (recv_bufsize > 0) {
        MPI_Recv(recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
                 cr->mpi_comm_mygroup,&stat);
    }
#undef mpi_type
#endif
}

void gmx_tx_rx_void(const t_commrec *cr,
                    int send_dir,void *send_buf,int send_bufsize,
                    int recv_dir,void *recv_buf,int recv_bufsize)
{
#ifndef GMX_MPI
    gmx_call("gmx_tx_rx_void");
#else
    int send_nodeid,recv_nodeid;
    int tx_tag = 0,rx_tag = 0;
    MPI_Status stat;

    send_nodeid = cr->pd->neighbor[send_dir];
    recv_nodeid = cr->pd->neighbor[recv_dir];

    MPI_Sendrecv(send_buf,send_bufsize,MPI_BYTE,RANK(cr,send_nodeid),tx_tag,
                 recv_buf,recv_bufsize,MPI_BYTE,RANK(cr,recv_nodeid),rx_tag,
                 cr->mpi_comm_mygroup,&stat);
#endif
}

/*void gmx_wait(int dir_send,int dir_recv)*/

void gmx_wait(const t_commrec *cr, int dir_send,int dir_recv)
{
#ifndef GMX_MPI
    gmx_call("gmx_wait");
#else
    gmx_tx_wait(cr, dir_send);
    gmx_rx_wait(cr, dir_recv);
#endif
}

static void set_left_right(t_commrec *cr)
{
    cr->pd->neighbor[GMX_LEFT]  = (cr->nnodes + cr->nodeid - 1) % cr->nnodes;
    cr->pd->neighbor[GMX_RIGHT] = (cr->nodeid + 1) % cr->nnodes;
}
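
/* Example of the ring arithmetic above: with nnodes = 4, node 0 gets left
 * neighbor (4 + 0 - 1) % 4 = 3 and right neighbor (0 + 1) % 4 = 1, so the
 * first and last nodes are connected and the nodes form a ring.
 */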

void pd_move_f(const t_commrec *cr,rvec f[],t_nrnb *nrnb)
{
    move_f(NULL,cr,GMX_LEFT,GMX_RIGHT,f,cr->pd->vbuf,nrnb);
}

int *pd_cgindex(const t_commrec *cr)
{
    return cr->pd->cgindex;
}

int *pd_index(const t_commrec *cr)
{
    return cr->pd->index;
}

int pd_shift(const t_commrec *cr)
{
    return cr->pd->shift;
}

int pd_bshift(const t_commrec *cr)
{
    return cr->pd->bshift;
}

void pd_cg_range(const t_commrec *cr,int *cg0,int *cg1)
{
    *cg0 = cr->pd->cgindex[cr->nodeid];
    *cg1 = cr->pd->cgindex[cr->nodeid+1];
}

void pd_at_range(const t_commrec *cr,int *at0,int *at1)
{
    *at0 = cr->pd->index[cr->nodeid];
    *at1 = cr->pd->index[cr->nodeid+1];
}

void
pd_get_constraint_range(gmx_partdec_p_t pd,int *start,int *natoms)
{
    *start  = pd->constraints->left_range_receive;
    *natoms = pd->constraints->right_range_receive-pd->constraints->left_range_receive;
}

int *
pd_constraints_nlocalatoms(gmx_partdec_p_t pd)
{
    int *rc;

    if (NULL != pd && NULL != pd->constraints)
    {
        rc = pd->constraints->nlocalatoms;
    }
    else
    {
        rc = NULL;
    }

    return rc;
}

/* This routine is used to communicate the non-home atoms needed for constraints.
 * We have already calculated this range of atoms during setup, and stored it in
 * the partdec constraints structure.
 *
 * When called, we send/receive left_range_send/receive atoms to our left (lower)
 * node neighbor, and similarly to the right (higher) node.
 *
 * This has not been tested for periodic molecules...
 */
void
pd_move_x_constraints(t_commrec *cr,
                      rvec      *x0,
                      rvec      *x1)
{
#ifdef GMX_MPI
    gmx_partdec_t            *pd;
    gmx_partdec_constraint_t *pdc;

    rvec *sendptr;
    rvec *recvptr;
    int   thisnode;
    int   i;
    int   cnt;
    int   sendcnt,recvcnt;

    pd  = cr->pd;
    pdc = pd->constraints;

    thisnode = cr->nodeid;

    /* First pulse to right */

    recvcnt = 3*(pd->index[thisnode]-pdc->left_range_receive);
    sendcnt = 3*(cr->pd->index[thisnode+1]-cr->pd->constraints->right_range_send);
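
    /* The counts are in reals: each rvec contributes 3 reals to the message.
     * When x1 is communicated as well, both arrays are packed into one buffer
     * and the counts are doubled below.
     */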
    if (x1 != NULL)
    {
        /* Assemble temporary array with both x0 & x1 */
        recvptr = pdc->recvbuf;
        sendptr = pdc->sendbuf;

        cnt = 0;
        for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
        {
            copy_rvec(x0[i],sendptr[cnt++]);
        }
        for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
        {
            copy_rvec(x1[i],sendptr[cnt++]);
        }
        recvcnt *= 2;
        sendcnt *= 2;
    }
    else
    {
        recvptr = x0 + pdc->left_range_receive;
        sendptr = x0 + pdc->right_range_send;
    }

    gmx_tx_rx_real(cr,
                   GMX_RIGHT,(real *)sendptr,sendcnt,
                   GMX_LEFT, (real *)recvptr,recvcnt);

    if (x1 != NULL)
    {
        /* Copy back to x0 & x1 */
        cnt = 0;
        for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
        {
            copy_rvec(recvptr[cnt++],x0[i]);
        }
        for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
        {
            copy_rvec(recvptr[cnt++],x1[i]);
        }
    }

    /* And pulse to left */
    sendcnt = 3*(pdc->left_range_send-pd->index[thisnode]);
    recvcnt = 3*(pdc->right_range_receive-pd->index[thisnode+1]);

    if (x1 != NULL)
    {
        cnt = 0;
        for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
        {
            copy_rvec(x0[i],sendptr[cnt++]);
        }
        for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
        {
            copy_rvec(x1[i],sendptr[cnt++]);
        }
        recvcnt *= 2;
        sendcnt *= 2;
    }
    else
    {
        sendptr = x0 + pd->index[thisnode];
        recvptr = x0 + pd->index[thisnode+1];
    }

    gmx_tx_rx_real(cr,
                   GMX_LEFT ,(real *)sendptr,sendcnt,
                   GMX_RIGHT,(real *)recvptr,recvcnt);

    /* Final copy back from buffers */
    if (x1 != NULL)
    {
        /* First copy received data back into x0 & x1 */
        cnt = 0;
        for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
        {
            copy_rvec(recvptr[cnt++],x0[i]);
        }
        for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
        {
            copy_rvec(recvptr[cnt++],x1[i]);
        }
    }
#endif
}

static int home_cpu(int nnodes,gmx_partdec_t *pd,int atomid)
{
    int k;

    for(k=0; (k<nnodes); k++) {
        if (atomid<pd->index[k+1])
            return k;
    }
    gmx_fatal(FARGS,"Atomid %d is larger than number of atoms (%d)",
              atomid+1,pd->index[nnodes]+1);

    return -1;
}

static void calc_nsbshift(FILE *fp,int nnodes,gmx_partdec_t *pd,t_idef *idef)
{
    int i,j,k;
    int lastcg,targetcg,nshift,naaj;
    int homeid[32];

    pd->bshift = 0;
    for(i=1; (i<nnodes); i++) {
        targetcg = pd->cgindex[i];
        for(nshift=i; (nshift > 0) && (pd->cgindex[nshift] > targetcg); nshift--)
            ;
        pd->bshift = max(pd->bshift,i-nshift);
    }
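
    /* The forward shift starts at half the ring; the loops below enlarge it
     * to the largest node-index distance to any neighbor-search target charge
     * group, and then to the largest node span of any bonded interaction.
     */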
    pd->shift = (nnodes + 1)/2;
    for(i=0; (i<nnodes); i++) {
        lastcg   = pd->cgindex[i+1]-1;
        naaj     = calc_naaj(lastcg,pd->cgindex[nnodes]);
        targetcg = (lastcg+naaj) % pd->cgindex[nnodes];

        /* Search until we find the target charge group */
        for(nshift=0;
            (nshift < nnodes) && (targetcg > pd->cgindex[nshift+1]);
            nshift++)
            ;
        /* Now compute the shift, that is the difference in node index */
        nshift = ((nshift - i + nnodes) % nnodes);

        if (fp)
            fprintf(fp,"CPU=%3d, lastcg=%5d, targetcg=%5d, myshift=%5d\n",
                    i,lastcg,targetcg,nshift);

        /* It's the largest shift that matters */
        pd->shift = max(nshift,pd->shift);
    }
    /* Now for the bonded forces */
    for(i=0; (i<F_NRE); i++) {
        if (interaction_function[i].flags & IF_BOND) {
            int nratoms = interaction_function[i].nratoms;
            for(j=0; (j<idef->il[i].nr); j+=nratoms+1) {
                for(k=1; (k<=nratoms); k++)
                    homeid[k-1] = home_cpu(nnodes,pd,idef->il[i].iatoms[j+k]);
                for(k=1; (k<nratoms); k++)
                    pd->shift = max(pd->shift,abs(homeid[k]-homeid[0]));
            }
        }
    }
    if (fp)
        fprintf(fp,"pd->shift = %3d, pd->bshift=%3d\n",
                pd->shift,pd->bshift);
}

static void
init_partdec_constraint(t_commrec *cr,
                        t_idef    *idef,
                        int       *left_range,
                        int       *right_range)
{
    gmx_partdec_t            *pd = cr->pd;
    gmx_partdec_constraint_t *pdc;
    int i,cnt,k;
    int ai,aj,nodei,nodej;
    int nratoms;
    int nodeid;

    snew(pdc,1);
    cr->pd->constraints = pdc;

    /* Who am I? */
    nodeid = cr->nodeid;

    /* Setup LINCS communication ranges */
    pdc->left_range_receive  = left_range[nodeid];
    pdc->right_range_receive = right_range[nodeid]+1;
    pdc->left_range_send     = (nodeid > 0) ? right_range[nodeid-1]+1 : 0;
    pdc->right_range_send    = (nodeid < cr->nnodes-1) ? left_range[nodeid+1] : pd->index[cr->nnodes];

    snew(pdc->nlocalatoms,idef->il[F_CONSTR].nr);
    nratoms = interaction_function[F_CONSTR].nratoms;

    for(i=0,cnt=0;i<idef->il[F_CONSTR].nr;i+=nratoms+1,cnt++)
    {
        ai = idef->il[F_CONSTR].iatoms[i+1];
        aj = idef->il[F_CONSTR].iatoms[i+2];
        nodei = 0;
        while(ai>=pd->index[nodei+1])
        {
            nodei++;
        }
        nodej = 0;
        while(aj>=pd->index[nodej+1])
        {
            nodej++;
        }
        pdc->nlocalatoms[cnt] = 0;
        if (nodei == nodeid)
        {
            pdc->nlocalatoms[cnt]++;
        }
        if (nodej == nodeid)
        {
            pdc->nlocalatoms[cnt]++;
        }
    }
    pdc->nconstraints = cnt;

    /* This should really be calculated, but 1000 is a _lot_ for overlapping constraints... */
    snew(pdc->sendbuf,1000);
    snew(pdc->recvbuf,1000);
}

static void init_partdec(FILE *fp,t_commrec *cr,t_block *cgs,int *multinr,
                         t_idef *idef)
{
    int i,nodeid;
    gmx_partdec_t *pd;

    snew(pd,1);
    cr->pd = pd;

    set_left_right(cr);

    if (cr->nnodes > 1) {
        if (multinr == NULL)
            gmx_fatal(FARGS,"Internal error in init_partdec: multinr = NULL");
        snew(pd->index,cr->nnodes+1);
        snew(pd->cgindex,cr->nnodes+1);
        pd->cgindex[0] = 0;
        pd->index[0]   = 0;
        for(i=0; (i < cr->nnodes); i++) {
            pd->cgindex[i+1] = multinr[i];
            pd->index[i+1]   = cgs->index[multinr[i]];
        }
        calc_nsbshift(fp,cr->nnodes,pd,idef);
        /* This is a hack to do with bugzilla 148 */
        /*pd->shift = cr->nnodes-1;
          pd->bshift = 0;*/

        /* Allocate a buffer of size natoms of the whole system
         * for summing the forces over the nodes.
         */
        snew(pd->vbuf,cgs->index[cgs->nr]);
        pd->constraints = NULL;
    }
#ifdef GMX_MPI
    pd->mpi_req_tx = MPI_REQUEST_NULL;
    pd->mpi_req_rx = MPI_REQUEST_NULL;
#endif
}

static void print_partdec(FILE *fp,const char *title,
                          int nnodes,gmx_partdec_t *pd)
{
    int i;

    fprintf(fp,"%s\n",title);
    fprintf(fp,"nnodes:     %5d\n",nnodes);
    fprintf(fp,"pd->shift:  %5d\n",pd->shift);
    fprintf(fp,"pd->bshift: %5d\n",pd->bshift);

    fprintf(fp,"Nodeid   atom0   #atom     cg0       #cg\n");
    for(i=0; (i<nnodes); i++)
        fprintf(fp,"%6d%8d%8d%8d%10d\n",
                i,
                pd->index[i],pd->index[i+1]-pd->index[i],
                pd->cgindex[i],pd->cgindex[i+1]-pd->cgindex[i]);
    fprintf(fp,"\n");
}

static void pr_idef_division(FILE *fp,t_idef *idef,int nnodes,int **multinr)
{
    int i,ftype,nr,nra,m0,m1;

    fprintf(fp,"Division of bonded forces over processors\n");
    fprintf(fp,"%-12s","CPU");
    for(i=0; (i<nnodes); i++)
        fprintf(fp," %5d",i);
    fprintf(fp,"\n");

    for(ftype=0; (ftype<F_NRE); ftype++) {
        if (idef->il[ftype].nr > 0) {
            nr  = idef->il[ftype].nr;
            nra = 1+interaction_function[ftype].nratoms;
            fprintf(fp,"%-12s",interaction_function[ftype].name);
            /* Loop over processors */
            for(i=0; (i<nnodes); i++) {
                m0 = (i == 0) ? 0 : multinr[ftype][i-1]/nra;
                m1 = multinr[ftype][i]/nra;
                fprintf(fp," %5d",m1-m0);
            }
            fprintf(fp,"\n");
        }
    }
}

static void select_my_ilist(FILE *log,t_ilist *il,int *multinr,t_commrec *cr)
{
    t_iatom *ia;
    int      i,start,end,nr;

    if (cr->nodeid == 0)
        start = 0;
    else
        start = multinr[cr->nodeid-1];
    end = multinr[cr->nodeid];

    nr = end-start;
    if (nr < 0)
        gmx_fatal(FARGS,"Negative number of atoms (%d) on node %d\n"
                  "You have probably not used the same value for -np with grompp"
                  " and mdrun",
                  nr,cr->nodeid);
    snew(ia,nr);

    for(i=0; (i<nr); i++)
        ia[i] = il->iatoms[start+i];

    sfree(il->iatoms);
    il->iatoms = ia;
    il->nr     = nr;
}

static void select_my_idef(FILE *log,t_idef *idef,int **multinr,t_commrec *cr)
{
    int i;

    for(i=0; (i<F_NRE); i++)
        select_my_ilist(log,&idef->il[i],multinr[i],cr);
}

gmx_localtop_t *split_system(FILE *log,
                             gmx_mtop_t *mtop,t_inputrec *inputrec,
                             t_commrec *cr)
{
    int    i,npp,n,nn;
    real   *capacity;
    double tcap = 0,cap;
    int    *multinr_cgs,**multinr_nre;
    char   *cap_env;
    gmx_localtop_t *top;
    int    *left_range;
    int    *right_range;

    /* Time to setup the division of charge groups over processors */
    npp = cr->nnodes-cr->npmenodes;
    snew(capacity,npp);
    cap_env = getenv("GMX_CAPACITY");
    if (cap_env == NULL) {
        for(i=0; (i<npp-1); i++) {
            capacity[i] = 1.0/(double)npp;
            tcap += capacity[i];
        }
        /* Take care that the sum of capacities is 1.0 */
        capacity[npp-1] = 1.0 - tcap;
    } else {
        tcap = 0;
        nn   = 0;
        for(i=0; i<npp; i++) {
            cap = 0;
            sscanf(cap_env+nn,"%lf%n",&cap,&n);
            if (cap == 0)
                gmx_fatal(FARGS,"Incorrect entry or number of entries in GMX_CAPACITY='%s'",cap_env);
            capacity[i] = cap;
            tcap += cap;
            nn   += n;
        }
        for(i=0; i<npp; i++)
            capacity[i] /= tcap;
    }
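
    /* Illustrative example: a value such as GMX_CAPACITY="1 2 1" for three
     * particle-particle nodes would be parsed as the relative weights 1, 2
     * and 1 and normalized to 0.25, 0.5 and 0.25 before being passed to
     * split_top() below as the per-node capacities.
     */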

    /* Convert the molecular topology to a fully listed topology */
    top = gmx_mtop_generate_local_top(mtop,inputrec);

    snew(multinr_cgs,npp);
    snew(multinr_nre,F_NRE);
    for(i=0; i<F_NRE; i++)
        snew(multinr_nre[i],npp);

    snew(left_range,cr->nnodes);
    snew(right_range,cr->nnodes);

    /* This computes which entities can be placed on processors */
    split_top(log,npp,top,inputrec,&mtop->mols,capacity,multinr_cgs,multinr_nre,left_range,right_range);

    sfree(capacity);
    init_partdec(log,cr,&(top->cgs),multinr_cgs,&(top->idef));

    /* This should be fine */
    /*split_idef(&(top->idef),cr->nnodes-cr->npmenodes);*/

    select_my_idef(log,&(top->idef),multinr_nre,cr);

    if (top->idef.il[F_CONSTR].nr > 0)
    {
        init_partdec_constraint(cr,&(top->idef),left_range,right_range);
    }

    if (log)
        pr_idef_division(log,&(top->idef),npp,multinr_nre);

    for(i=0; i<F_NRE; i++)
        sfree(multinr_nre[i]);
    sfree(multinr_nre);
    sfree(multinr_cgs);

    sfree(left_range);
    sfree(right_range);

    if (log)
        print_partdec(log,"Workload division",cr->nnodes,cr->pd);

    return top;
}

static void create_vsitelist(int nindex, int *list,
                             int *targetn, int **listptr)
{
    int  i,j,k,inr;
    int  minidx;
    int *newlist;

    /* remove duplicates */
    for(i=0;i<nindex;i++) {
        inr = list[i];
        for(j=i+1;j<nindex;j++) {
            if (list[j] == inr) {
                for(k=j;k<nindex-1;k++)
                    list[k] = list[k+1];
                nindex--;
                j--;  /* re-examine the element that was shifted into slot j */
            }
        }
    }

    *targetn = nindex;
    snew(newlist,nindex);

    /* sort into the new array */
    for(i=0;i<nindex;i++) {
        inr = -1;
        for(j=0;j<nindex;j++)
            if (list[j]>0 && (inr==-1 || list[j]<list[inr]))
                inr = j; /* smallest so far */
        newlist[i] = list[inr];
        list[inr]  = -1;
    }
    *listptr = newlist;
}

static void
add_to_vsitelist(int **list, int *nitem, int *nalloc,int newitem)
{
    int      i,idx;
    gmx_bool found;

    found = FALSE;
    idx   = *nitem;
    for(i=0; i<idx && !found;i++)
    {
        found = (newitem == (*list)[i]);
    }
    if (!found)
    {
        *nalloc += 100;
        srenew(*list,*nalloc);
        (*list)[idx++] = newitem;
        *nitem = idx;
    }
}

gmx_bool setup_parallel_vsites(t_idef *idef,t_commrec *cr,
                               t_comm_vsites *vsitecomm)
{
    int i,j,ftype;
    int nra;
    gmx_bool do_comm;
    t_iatom *ia;
    gmx_partdec_t *pd;
    int iconstruct;
    int i0,i1;
    int nalloc_left_construct,nalloc_right_construct;
    int sendbuf[2],recvbuf[2];
    int bufsize,leftbuf,rightbuf;

    pd = cr->pd;

    i0 = pd->index[cr->nodeid];
    i1 = pd->index[cr->nodeid+1];

    vsitecomm->left_import_construct  = NULL;
    vsitecomm->left_import_nconstruct = 0;
    nalloc_left_construct             = 0;

    vsitecomm->right_import_construct  = NULL;
    vsitecomm->right_import_nconstruct = 0;
    nalloc_right_construct             = 0;

    for(ftype=0; (ftype<F_NRE); ftype++)
    {
        if ( !(interaction_function[ftype].flags & IF_VSITE) )
        {
            continue;
        }

        nra = interaction_function[ftype].nratoms;
        ia  = idef->il[ftype].iatoms;

        for(i=0; i<idef->il[ftype].nr;i+=nra+1)
        {
            for(j=2;j<1+nra;j++)
            {
                iconstruct = ia[i+j];
                if (iconstruct < i0)
                {
                    add_to_vsitelist(&vsitecomm->left_import_construct,
                                     &vsitecomm->left_import_nconstruct,
                                     &nalloc_left_construct,iconstruct);
                }
                else if (iconstruct >= i1)
                {
                    add_to_vsitelist(&vsitecomm->right_import_construct,
                                     &vsitecomm->right_import_nconstruct,
                                     &nalloc_right_construct,iconstruct);
                }
            }
        }
    }

    /* Pre-communicate the array lengths */
    gmx_tx_rx_void(cr,
                   GMX_RIGHT,(void *)&vsitecomm->right_import_nconstruct,sizeof(int),
                   GMX_LEFT, (void *)&vsitecomm->left_export_nconstruct, sizeof(int));
    gmx_tx_rx_void(cr,
                   GMX_LEFT, (void *)&vsitecomm->left_import_nconstruct, sizeof(int),
                   GMX_RIGHT,(void *)&vsitecomm->right_export_nconstruct,sizeof(int));

    snew(vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct);
    snew(vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct);

    /* Communicate the constructing atom index arrays */
    gmx_tx_rx_void(cr,
                   GMX_RIGHT,(void *)vsitecomm->right_import_construct,vsitecomm->right_import_nconstruct*sizeof(int),
                   GMX_LEFT, (void *)vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct*sizeof(int));

    /* Communicate the constructing atom index arrays */
    gmx_tx_rx_void(cr,
                   GMX_LEFT ,(void *)vsitecomm->left_import_construct, vsitecomm->left_import_nconstruct*sizeof(int),
                   GMX_RIGHT,(void *)vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct*sizeof(int));

    leftbuf  = max(vsitecomm->left_export_nconstruct,vsitecomm->left_import_nconstruct);
    rightbuf = max(vsitecomm->right_export_nconstruct,vsitecomm->right_import_nconstruct);

    bufsize = max(leftbuf,rightbuf);

    do_comm = (bufsize > 0);

    snew(vsitecomm->send_buf,2*bufsize);
    snew(vsitecomm->recv_buf,2*bufsize);

    return do_comm;
}

t_state *partdec_init_local_state(t_commrec *cr,t_state *state_global)
{
    t_state *state_local;

    snew(state_local,1);

    /* Copy all the contents */
    *state_local = *state_global;

    if (state_global->nrngi > 1) {
        /* With stochastic dynamics we need local storage for the random state */
        if (state_local->flags & (1<<estLD_RNG)) {
            state_local->nrng = gmx_rng_n();
            snew(state_local->ld_rng,state_local->nrng);
#ifdef GMX_MPI
            if (PAR(cr)) {
                MPI_Scatter(state_global->ld_rng,
                            state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
                            state_local->ld_rng,
                            state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
                            MASTERRANK(cr),cr->mpi_comm_mygroup);
            }
#endif
        }
        if (state_local->flags & (1<<estLD_RNGI)) {
            snew(state_local->ld_rngi,1);
#ifdef GMX_MPI
            if (PAR(cr)) {
                MPI_Scatter(state_global->ld_rngi,
                            sizeof(state_local->ld_rngi[0]),MPI_BYTE,
                            state_local->ld_rngi,
                            sizeof(state_local->ld_rngi[0]),MPI_BYTE,
                            MASTERRANK(cr),cr->mpi_comm_mygroup);
            }
#endif
        }
    }

    return state_local;
}