changed reading hint
[gromacs/adressmacs.git] / src / kernel / splitter.c
blob384e348d859839a9b7cf073a5a9f9bab6f782e82
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 2.0
12 * Copyright (c) 1991-1999
13 * BIOSON Research Institute, Dept. of Biophysical Chemistry
14 * University of Groningen, The Netherlands
16 * Please refer to:
17 * GROMACS: A message-passing parallel molecular dynamics implementation
18 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
19 * Comp. Phys. Comm. 91, 43-56 (1995)
21 * Also check out our WWW page:
22 * http://md.chem.rug.nl/~gmx
23 * or e-mail to:
24 * gromacs@chem.rug.nl
26 * And Hey:
27 * GRowing Old MAkes el Chrono Sweat
29 static char *SRCID_splitter_c = "$Id$";
31 #include <stdio.h>
32 #include "sysstuff.h"
33 #include "assert.h"
34 #include "macros.h"
35 #include "smalloc.h"
36 #include "typedefs.h"
37 #include "mshift.h"
38 #include "invblock.h"
39 #include "txtdump.h"
40 #include "math.h"
42 typedef struct {
43 int nr;
44 t_iatom *ia;
45 } t_sf;
47 static void init_sf(int nr,t_sf sf[])
49 int i;
51 for(i=0; (i<nr); i++) {
52 sf[i].nr=0;
53 sf[i].ia=NULL;
57 static void done_sf(int nr,t_sf sf[])
59 int i;
61 for(i=0; (i<nr); i++) {
62 sf[i].nr=0;
63 sfree(sf[i].ia);
64 sf[i].ia=NULL;
68 static void push_sf(t_sf *sf,int nr,t_iatom ia[])
70 int i;
72 srenew(sf->ia,sf->nr+nr);
73 for(i=0; (i<nr); i++)
74 sf->ia[sf->nr+i]=ia[i];
75 sf->nr+=nr;
78 static int min_pid(int nr,atom_id list[],int hid[])
80 int i,pid,minpid;
82 assert(nr > 0);
83 minpid=hid[list[0]];
84 for (i=1; (i<nr); i++)
85 if ((pid=hid[list[i]]) < minpid)
86 minpid=pid;
88 return minpid;
91 static void split_force2(int nprocs,int hid[],t_idef *idef,t_ilist *ilist)
93 int i,type,ftype,pid,nratoms,tnr;
94 t_iatom ai;
95 t_sf sf[MAXPROC];
97 init_sf(MAXPROC,sf);
99 /* Walk along all the bonded forces, find the appropriate processor
100 * to calc it on, and add it to that processors list.
102 for (i=0; i<ilist->nr; i+=(1+nratoms)) {
103 type = ilist->iatoms[i];
104 ftype = idef->functype[type];
105 nratoms = interaction_function[ftype].nratoms;
107 if (ftype == F_SHAKE) {
108 /* SPECIAL CASE: All Atoms must have the same home processor! */
109 pid=hid[ilist->iatoms[i+1]];
110 if (hid[ilist->iatoms[i+2]] != pid)
111 fatal_error(0,"Shake block crossing processor boundaries\n"
112 "constraint between atoms (%d,%d)",
113 ilist->iatoms[i+1],ilist->iatoms[i+2]);
115 else if (ftype == F_SETTLE) {
116 /* Only the first particle is stored for settles ... */
117 ai=ilist->iatoms[i+1];
118 pid=hid[ai];
119 if ((pid != hid[ai+1]) ||
120 (pid != hid[ai+2]))
121 fatal_error(0,"Settle block crossing processor boundaries\n"
122 "constraint between atoms (%d-%d)",ai,ai+2);
124 else
125 pid=min_pid(nratoms,&ilist->iatoms[i+1],hid);
127 /* Add it to the list */
128 push_sf(&(sf[pid]),nratoms+1,&(ilist->iatoms[i]));
130 tnr=0;
131 for(pid=0; (pid<MAXPROC); pid++) {
132 for (i=0; (i<sf[pid].nr); i++)
133 ilist->iatoms[tnr++]=sf[pid].ia[i];
135 ilist->multinr[pid]=(pid==0) ? 0 : ilist->multinr[pid-1];
136 ilist->multinr[pid]+=sf[pid].nr;
138 assert(tnr==ilist->nr);
139 done_sf(MAXPROC,sf);
142 static int *home_index(int nprocs,t_block *cgs)
144 /* This routine determines the processor id for each particle */
145 int *hid;
146 int pid,j0,j1,j,k,ak;
148 snew(hid,cgs->nra);
149 /* Initiate to -1 to make it possible to check afterwards,
150 * all hid's should be set in the loop below
152 for(k=0; (k<cgs->nra); k++)
153 hid[k]=-1;
155 /* loop over processors */
156 for(pid=0; (pid<nprocs); pid++) {
157 j0 = (pid==0) ? 0 : cgs->multinr[pid-1];
158 j1 = cgs->multinr[pid];
160 /* j0 and j1 are the boundariesin the index array */
161 for(j=j0; (j<j1); j++) {
162 for(k=cgs->index[j]; (k<cgs->index[j+1]); k++) {
163 ak=cgs->a[k];
164 hid[ak]=pid;
168 /* Now verify that all hid's are not -1 */
169 for(k=0; (k<cgs->nra); k++)
170 if (hid[k] == -1)
171 fatal_error(0,"hid[%d] = -1, cgs->nr = %d, cgs->nra = %d",
172 k,cgs->nr,cgs->nra);
174 return hid;
177 static int max_index(int start,t_block *b)
179 int k,k0,k1;
180 int mi=-1;
182 if (start < b->nr) {
183 k0=b->index[start];
184 k1=b->index[start+1];
185 if (k1 > k0) {
186 mi=b->a[k0];
187 for(k=k0+1; (k<k1); k++)
188 mi=max(mi,b->a[k]);
191 return mi;
194 static int min_index(int start,t_block *b)
196 int k,k0,k1;
197 int mi=INT_MAX;
199 if (start < b->nr) {
200 k0=b->index[start];
201 k1=b->index[start+1];
202 if (k1 > k0) {
203 mi=b->a[k0];
204 for(k=k0+1; (k<k1); k++)
205 mi=min(mi,b->a[k]);
208 return mi;
211 typedef struct {
212 int atom,ic,is;
213 } t_border;
215 void set_bor(t_border *b,int atom,int ic,int is)
217 b->atom = atom;
218 b->ic = ic;
219 b->is = is;
222 t_border *mk_border(bool bVerbose,int nprocs,t_block *cgs,
223 t_block *shakes,int *nb)
225 int i,ic,is,nbor,ac,as,as0;
226 t_border *border;
228 nbor = 1+max(cgs->nr,shakes->nr);
229 if ((shakes->nr > 0) && (bVerbose)) {
230 fprintf(stderr,
231 "Going to use the WINKELHAAK Algorithm to split over %d cpus\n",
232 nprocs);
233 fprintf(stderr,"There are %d possible border locations\n",nbor);
235 snew(border,nbor);
236 nbor = 0;
237 ic = 0;
238 is = 0;
239 while ((ic < cgs->nr) || (is < shakes->nr)) {
240 ac = (ic == cgs->nr) ? cgs->nra : cgs->a[cgs->index[ic]];
241 as = (is == shakes->nr) ? shakes->nra : shakes->a[shakes->index[is]];
242 as0 = (is == 0) ? 0 : (shakes->a[shakes->index[is]-1]);
243 if (debug)
244 fprintf(debug,"mk_border: is=%6d ic=%6d as0=%6d as=%6d ac=%6d\n",
245 is,ic,as0,as,ac);
246 if (ac == as) {
247 set_bor(&border[nbor],ac,ic,is);
248 nbor++;
249 ic++;
250 if (as < shakes->nra)
251 is++;
253 else if ((ac > as) && (as == shakes->nra)) {
254 set_bor(&border[nbor],ac,ic,is);
255 nbor++;
256 ic++;
258 else if ((ac < as) && (ac > as0)) {
259 set_bor(&border[nbor],ac,ic,is);
260 nbor++;
261 ic++;
263 else if (ac < as)
264 ic++;
265 else
266 is++;
268 set_bor(&border[nbor],cgs->nra,ic,is);
269 nbor++;
271 *nb = nbor;
273 if (debug) {
274 fprintf(debug,"There are %d actual border entries\n",nbor);
275 for(i=0; (i<nbor); i++)
276 fprintf(debug,"border[%5d] = atom: %d ic: %d is: %d\n",i,
277 border[i].atom,border[i].ic,border[i].is);
280 return border;
283 static void split_blocks(bool bVerbose,int nprocs,
284 t_block *cgs,t_block *shakes)
286 int maxatom[MAXPROC];
287 int i,ai,sbl,b0,b1;
288 int pid,last_shk,nbor;
289 t_border *border;
290 double load,tload;
292 bool bSHK;
293 atom_id *shknum,*cgsnum;
295 if (debug) {
296 pr_block(debug,0,"cgs",cgs);
297 pr_block(debug,0,"shakes",shakes);
300 shknum = make_invblock(shakes,cgs->nra+1);
301 cgsnum = make_invblock(cgs,cgs->nra+1);
302 border = mk_border(bVerbose,nprocs,cgs,shakes,&nbor);
304 load = (double)cgs->nra / (double)nprocs;
305 tload = load;
307 pid = 0;
308 sbl = 0;
309 for(i=0; (i<nbor) && (tload < cgs->nra); i++) {
310 if(i<(nbor-1))
311 b1=border[i+1].atom;
312 else
313 b1=cgs->nra;
315 b0 = border[i].atom;
317 if (fabs(b0-tload)<fabs(b1-tload)) {
318 /* New pid time */
319 cgs->multinr[pid] = border[i].ic;
320 shakes->multinr[pid] = border[i].is;
321 maxatom[pid] = b0;
322 pid++;
323 tload += load;
326 /* Now the last one... */
327 while (pid < nprocs) {
328 cgs->multinr[pid]=cgs->nr;
329 shakes->multinr[pid]=shakes->nr;
330 maxatom[pid]=cgs->nra;
331 pid++;
333 if (pid != nprocs) {
334 fatal_error(0,"pid = %d, nprocs = %d, file %s, line %d",
335 pid,nprocs,__FILE__,__LINE__);
338 if (bVerbose) {
339 for(i=nprocs-1; (i>0); i--)
340 maxatom[i]-=maxatom[i-1];
341 fprintf(stderr,"Division over processors in atoms:\n");
342 for(i=0; (i<nprocs); i++)
343 fprintf(stderr,"%6d",maxatom[i]);
344 fprintf(stderr,"\n");
346 sfree(shknum);
347 sfree(cgsnum);
348 sfree(border);
351 static void def_mnr(int nr,int mnr[])
353 int i;
355 for (i=0; (i<MAXPROC); i++)
356 mnr[i]=0;
357 mnr[0]=nr;
360 void split_top(bool bVerbose,int nprocs,t_topology *top)
362 int j;
363 int *homeind;
365 if ((bVerbose) && (nprocs>1))
366 fprintf(stderr,"splitting topology...\n");
368 for(j=0; (j<F_NRE); j++)
369 def_mnr(top->idef.il[j].nr,top->idef.il[j].multinr);
370 def_mnr(top->atoms.excl.nr,top->atoms.excl.multinr);
372 for (j=0; j<ebNR; j++)
373 def_mnr(top->blocks[j].nr,top->blocks[j].multinr);
375 if (nprocs > 1) {
376 split_blocks(bVerbose,nprocs,
377 &(top->blocks[ebCGS]),&(top->blocks[ebSBLOCKS]));
378 homeind=home_index(nprocs,&(top->blocks[ebCGS]));
379 for(j=0; (j<F_NRE); j++)
380 split_force2(nprocs,homeind,&top->idef,&top->idef.il[j]);
381 sfree(homeind);
385 typedef struct {
386 int atom,sid;
387 } t_sid;
389 static int sid_comp(const void *a,const void *b)
391 t_sid *sa,*sb;
392 int dd;
394 sa=(t_sid *)a;
395 sb=(t_sid *)b;
397 dd=sa->sid-sb->sid;
398 if (dd == 0)
399 return (sa->atom-sb->atom);
400 else
401 return dd;
404 typedef enum { egcolWhite, egcolGrey, egcolBlack, egcolNR } egCol;
406 static int mk_grey(int nnodes,egCol egc[],t_graph *g,int *AtomI,
407 t_sid sid[])
409 int j,ng,ai,aj,g0;
411 ng=0;
412 ai=*AtomI;
414 g0=g->start;
415 /* Loop over all the bonds */
416 for(j=0; (j<g->nedge[ai]); j++) {
417 aj=g->edge[ai][j]-g0;
418 /* If there is a white one, make it gray and set pbc */
419 if (egc[aj] == egcolWhite) {
420 if (aj < *AtomI)
421 *AtomI = aj;
422 egc[aj] = egcolGrey;
424 /* Check whether this one has been set before... */
425 if (sid[aj+g0].sid != -1)
426 fatal_error(0,"sid[%d]=%d, sid[%d]=%d, file %s, line %d",
427 ai,sid[ai+g0].sid,aj,sid[aj+g0].sid,__FILE__,__LINE__);
428 else {
429 sid[aj+g0].sid = sid[ai+g0].sid;
430 sid[aj+g0].atom = aj+g0;
432 ng++;
435 return ng;
438 static int first_colour(int fC,egCol Col,t_graph *g,egCol egc[])
439 /* Return the first node with colour Col starting at fC.
440 * return -1 if none found.
443 int i;
445 for(i=fC; (i<g->nnodes); i++)
446 if ((g->nedge[i] > 0) && (egc[i]==Col))
447 return i;
449 return -1;
452 static int mk_sblocks(bool bVerbose,t_graph *g,t_sid sid[])
454 int ng,nnodes;
455 int nW,nG,nB; /* Number of Grey, Black, White */
456 int fW,fG; /* First of each category */
457 static egCol *egc=NULL; /* The colour of each node */
458 int g0,nblock;
460 if (!g->nbound)
461 return 0;
463 nblock=0;
465 nnodes=g->nnodes;
466 snew(egc,nnodes);
468 g0=g->start;
469 nW=g->nbound;
470 nG=0;
471 nB=0;
473 fW=0;
475 /* We even have a loop invariant:
476 * nW+nG+nB == g->nbound
479 if (bVerbose)
480 fprintf(stderr,"Walking down the molecule graph to make shake-blocks\n");
482 while (nW > 0) {
483 /* Find the first white, this will allways be a larger
484 * number than before, because no nodes are made white
485 * in the loop
487 if ((fW=first_colour(fW,egcolWhite,g,egc)) == -1)
488 fatal_error(0,"No WHITE nodes found while nW=%d\n",nW);
490 /* Make the first white node grey, and set the block number */
491 egc[fW] = egcolGrey;
492 sid[fW+g0].sid = nblock++;
493 nG++;
494 nW--;
496 /* Initial value for the first grey */
497 fG=fW;
499 if (debug)
500 fprintf(debug,"Starting G loop (nW=%d, nG=%d, nB=%d, total %d)\n",
501 nW,nG,nB,nW+nG+nB);
503 while (nG > 0) {
504 if ((fG=first_colour(fG,egcolGrey,g,egc)) == -1)
505 fatal_error(0,"No GREY nodes found while nG=%d\n",nG);
507 /* Make the first grey node black */
508 egc[fG]=egcolBlack;
509 nB++;
510 nG--;
512 /* Make all the neighbours of this black node grey
513 * and set their block number
515 ng=mk_grey(nnodes,egc,g,&fG,sid);
516 /* ng is the number of white nodes made grey */
517 nG+=ng;
518 nW-=ng;
521 sfree(egc);
523 return nblock;
526 void gen_sblocks(bool bVerbose,int natoms,t_idef *idef,t_block *sblock)
528 t_graph *g;
529 int i,j,k;
530 t_sid *sid;
531 int isid,nsid;
533 g=mk_graph(idef,natoms,TRUE);
534 if (bVerbose && debug)
535 p_graph(debug,"Graaf Dracula",g);
536 snew(sid,natoms);
537 for(i=0; (i<natoms); i++) {
538 sid[i].atom = i;
539 sid[i].sid = -1;
541 nsid=mk_sblocks(bVerbose,g,sid);
543 /* Now sort the shake blocks... */
544 qsort(sid,natoms,(size_t)sizeof(sid[0]),sid_comp);
546 /* Now check how many are NOT -1, i.e. how many have to be shaken */
547 for(i=0; (i<natoms); i++)
548 if (sid[i].sid > -1)
549 break;
551 /* Fill the sblock struct */
552 sblock->nr = nsid;
553 sblock->nra = natoms-i;
554 srenew(sblock->a,sblock->nra);
555 srenew(sblock->index,sblock->nr+1);
557 if (i < natoms) {
558 isid = sid[i].sid;
559 sblock->index[0]=0;
560 if (nsid > 0) {
561 k=1;
562 for(j=0 ; (i<natoms); i++,j++) {
563 sblock->a[j]=sid[i].atom;
564 if (sid[i].sid != isid) {
565 sblock->index[k]=j;
566 isid = sid[i].sid;
567 k++;
570 sblock->index[k]=j;
572 if (k != nsid) {
573 fatal_error(0,"k=%d, nsid=%d\n",k,nsid);
577 sfree(sid);
578 done_graph(g);
579 sfree(g);