src/gmxlib/nrnb.c

   1 /*
   2  * $Id$
   3  *
   4  *       This source code is part of
   5  *
   6  *        G   R   O   M   A   C   S
   7  *
   8  * GROningen MAchine for Chemical Simulations
   9  *
  10  *               VERSION 2.0
  11  *
  12  * Copyright (c) 1991-1999
  13  * BIOSON Research Institute, Dept. of Biophysical Chemistry
  14  * University of Groningen, The Netherlands
  15  *
  16  * Please refer to:
  17  * GROMACS: A message-passing parallel molecular dynamics implementation
  18  * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
  19  * Comp. Phys. Comm. 91, 43-56 (1995)
  20  *
  21  * Also check out our WWW page:
  22  * http://md.chem.rug.nl/~gmx
  23  * or e-mail to:
  24  * gromacs@chem.rug.nl
  25  *
  26  * And Hey:
  27  * Green Red Orange Magenta Azure Cyan Skyblue
  28  */
  29 static char *SRCID_nrnb_c = "$Id$";
  30
  31 #include <string.h>
  32 #include "sysstuff.h"
  33 #include "fatal.h"
  34 #include "vveclib.h"
  35 #include "names.h"
  36 #include "macros.h"
  37 #include "nrnb.h"
  38 #include "main.h"
  39 #include "smalloc.h"
  40 #include "copyrite.h"
  41
  42 typedef struct {
  43   char *name;
  44   int  flop;
  45 } t_nrnb_data;
  46
  47 static t_nrnb_data nbdata[eNRNB] = {
  48   { "LJ+Coulomb",      38 },
  49   { "Coulomb",         27 },
  50   { "LJC-RF",          43 },
  51   { "Coul-RF",         32 },
  52   { "Buckingham",      44 },
  53   { "Buck.+RF",        49 },
  54   { "Table-Coul",      47 },
  55   { "Table-LJC",       73 },
  56   { "Table-BHAM",      78 },
  57   { "Table-BHAM-H2O", 171 },
  58   { "LJ+Coulomb-H2O",  91 },
  59   { "Coulomb-H2O",     80 },
  60   { "LJC-RF-H2O",     106 },
  61   { "Coul-RF-H2O",     95 },
  62   { "Buckingham-H2O",  97 },
  63   { "Buck.+RF-H2O",   112 },
  64   { "Table-Coul-H2O", 140 },
  65   { "Table-LJC-H2O",  166 },
  66   { "LJC-FreeEner",   101 },
  67   { "BHAM-FreeEner",  106 },
  68   { "LJC-Ewald",      150 }, /* not correct yet, but not used either */
  69   { "Coul-Ewald",     150 }, /* not correct yet, but not used either */
  70   { "BHAM-Ewald",     150 }, /* not correct yet, but not used either */
  71   { "LJC-Ewald-H2O",  150 }, /* not correct yet, but not used either */
  72   { "Coul-Ewald-H2O", 150 }, /* not correct yet, but not used either */
  73   { "BHAM-Ewald-H2O", 150 }, /* not correct yet, but not used either */
  74   { "Innerloop-Iatom", 10 },
  75   { "Calc Weights",    36 },
  76   { "Spread Q",         6 },
  77   { "Spread Q Bspline", 2 }, /* a first guess */
  78   { "Gather F",        23 },
  79   { "Gather F Bspline",12 }, /* a first guess */
  80   { "3D-FFT",           8 },
  81   { "Convolution",      4 },
  82   { "Solve PME",       64 }, /* a first guess */
  83   { "NS-Pairs",        21 },
  84   { "Reset In Box",     9 },
  85   { "Shift-X",          6 },
  86   { "CG-CoM",          29 },
  87   { "Sum Forces",       1 },
  88   { "Bonds",           43 },
  89   { "G96Bonds",        40 },
  90   { "Angles",         163 },
  91   { "G96Angles",      150 },
  92   { "Propers",        229 },
  93   { "Impropers",      208 },
  94   { "RB-Dihedrals",   247 },
  95   { "Dist. Restr.",   200 },
  96   { "Pos. Restr.",     50 },
  97   { "Angle Restr.",   191 },
  98   { "Angle Restr. Z", 164 },
  99   { "Morse Potent.",    0 },
 100   { "Water Pol.",      62 },
 101   { "Virial",          18 },
 102   { "Update",          31 },
 103   { "Stop-CM",         10 },
 104   { "P-Coupling",      24 },
 105   { "Calc-Ekin",       27 },
 106   { "Lincs",           60 },
 107   { "Lincs-Mat",        4 },
 108   { "Shake",           30 },
 109   { "Shake-V",         15 },
 110   { "Shake-Init",      10 },
 111   { "Shake-Vir",       18 },
 112   { "Settle",         323 },
 113   { "PShake-InitLD",   59 },
 114   { "PShake-InitMD",   65 },
 115   { "PShake",           7 },
 116   { "Dummy2",          17 },
 117   { "Dummy3",          28 },
 118   { "Dummy3fd",        95 },
 119   { "Dummy3fad",      176 },
 120   { "Dummy3out",       87 },
 121   { "Dummy4fd",       110 }
 122 };
 123
 124 void init_nrnb(t_nrnb *nrnb)
 125 {
 126   int i;
 127
 128   for(i=0; (i<eNRNB); i++)
 129     nrnb->n[i]=0.0;
 130 }
 131
 132 void cp_nrnb(t_nrnb *dest, t_nrnb *src)
 133 {
 134   int i;
 135
 136   for(i=0; (i<eNRNB); i++)
 137     dest->n[i]=src->n[i];
 138 }
 139
 140 void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
 141 {
 142   int i;
 143
 144   for(i=0; (i<eNRNB); i++)
 145     dest->n[i]=s1->n[i]+s2->n[i];
 146 }
 147
 148 void print_nrnb(FILE *out, t_nrnb *nrnb)
 149 {
 150   int i;
 151
 152   for(i=0; (i<eNRNB); i++)
 153     fprintf(out,"%14s  %10.0f.\n",nbdata[i].name,nrnb->n[i]);
 154 }
 155
 156 void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
 157 {
 158   nrnb->n[enr]+=inc;
 159 #ifdef DEBUG
 160   fprintf(stdlog,"nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
 161           nbdata[enr].name,enr,inc,file,line);
 162 #endif
 163 }
 164
 165 void print_perf(FILE *out,double cputime,double realtime,real runtime,
 166                 t_nrnb *nrnb,int nprocs)
 167 {
 168   int    i;
 169   double nbfs,mni,frac,tfrac,mflop,tflop;
 170
 171   if (cputime == 0.0) {
 172     fprintf(out,"cputime = 0! Infinite Giga flopses! \n");
 173     return;
 174   }
 175
 176   nbfs=0.0;
 177   for(i=0; (i<eNR_INLOOP); i++) {
 178     if (strstr(nbdata[i].name,"H2O") != NULL)
 179       nbfs += 3e-6*nrnb->n[i];
 180     else
 181       nbfs += 1e-6*nrnb->n[i];
 182   }
 183   tflop=0;
 184   for(i=0; (i<eNRNB); i++)
 185     tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
 186
 187   if (tflop == 0) {
 188     fprintf(out,"No MEGA Flopsen this time\n");
 189     return;
 190   }
 191   fprintf(out,"\tM E G A - F L O P S   A C C O U N T I N G\n\n");
 192   if (nprocs > 1) {
 193     cputime = realtime;
 194     fprintf(out,"\tBased on real time for parallel computer.\n");
 195   }
 196   fprintf(out,"%15s  %12s  %12s  %8s\n",
 197           "Computing:","M-Number","M-Flop's","% Flop's");
 198   mflop=0.0;
 199   tfrac=0.0;
 200   for(i=0; (i<eNRNB); i++) {
 201     mni    = 1e-6*nrnb->n[i];
 202     mflop += mni*nbdata[i].flop;
 203     frac   = 100.0*mni*nbdata[i].flop/tflop;
 204     tfrac += frac;
 205     if (mni != 0)
 206       fprintf(out,"%15s  %12.6f  %12.6f  %6.1f\n",
 207               nbdata[i].name,mni,mni*nbdata[i].flop,frac);
 208   }
 209   fprintf(out,"%15s  %12s  %12.5f  %6.1f\n\n",
 210           "Total","",mflop,tfrac);
 211   fprintf(out,"%12s %10s %10s %8s\n","","CPU (s)","Real (s)","(%)");
 212   fprintf(out,"%12s %10.3f %10.3f %8.1f\n","Time:",
 213           cputime, realtime, 100.0*cputime/realtime);
 214   if (cputime > 60) {
 215     fprintf(out,"%12s %10s","","");
 216     pr_difftime(out,cputime);
 217   }
 218   if(runtime>0) { /* runtime=0 means calc energies only */
 219   fprintf(out,"%12s %10s %10s %10s %10s\n",
 220           "","(Mnbf/s)","(MFlops)","(ps/CPU hour)","(CPU hour/ns)");
 221   fprintf(out,"%12s %10.3f %10.3f %10.3f %10.3f\n","Performance:",
 222           nbfs/cputime,mflop/cputime,
 223           runtime*3600/cputime,1000*cputime/(3600*runtime));
 224   }
 225 }
 226
 227 int cost_nrnb(int enr)
 228 {
 229   return nbdata[enr].flop;
 230 }
 231
 232 char *nrnb_str(int enr)
 233 {
 234   return nbdata[enr].name;
 235 }
 236
 237 static int    force_index[]={
 238   eNR_BONDS,  eNR_ANGLES,  eNR_PROPER, eNR_IMPROPER,
 239   eNR_RB,     eNR_DISRES,  eNR_POSRES,
 240   eNR_NS,     eNR_INL_IATOM
 241 };
 242 #define NFORCE_INDEX asize(force_index)
 243
 244 static int    shake_index[]={
 245   eNR_SHAKE,     eNR_SHAKE_RIJ, eNR_SETTLE,       eNR_UPDATE,       eNR_PCOUPL,
 246   eNR_SHAKE_VIR, eNR_SHAKE_V,   eNR_PSHAKEINITLD, eNR_PSHAKEINITMD, eNR_PSHAKE
 247 };
 248 #define NSHAKE_INDEX asize(shake_index)
 249
 250 static double pr_av(FILE *log,int nprocs,double fav,double ftot[],char *title)
 251 {
 252   int    i,perc;
 253   double dperc,unb;
 254
 255   unb=0;
 256   if (fav > 0) {
 257     fav/=nprocs;
 258     fprintf(log,"\n%15s:",title);
 259     for(i=0; (i<nprocs); i++) {
 260       dperc=(100.0*ftot[i])/fav;
 261       unb=max(unb,dperc);
 262       perc=dperc;
 263       fprintf(log,"%3d ",perc);
 264     }
 265     if (unb > 0) {
 266       perc=10000.0/unb;
 267       fprintf(log,"%6d%%\n\n",perc);
 268     }
 269     else
 270       fprintf(log,"\n\n");
 271   }
 272   return unb;
 273 }
 274
 275 void pr_load(FILE *log,int nprocs,t_nrnb nrnb[])
 276 {
 277   int    i,j,perc;
 278   double dperc,unb,uf,us;
 279   double *ftot,fav;
 280   double *stot,sav;
 281   t_nrnb *av;
 282
 283   snew(av,1);
 284   snew(ftot,nprocs);
 285   snew(stot,nprocs);
 286   init_nrnb(av);
 287   for(i=0; (i<nprocs); i++) {
 288     add_nrnb(av,av,&(nrnb[i]));
 289     /* Cost due to forces */
 290     for(j=0; (j<eNR_INLOOP); j++)
 291       ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
 292     for(j=0; (j<NFORCE_INDEX); j++)
 293       ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
 294     /* Due to shake */
 295     for(j=0; (j<NSHAKE_INDEX); j++) {
 296       stot[i]+=nrnb[i].n[shake_index[j]]*cost_nrnb(shake_index[j]);
 297     }
 298   }
 299   for(j=0; (j<eNRNB); j++)
 300     av->n[j]=av->n[j]/(double)nprocs;
 301
 302   fprintf(log,"\nDetailed load balancing info in percentage of average\n");
 303
 304   fprintf(log,"Type        CPU:");
 305   for(i=0; (i<nprocs); i++)
 306     fprintf(log,"%3d ",i);
 307   fprintf(log,"Scaling\n");
 308   fprintf(log,"----------------");
 309   for(i=0; (i<nprocs); i++)
 310     fprintf(log,"----");
 311   fprintf(log,"-------\n");
 312
 313   for(j=0; (j<eNRNB); j++) {
 314     unb=100.0;
 315     if (av->n[j] > 0) {
 316       fprintf(log,"%15s:",nrnb_str(j));
 317       for(i=0; (i<nprocs); i++) {
 318         dperc=(100.0*nrnb[i].n[j])/av->n[j];
 319         unb=max(unb,dperc);
 320         perc=dperc;
 321         fprintf(log,"%3d ",perc);
 322       }
 323       if (unb > 0) {
 324         perc=10000.0/unb;
 325         fprintf(log,"%6d%%\n",perc);
 326       }
 327       else
 328         fprintf(log,"\n");
 329     }
 330   }
 331   fav=sav=0;
 332   for(i=0; (i<nprocs); i++) {
 333     fav+=ftot[i];
 334     sav+=stot[i];
 335   }
 336   uf=pr_av(log,nprocs,fav,ftot,"Total Force");
 337   us=pr_av(log,nprocs,sav,stot,"Total Shake");
 338
 339   unb=(uf*fav+us*sav)/(fav+sav);
 340   if (unb > 0) {
 341     unb=10000.0/unb;
 342     fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);
 343   }
 344 }
 345