changed reading hint
[gromacs/adressmacs.git] / src / gmxlib / nrnb.c
blob15681d624f003e59a7784b60c1a60463192efab9
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 2.0
12 * Copyright (c) 1991-1999
13 * BIOSON Research Institute, Dept. of Biophysical Chemistry
14 * University of Groningen, The Netherlands
16 * Please refer to:
17 * GROMACS: A message-passing parallel molecular dynamics implementation
18 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
19 * Comp. Phys. Comm. 91, 43-56 (1995)
21 * Also check out our WWW page:
22 * http://md.chem.rug.nl/~gmx
23 * or e-mail to:
24 * gromacs@chem.rug.nl
26 * And Hey:
27 * Green Red Orange Magenta Azure Cyan Skyblue
/* Identification string for this source file. "$Id$" looks like a
 * version-control keyword placeholder -- presumably expanded on checkout;
 * verify against the repository setup.
 */
static char *SRCID_nrnb_c = "$Id$";
31 #include <string.h>
32 #include "sysstuff.h"
33 #include "fatal.h"
34 #include "vveclib.h"
35 #include "names.h"
36 #include "macros.h"
37 #include "nrnb.h"
38 #include "main.h"
39 #include "smalloc.h"
40 #include "copyrite.h"
/* One row of the operation-count table: the label printed in reports
 * and the number of floating point operations charged per counted event.
 */
typedef struct {
  char *name;   /* label used when printing this counter            */
  int  flop;    /* flop weight multiplied with the counter's value  */
} t_nrnb_data;
47 static t_nrnb_data nbdata[eNRNB] = {
48 { "LJ+Coulomb", 38 },
49 { "Coulomb", 27 },
50 { "LJC-RF", 43 },
51 { "Coul-RF", 32 },
52 { "Buckingham", 44 },
53 { "Buck.+RF", 49 },
54 { "Table-Coul", 47 },
55 { "Table-LJC", 73 },
56 { "Table-BHAM", 78 },
57 { "Table-BHAM-H2O", 171 },
58 { "LJ+Coulomb-H2O", 91 },
59 { "Coulomb-H2O", 80 },
60 { "LJC-RF-H2O", 106 },
61 { "Coul-RF-H2O", 95 },
62 { "Buckingham-H2O", 97 },
63 { "Buck.+RF-H2O", 112 },
64 { "Table-Coul-H2O", 140 },
65 { "Table-LJC-H2O", 166 },
66 { "LJC-FreeEner", 101 },
67 { "BHAM-FreeEner", 106 },
68 { "LJC-Ewald", 150 }, /* not correct yet, but not used either */
69 { "Coul-Ewald", 150 }, /* not correct yet, but not used either */
70 { "BHAM-Ewald", 150 }, /* not correct yet, but not used either */
71 { "LJC-Ewald-H2O", 150 }, /* not correct yet, but not used either */
72 { "Coul-Ewald-H2O", 150 }, /* not correct yet, but not used either */
73 { "BHAM-Ewald-H2O", 150 }, /* not correct yet, but not used either */
74 { "Innerloop-Iatom", 10 },
75 { "Calc Weights", 36 },
76 { "Spread Q", 6 },
77 { "Spread Q Bspline", 2 }, /* a first guess */
78 { "Gather F", 23 },
79 { "Gather F Bspline",12 }, /* a first guess */
80 { "3D-FFT", 8 },
81 { "Convolution", 4 },
82 { "Solve PME", 64 }, /* a first guess */
83 { "NS-Pairs", 21 },
84 { "Reset In Box", 9 },
85 { "Shift-X", 6 },
86 { "CG-CoM", 29 },
87 { "Sum Forces", 1 },
88 { "Bonds", 43 },
89 { "G96Bonds", 40 },
90 { "Angles", 163 },
91 { "G96Angles", 150 },
92 { "Propers", 229 },
93 { "Impropers", 208 },
94 { "RB-Dihedrals", 247 },
95 { "Dist. Restr.", 200 },
96 { "Pos. Restr.", 50 },
97 { "Angle Restr.", 191 },
98 { "Angle Restr. Z", 164 },
99 { "Morse Potent.", 0 },
100 { "Water Pol.", 62 },
101 { "Virial", 18 },
102 { "Update", 31 },
103 { "Stop-CM", 10 },
104 { "P-Coupling", 24 },
105 { "Calc-Ekin", 27 },
106 { "Lincs", 60 },
107 { "Lincs-Mat", 4 },
108 { "Shake", 30 },
109 { "Shake-V", 15 },
110 { "Shake-Init", 10 },
111 { "Shake-Vir", 18 },
112 { "Settle", 323 },
113 { "PShake-InitLD", 59 },
114 { "PShake-InitMD", 65 },
115 { "PShake", 7 },
116 { "Dummy2", 17 },
117 { "Dummy3", 28 },
118 { "Dummy3fd", 95 },
119 { "Dummy3fad", 176 },
120 { "Dummy3out", 87 },
121 { "Dummy4fd", 110 }
124 void init_nrnb(t_nrnb *nrnb)
126 int i;
128 for(i=0; (i<eNRNB); i++)
129 nrnb->n[i]=0.0;
132 void cp_nrnb(t_nrnb *dest, t_nrnb *src)
134 int i;
136 for(i=0; (i<eNRNB); i++)
137 dest->n[i]=src->n[i];
140 void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
142 int i;
144 for(i=0; (i<eNRNB); i++)
145 dest->n[i]=s1->n[i]+s2->n[i];
148 void print_nrnb(FILE *out, t_nrnb *nrnb)
150 int i;
152 for(i=0; (i<eNRNB); i++)
153 fprintf(out,"%14s %10.0f.\n",nbdata[i].name,nrnb->n[i]);
156 void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
158 nrnb->n[enr]+=inc;
159 #ifdef DEBUG
160 fprintf(stdlog,"nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
161 nbdata[enr].name,enr,inc,file,line);
162 #endif
165 void print_perf(FILE *out,double cputime,double realtime,real runtime,
166 t_nrnb *nrnb,int nprocs)
168 int i;
169 double nbfs,mni,frac,tfrac,mflop,tflop;
171 if (cputime == 0.0) {
172 fprintf(out,"cputime = 0! Infinite Giga flopses! \n");
173 return;
176 nbfs=0.0;
177 for(i=0; (i<eNR_INLOOP); i++) {
178 if (strstr(nbdata[i].name,"H2O") != NULL)
179 nbfs += 3e-6*nrnb->n[i];
180 else
181 nbfs += 1e-6*nrnb->n[i];
183 tflop=0;
184 for(i=0; (i<eNRNB); i++)
185 tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
187 if (tflop == 0) {
188 fprintf(out,"No MEGA Flopsen this time\n");
189 return;
191 fprintf(out,"\tM E G A - F L O P S A C C O U N T I N G\n\n");
192 if (nprocs > 1) {
193 cputime = realtime;
194 fprintf(out,"\tBased on real time for parallel computer.\n");
196 fprintf(out,"%15s %12s %12s %8s\n",
197 "Computing:","M-Number","M-Flop's","% Flop's");
198 mflop=0.0;
199 tfrac=0.0;
200 for(i=0; (i<eNRNB); i++) {
201 mni = 1e-6*nrnb->n[i];
202 mflop += mni*nbdata[i].flop;
203 frac = 100.0*mni*nbdata[i].flop/tflop;
204 tfrac += frac;
205 if (mni != 0)
206 fprintf(out,"%15s %12.6f %12.6f %6.1f\n",
207 nbdata[i].name,mni,mni*nbdata[i].flop,frac);
209 fprintf(out,"%15s %12s %12.5f %6.1f\n\n",
210 "Total","",mflop,tfrac);
211 fprintf(out,"%12s %10s %10s %8s\n","","CPU (s)","Real (s)","(%)");
212 fprintf(out,"%12s %10.3f %10.3f %8.1f\n","Time:",
213 cputime, realtime, 100.0*cputime/realtime);
214 if (cputime > 60) {
215 fprintf(out,"%12s %10s","","");
216 pr_difftime(out,cputime);
218 if(runtime>0) { /* runtime=0 means calc energies only */
219 fprintf(out,"%12s %10s %10s %10s %10s\n",
220 "","(Mnbf/s)","(MFlops)","(ps/CPU hour)","(CPU hour/ns)");
221 fprintf(out,"%12s %10.3f %10.3f %10.3f %10.3f\n","Performance:",
222 nbfs/cputime,mflop/cputime,
223 runtime*3600/cputime,1000*cputime/(3600*runtime));
227 int cost_nrnb(int enr)
229 return nbdata[enr].flop;
232 char *nrnb_str(int enr)
234 return nbdata[enr].name;
237 static int force_index[]={
238 eNR_BONDS, eNR_ANGLES, eNR_PROPER, eNR_IMPROPER,
239 eNR_RB, eNR_DISRES, eNR_POSRES,
240 eNR_NS, eNR_INL_IATOM
242 #define NFORCE_INDEX asize(force_index)
244 static int shake_index[]={
245 eNR_SHAKE, eNR_SHAKE_RIJ, eNR_SETTLE, eNR_UPDATE, eNR_PCOUPL,
246 eNR_SHAKE_VIR, eNR_SHAKE_V, eNR_PSHAKEINITLD, eNR_PSHAKEINITMD, eNR_PSHAKE
248 #define NSHAKE_INDEX asize(shake_index)
/* Print one summary row of the load balance table to log.
 *
 * fav is the sum over all processors of the cost in ftot[]; it is turned
 * into the per-processor average here. Each processor's cost is printed
 * as a truncated percentage of that average, followed by 10000 divided by
 * the largest percentage (the scaling figure).
 *
 * Returns the largest percentage, or 0 when fav is not positive, in which
 * case nothing is printed.
 */
static double pr_av(FILE *log,int nprocs,double fav,double ftot[],char *title)
{
  double worst = 0;
  double share;
  int    cpu,shown;

  if (!(fav > 0)) {
    return worst;
  }

  fav = fav/nprocs;
  fprintf(log,"\n%15s:",title);
  for (cpu = 0; cpu < nprocs; cpu++) {
    share = (100.0*ftot[cpu])/fav;
    worst = max(worst,share);
    shown = share;          /* truncated to whole percent for printing */
    fprintf(log,"%3d ",shown);
  }

  if (worst > 0) {
    shown = 10000.0/worst;
    fprintf(log,"%6d%%\n\n",shown);
  }
  else {
    fprintf(log,"\n\n");
  }

  return worst;
}
275 void pr_load(FILE *log,int nprocs,t_nrnb nrnb[])
277 int i,j,perc;
278 double dperc,unb,uf,us;
279 double *ftot,fav;
280 double *stot,sav;
281 t_nrnb *av;
283 snew(av,1);
284 snew(ftot,nprocs);
285 snew(stot,nprocs);
286 init_nrnb(av);
287 for(i=0; (i<nprocs); i++) {
288 add_nrnb(av,av,&(nrnb[i]));
289 /* Cost due to forces */
290 for(j=0; (j<eNR_INLOOP); j++)
291 ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
292 for(j=0; (j<NFORCE_INDEX); j++)
293 ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
294 /* Due to shake */
295 for(j=0; (j<NSHAKE_INDEX); j++) {
296 stot[i]+=nrnb[i].n[shake_index[j]]*cost_nrnb(shake_index[j]);
299 for(j=0; (j<eNRNB); j++)
300 av->n[j]=av->n[j]/(double)nprocs;
302 fprintf(log,"\nDetailed load balancing info in percentage of average\n");
304 fprintf(log,"Type CPU:");
305 for(i=0; (i<nprocs); i++)
306 fprintf(log,"%3d ",i);
307 fprintf(log,"Scaling\n");
308 fprintf(log,"----------------");
309 for(i=0; (i<nprocs); i++)
310 fprintf(log,"----");
311 fprintf(log,"-------\n");
313 for(j=0; (j<eNRNB); j++) {
314 unb=100.0;
315 if (av->n[j] > 0) {
316 fprintf(log,"%15s:",nrnb_str(j));
317 for(i=0; (i<nprocs); i++) {
318 dperc=(100.0*nrnb[i].n[j])/av->n[j];
319 unb=max(unb,dperc);
320 perc=dperc;
321 fprintf(log,"%3d ",perc);
323 if (unb > 0) {
324 perc=10000.0/unb;
325 fprintf(log,"%6d%%\n",perc);
327 else
328 fprintf(log,"\n");
331 fav=sav=0;
332 for(i=0; (i<nprocs); i++) {
333 fav+=ftot[i];
334 sav+=stot[i];
336 uf=pr_av(log,nprocs,fav,ftot,"Total Force");
337 us=pr_av(log,nprocs,sav,stot,"Total Shake");
339 unb=(uf*fav+us*sav)/(fav+sav);
340 if (unb > 0) {
341 unb=10000.0/unb;
342 fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);