4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
12 * Copyright (c) 1991-1999
13 * BIOSON Research Institute, Dept. of Biophysical Chemistry
14 * University of Groningen, The Netherlands
17 * GROMACS: A message-passing parallel molecular dynamics implementation
18 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
19 * Comp. Phys. Comm. 91, 43-56 (1995)
21 * Also check out our WWW page:
22 * http://md.chem.rug.nl/~gmx
27 * Green Red Orange Magenta Azure Cyan Skyblue
/* Holds the "$Id$" revision-control keyword string for this source file. */
static char *SRCID_nrnb_c = "$Id$";
/*
 * Per-counter table pairing a display name with an integer cost.
 *
 * The name (.name) is what print_nrnb, print_perf, _inc_nrnb and nrnb_str
 * report.  The cost (.flop) is what print_perf multiplies each counter by
 * and what cost_nrnb returns.  Entries are looked up by counter index.
 * NOTE(review): presumably the entry order must match the counter enum
 * that eNRNB terminates - verify against its declaration.
 * NOTE(review): print_perf tests the names of the inner-loop entries for
 * the substring "H2O" and weights those counters with 3e-6 instead of
 * 1e-6, so these strings affect results and are not purely cosmetic.
 */
47 static t_nrnb_data nbdata
[eNRNB
] = {
57 { "Table-BHAM-H2O", 171 },
58 { "LJ+Coulomb-H2O", 91 },
59 { "Coulomb-H2O", 80 },
60 { "LJC-RF-H2O", 106 },
61 { "Coul-RF-H2O", 95 },
62 { "Buckingham-H2O", 97 },
63 { "Buck.+RF-H2O", 112 },
64 { "Table-Coul-H2O", 140 },
65 { "Table-LJC-H2O", 166 },
66 { "LJC-FreeEner", 101 },
67 { "BHAM-FreeEner", 106 },
68 { "LJC-Ewald", 150 }, /* not correct yet, but not used either */
69 { "Coul-Ewald", 150 }, /* not correct yet, but not used either */
70 { "BHAM-Ewald", 150 }, /* not correct yet, but not used either */
71 { "LJC-Ewald-H2O", 150 }, /* not correct yet, but not used either */
72 { "Coul-Ewald-H2O", 150 }, /* not correct yet, but not used either */
73 { "BHAM-Ewald-H2O", 150 }, /* not correct yet, but not used either */
74 { "Innerloop-Iatom", 10 },
75 { "Calc Weights", 36 },
77 { "Spread Q Bspline", 2 }, /* a first guess */
79 { "Gather F Bspline",12 }, /* a first guess */
82 { "Solve PME", 64 }, /* a first guess */
84 { "Reset In Box", 9 },
94 { "RB-Dihedrals", 247 },
95 { "Dist. Restr.", 200 },
96 { "Pos. Restr.", 50 },
97 { "Angle Restr.", 191 },
98 { "Angle Restr. Z", 164 },
/* NOTE(review): a cost of 0 means this counter adds nothing to any flop
 * total computed from the table - confirm that is intended. */
99 { "Morse Potent.", 0 },
100 { "Water Pol.", 62 },
104 { "P-Coupling", 24 },
110 { "Shake-Init", 10 },
113 { "PShake-InitLD", 59 },
114 { "PShake-InitMD", 65 },
119 { "Dummy3fad", 176 },
/*
 * Initialise a counter set.
 *
 * Walks over all eNRNB entries of nrnb.
 * NOTE(review): the statement executed for each entry is not visible
 * here; presumably it resets nrnb->n[i] - confirm before relying on it.
 */
124 void init_nrnb(t_nrnb
*nrnb
)
128 for(i
=0; (i
<eNRNB
); i
++)
132 void cp_nrnb(t_nrnb
*dest
, t_nrnb
*src
)
136 for(i
=0; (i
<eNRNB
); i
++)
137 dest
->n
[i
]=src
->n
[i
];
140 void add_nrnb(t_nrnb
*dest
, t_nrnb
*s1
, t_nrnb
*s2
)
144 for(i
=0; (i
<eNRNB
); i
++)
145 dest
->n
[i
]=s1
->n
[i
]+s2
->n
[i
];
148 void print_nrnb(FILE *out
, t_nrnb
*nrnb
)
152 for(i
=0; (i
<eNRNB
); i
++)
153 fprintf(out
,"%14s %10.0f.\n",nbdata
[i
].name
,nrnb
->n
[i
]);
/*
 * Counter-increment entry point that also receives the caller's source
 * location.
 *
 *   nrnb        counter set
 *   enr         counter index; also used to look up the name in nbdata
 *   inc         amount reported as the increment
 *   file, line  call site, reported in the log message
 *
 * Writes one line to stdlog naming the counter, its index, the increment
 * and the call site.
 * NOTE(review): the update of nrnb itself is not visible here -
 * presumably nrnb->n[enr] is increased by inc; confirm.  Also check
 * whether the log line is conditionally compiled.
 * NOTE(review): enr is used as an array index without a range check.
 * NOTE(review): file-scope identifiers starting with an underscore are
 * reserved in ISO C; renaming would change the public interface, so it
 * is only flagged here.
 */
156 void _inc_nrnb(t_nrnb
*nrnb
,int enr
,int inc
,char *file
,int line
)
160 fprintf(stdlog
,"nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
161 nbdata
[enr
].name
,enr
,inc
,file
,line
);
/*
 * Print the flop accounting table and the timing/performance summary.
 *
 *   out       stream the report is written to
 *   cputime   CPU time; used as the divisor for the rates in the
 *             "Performance:" line
 *   realtime  real time; used as the divisor for the CPU/real percentage
 *   runtime   the performance line is printed only when this is > 0
 *   nrnb      counter set being reported
 *   nprocs    number of processors
 *
 * NOTE(review): the initialisation of nbfs, mflop, tflop and tfrac, the
 * conditions guarding several of the fprintf calls, and any early return
 * are not visible here - confirm them before changing this function.
 * NOTE(review): cputime and realtime are used as divisors further down;
 * the visible cputime == 0.0 check only prints a message.  Confirm
 * whether the function goes on to divide by zero in that case.
 */
165 void print_perf(FILE *out
,double cputime
,double realtime
,real runtime
,
166 t_nrnb
*nrnb
,int nprocs
)
169 double nbfs
,mni
,frac
,tfrac
,mflop
,tflop
;
171 if (cputime
== 0.0) {
172 fprintf(out
,"cputime = 0! Infinite Giga flopses! \n");
/* Accumulate nbfs from the inner-loop counters (index below eNR_INLOOP),
 * scaled by 1e-6.  Counters whose table name contains "H2O" are
 * weighted by 3e-6 instead.
 * NOTE(review): presumably the 1e-6 statement is the else branch of the
 * "H2O" test - the connecting line is not visible. */
177 for(i
=0; (i
<eNR_INLOOP
); i
++) {
178 if (strstr(nbdata
[i
].name
,"H2O") != NULL
)
179 nbfs
+= 3e-6*nrnb
->n
[i
];
181 nbfs
+= 1e-6*nrnb
->n
[i
];
/* tflop: sum over all counters of count * table cost, scaled by 1e-6. */
184 for(i
=0; (i
<eNRNB
); i
++)
185 tflop
+=1e-6*nrnb
->n
[i
]*nbdata
[i
].flop
;
188 fprintf(out
,"No MEGA Flopsen this time\n");
191 fprintf(out
,"\tM E G A - F L O P S A C C O U N T I N G\n\n");
194 fprintf(out
,"\tBased on real time for parallel computer.\n");
196 fprintf(out
,"%15s %12s %12s %8s\n",
197 "Computing:","M-Number","M-Flop's","% Flop's");
/* One table row per counter: mni is the count scaled by 1e-6, the flop
 * column is mni * table cost, and frac is that row's percentage of tflop.
 * mflop accumulates the flop column for the "Total" row. */
200 for(i
=0; (i
<eNRNB
); i
++) {
201 mni
= 1e-6*nrnb
->n
[i
];
202 mflop
+= mni
*nbdata
[i
].flop
;
203 frac
= 100.0*mni
*nbdata
[i
].flop
/tflop
;
206 fprintf(out
,"%15s %12.6f %12.6f %6.1f\n",
207 nbdata
[i
].name
,mni
,mni
*nbdata
[i
].flop
,frac
);
209 fprintf(out
,"%15s %12s %12.5f %6.1f\n\n",
210 "Total","",mflop
,tfrac
);
211 fprintf(out
,"%12s %10s %10s %8s\n","","CPU (s)","Real (s)","(%)");
212 fprintf(out
,"%12s %10.3f %10.3f %8.1f\n","Time:",
213 cputime
, realtime
, 100.0*cputime
/realtime
);
215 fprintf(out
,"%12s %10s","","");
216 pr_difftime(out
,cputime
);
/* Performance line: nbfs and mflop per unit of cputime, then
 * runtime*3600/cputime and 1000*cputime/(3600*runtime), printed under
 * the "(ps/CPU hour)" and "(CPU hour/ns)" headings. */
218 if(runtime
>0) { /* runtime=0 means calc energies only */
219 fprintf(out
,"%12s %10s %10s %10s %10s\n",
220 "","(Mnbf/s)","(MFlops)","(ps/CPU hour)","(CPU hour/ns)");
221 fprintf(out
,"%12s %10.3f %10.3f %10.3f %10.3f\n","Performance:",
222 nbfs
/cputime
,mflop
/cputime
,
223 runtime
*3600/cputime
,1000*cputime
/(3600*runtime
));
227 int cost_nrnb(int enr
)
229 return nbdata
[enr
].flop
;
232 char *nrnb_str(int enr
)
234 return nbdata
[enr
].name
;
237 static int force_index
[]={
238 eNR_BONDS
, eNR_ANGLES
, eNR_PROPER
, eNR_IMPROPER
,
239 eNR_RB
, eNR_DISRES
, eNR_POSRES
,
240 eNR_NS
, eNR_INL_IATOM
242 #define NFORCE_INDEX asize(force_index)
244 static int shake_index
[]={
245 eNR_SHAKE
, eNR_SHAKE_RIJ
, eNR_SETTLE
, eNR_UPDATE
, eNR_PCOUPL
,
246 eNR_SHAKE_VIR
, eNR_SHAKE_V
, eNR_PSHAKEINITLD
, eNR_PSHAKEINITMD
, eNR_PSHAKE
248 #define NSHAKE_INDEX asize(shake_index)
/*
 * Print one summary row of the load table and return a double to the
 * caller.
 *
 *   log     stream the row is written to
 *   nprocs  number of entries of ftot that are read
 *   fav     reference value each ftot[i] is expressed as a percentage of
 *   ftot    per-processor totals
 *   title   row label, printed ahead of the percentages
 *
 * NOTE(review): how perc is derived from dperc, what value is returned,
 * and whether fav is adjusted or checked for zero before the division
 * are not visible here - confirm before relying on any of them.
 */
250 static double pr_av(FILE *log
,int nprocs
,double fav
,double ftot
[],char *title
)
258 fprintf(log
,"\n%15s:",title
);
259 for(i
=0; (i
<nprocs
); i
++) {
260 dperc
=(100.0*ftot
[i
])/fav
;
263 fprintf(log
,"%3d ",perc
);
267 fprintf(log
,"%6d%%\n\n",perc
);
/*
 * Print a per-processor load-balancing report to log.
 *
 *   log     stream the report is written to
 *   nprocs  number of processors; nrnb[0..nprocs-1] are read
 *   nrnb    one counter set per processor
 *
 * NOTE(review): the declarations and set-up of av, ftot, stot, fav, sav
 * and perc, plus any guards around the divisions below, are not visible
 * here - confirm them before changing this function.
 */
275 void pr_load(FILE *log
,int nprocs
,t_nrnb nrnb
[])
278 double dperc
,unb
,uf
,us
;
/* Per processor: fold its counters into av, then build two cost-weighted
 * totals - ftot[i] from the inner-loop counters plus those listed in
 * force_index, and stot[i] from those listed in shake_index. */
287 for(i
=0; (i
<nprocs
); i
++) {
288 add_nrnb(av
,av
,&(nrnb
[i
]));
289 /* Cost due to forces */
290 for(j
=0; (j
<eNR_INLOOP
); j
++)
291 ftot
[i
]+=nrnb
[i
].n
[j
]*cost_nrnb(j
);
292 for(j
=0; (j
<NFORCE_INDEX
); j
++)
293 ftot
[i
]+=nrnb
[i
].n
[force_index
[j
]]*cost_nrnb(force_index
[j
]);
295 for(j
=0; (j
<NSHAKE_INDEX
); j
++) {
296 stot
[i
]+=nrnb
[i
].n
[shake_index
[j
]]*cost_nrnb(shake_index
[j
]);
/* Turn the summed counters in av into per-processor averages. */
299 for(j
=0; (j
<eNRNB
); j
++)
300 av
->n
[j
]=av
->n
[j
]/(double)nprocs
;
302 fprintf(log
,"\nDetailed load balancing info in percentage of average\n");
304 fprintf(log
,"Type CPU:");
305 for(i
=0; (i
<nprocs
); i
++)
306 fprintf(log
,"%3d ",i
);
307 fprintf(log
,"Scaling\n");
308 fprintf(log
,"----------------");
309 for(i
=0; (i
<nprocs
); i
++)
311 fprintf(log
,"-------\n");
/* One row per counter: each processor's count as a percentage of the
 * average for that counter.
 * NOTE(review): av->n[j] is the divisor; confirm a zero average is
 * skipped before this point. */
313 for(j
=0; (j
<eNRNB
); j
++) {
316 fprintf(log
,"%15s:",nrnb_str(j
));
317 for(i
=0; (i
<nprocs
); i
++) {
318 dperc
=(100.0*nrnb
[i
].n
[j
])/av
->n
[j
];
321 fprintf(log
,"%3d ",perc
);
325 fprintf(log
,"%6d%%\n",perc
);
332 for(i
=0; (i
<nprocs
); i
++) {
336 uf
=pr_av(log
,nprocs
,fav
,ftot
,"Total Force");
337 us
=pr_av(log
,nprocs
,sav
,stot
,"Total Shake");
/* Combine the two pr_av results, weighted by fav and sav.
 * NOTE(review): fav+sav is the divisor; confirm it cannot be zero. */
339 unb
=(uf
*fav
+us
*sav
)/(fav
+sav
);
342 fprintf(log
,"\nTotal Scaling: %.0f%% of max performance\n\n",unb
);