3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * GROningen Mixture of Alchemy and Childrens' Stories
41 #include "gmx_fatal.h"
55 static const t_nrnb_data nbdata
[eNRNB
] = {
56 { "LJ", 33 }, /* nb_kernel010 */
57 { "Buckingham", 61 }, /* nb_kernel020 */
58 { "VdW(T)", 54 }, /* nb_kernel030 */
59 { "Coulomb", 27 }, /* nb_kernel100 */
60 { "Coulomb [W3]", 80 }, /* nb_kernel101 */
61 { "Coulomb [W3-W3]", 234 }, /* nb_kernel102 */
62 { "Coulomb [W4]", 80 }, /* nb_kernel103 */
63 { "Coulomb [W4-W4]", 234 }, /* nb_kernel104 */
64 { "Coulomb + LJ", 38 }, /* nb_kernel110 */
65 { "Coulomb + LJ [W3]", 91 }, /* nb_kernel111 */
66 { "Coulomb + LJ [W3-W3]", 245 }, /* nb_kernel112 */
67 { "Coulomb + LJ [W4]", 113 }, /* nb_kernel113 */
68 { "Coulomb + LJ [W4-W4]", 267 }, /* nb_kernel114 */
69 { "Coulomb + Bham ", 64 }, /* nb_kernel120 */
70 { "Coulomb + Bham [W3]", 117 }, /* nb_kernel121 */
71 { "Coulomb + Bham [W3-W3]", 271 }, /* nb_kernel122 */
72 { "Coulomb + Bham [W4]", 141 }, /* nb_kernel123 */
73 { "Coulomb + Bham [W4-W4]", 295 }, /* nb_kernel124 */
74 { "Coulomb + VdW(T) ", 59 }, /* nb_kernel130 */
75 { "Coulomb + VdW(T) [W3]", 112 }, /* nb_kernel131 */
76 { "Coulomb + VdW(T) [W3-W3]", 266 }, /* nb_kernel132 */
77 { "Coulomb + VdW(T) [W4]", 134 }, /* nb_kernel133 */
78 { "Coulomb + VdW(T) [W4-W4]", 288 }, /* nb_kernel134 */
79 { "RF Coul", 33 }, /* nb_kernel200 */
80 { "RF Coul [W3]", 98 }, /* nb_kernel201 */
81 { "RF Coul [W3-W3]", 288 }, /* nb_kernel202 */
82 { "RF Coul [W4]", 98 }, /* nb_kernel203 */
83 { "RF Coul [W4-W4]", 288 }, /* nb_kernel204 */
84 { "RF Coul + LJ", 44 }, /* nb_kernel210 */
85 { "RF Coul + LJ [W3]", 109 }, /* nb_kernel211 */
86 { "RF Coul + LJ [W3-W3]", 299 }, /* nb_kernel212 */
87 { "RF Coul + LJ [W4]", 131 }, /* nb_kernel213 */
88 { "RF Coul + LJ [W4-W4]", 321 }, /* nb_kernel214 */
89 { "RF Coul + Bham ", 70 }, /* nb_kernel220 */
90 { "RF Coul + Bham [W3]", 135 }, /* nb_kernel221 */
91 { "RF Coul + Bham [W3-W3]", 325 }, /* nb_kernel222 */
92 { "RF Coul + Bham [W4]", 159 }, /* nb_kernel223 */
93 { "RF Coul + Bham [W4-W4]", 349 }, /* nb_kernel224 */
94 { "RF Coul + VdW(T) ", 65 }, /* nb_kernel230 */
95 { "RF Coul + VdW(T) [W3]", 130 }, /* nb_kernel231 */
96 { "RF Coul + VdW(T) [W3-W3]", 320 }, /* nb_kernel232 */
97 { "RF Coul + VdW(T) [W4]", 152 }, /* nb_kernel233 */
98 { "RF Coul + VdW(T) [W4-W4]", 342 }, /* nb_kernel234 */
99 { "Coul(T)", 42 }, /* nb_kernel300 */
100 { "Coul(T) [W3]", 125 }, /* nb_kernel301 */
101 { "Coul(T) [W3-W3]", 369 }, /* nb_kernel302 */
102 { "Coul(T) [W4]", 125 }, /* nb_kernel303 */
103 { "Coul(T) [W4-W4]", 369 }, /* nb_kernel304 */
104 { "Coul(T) + LJ", 55 }, /* nb_kernel310 */
105 { "Coul(T) + LJ [W3]", 138 }, /* nb_kernel311 */
106 { "Coul(T) + LJ [W3-W3]", 382 }, /* nb_kernel312 */
107 { "Coul(T) + LJ [W4]", 158 }, /* nb_kernel313 */
108 { "Coul(T) + LJ [W4-W4]", 402 }, /* nb_kernel314 */
109 { "Coul(T) + Bham", 81 }, /* nb_kernel320 */
110 { "Coul(T) + Bham [W3]", 164 }, /* nb_kernel321 */
111 { "Coul(T) + Bham [W3-W3]", 408 }, /* nb_kernel322 */
112 { "Coul(T) + Bham [W4]", 186 }, /* nb_kernel323 */
113 { "Coul(T) + Bham [W4-W4]", 430 }, /* nb_kernel324 */
114 { "Coul(T) + VdW(T)", 68 }, /* nb_kernel330 */
115 { "Coul(T) + VdW(T) [W3]", 151 }, /* nb_kernel331 */
116 { "Coul(T) + VdW(T) [W3-W3]", 395 }, /* nb_kernel332 */
117 { "Coul(T) + VdW(T) [W4]", 179 }, /* nb_kernel333 */
118 { "Coul(T) + VdW(T) [W4-W4]", 423 }, /* nb_kernel334 */
119 { "Generalized Born Coulomb", 48 }, /* nb_kernel400 */
120 { "GB Coulomb + LJ", 61 }, /* nb_kernel410 */
121 { "GB Coulomb + VdW(T)", 79 }, /* nb_kernel430 */
122 { "LJ NF", 19 }, /* nb_kernel010nf */
123 { "Buckingham NF", 48 }, /* nb_kernel020nf */
124 { "VdW(T) NF", 33 }, /* nb_kernel030nf */
125 { "Coulomb NF", 16 }, /* nb_kernel100nf */
126 { "Coulomb [W3] NF", 47 }, /* nb_kernel101nf */
127 { "Coulomb [W3-W3] NF", 135 }, /* nb_kernel102nf */
128 { "Coulomb [W4] NF", 47 }, /* nb_kernel103nf */
129 { "Coulomb [W4-W4] NF", 135 }, /* nb_kernel104nf */
130 { "Coulomb + LJ NF", 24 }, /* nb_kernel110nf */
131 { "Coulomb + LJ [W3] NF", 55 }, /* nb_kernel111nf */
132 { "Coulomb + LJ [W3-W3] NF", 143 }, /* nb_kernel112nf */
133 { "Coulomb + LJ [W4] NF", 66 }, /* nb_kernel113nf */
134 { "Coulomb + LJ [W4-W4] NF", 154 }, /* nb_kernel114nf */
135 { "Coulomb + Bham NF", 51 }, /* nb_kernel120nf */
136 { "Coulomb + Bham [W3] NF", 82 }, /* nb_kernel121nf */
137 { "Coulomb + Bham [W3-W3] NF", 170 }, /* nb_kernel122nf */
138 { "Coulomb + Bham [W4] NF", 95 }, /* nb_kernel123nf */
139 { "Coulomb + Bham [W4-W4] NF", 183 }, /* nb_kernel124nf */
140 { "Coulomb + VdW(T) NF", 36 }, /* nb_kernel130nf */
141 { "Coulomb + VdW(T) [W3] NF", 67 }, /* nb_kernel131nf */
142 { "Coulomb + VdW(T) [W3-W3] NF", 155 }, /* nb_kernel132nf */
143 { "Coulomb + VdW(T) [W4] NF", 80 }, /* nb_kernel133nf */
144 { "Coulomb + VdW(T) [W4-W4] NF", 168 }, /* nb_kernel134nf */
145 { "RF Coul NF", 19 }, /* nb_kernel200nf */
146 { "RF Coul [W3] NF", 56 }, /* nb_kernel201nf */
147 { "RF Coul [W3-W3] NF", 162 }, /* nb_kernel202nf */
148 { "RF Coul [W4] NF", 56 }, /* nb_kernel203nf */
149 { "RF Coul [W4-W4] NF", 162 }, /* nb_kernel204nf */
150 { "RF Coul + LJ NF", 27 }, /* nb_kernel210nf */
151 { "RF Coul + LJ [W3] NF", 64 }, /* nb_kernel211nf */
152 { "RF Coul + LJ [W3-W3] NF", 170 }, /* nb_kernel212nf */
153 { "RF Coul + LJ [W4] NF", 75 }, /* nb_kernel213nf */
154 { "RF Coul + LJ [W4-W4] NF", 181 }, /* nb_kernel214nf */
155 { "RF Coul + Bham NF", 54 }, /* nb_kernel220nf */
156 { "RF Coul + Bham [W3] NF", 91 }, /* nb_kernel221nf */
157 { "RF Coul + Bham [W3-W3] NF", 197 }, /* nb_kernel222nf */
158 { "RF Coul + Bham [W4] NF", 104 }, /* nb_kernel223nf */
159 { "RF Coul + Bham [W4-W4] NF", 210 }, /* nb_kernel224nf */
160 { "RF Coul + VdW(T) NF", 39 }, /* nb_kernel230nf */
161 { "RF Coul + VdW(T) [W3] NF", 76 }, /* nb_kernel231nf */
162 { "RF Coul + VdW(T) [W3-W3] NF", 182 }, /* nb_kernel232nf */
163 { "RF Coul + VdW(T) [W4] NF", 89 }, /* nb_kernel233nf */
164 { "RF Coul + VdW(T) [W4-W4] NF", 195 }, /* nb_kernel234nf */
165 { "Coul(T) NF", 26 }, /* nb_kernel300nf */
166 { "Coul(T) [W3] NF", 77 }, /* nb_kernel301nf */
167 { "Coul(T) [W3-W3] NF", 225 }, /* nb_kernel302nf */
168 { "Coul(T) [W4] NF", 77 }, /* nb_kernel303nf */
169 { "Coul(T) [W4-W4] NF", 225 }, /* nb_kernel304nf */
170 { "Coul(T) + LJ NF", 34 }, /* nb_kernel310nf */
171 { "Coul(T) + LJ [W3] NF", 85 }, /* nb_kernel311nf */
172 { "Coul(T) + LJ [W3-W3] NF", 233 }, /* nb_kernel312nf */
173 { "Coul(T) + LJ [W4] NF", 96 }, /* nb_kernel313nf */
174 { "Coul(T) + LJ [W4-W4] NF", 244 }, /* nb_kernel314nf */
175 { "Coul(T) + Bham NF", 61 }, /* nb_kernel320nf */
176 { "Coul(T) + Bham [W3] NF", 112 }, /* nb_kernel321nf */
177 { "Coul(T) + Bham [W3-W3] NF", 260 }, /* nb_kernel322nf */
178 { "Coul(T) + Bham [W4] NF", 125 }, /* nb_kernel323nf */
179 { "Coul(T) + Bham [W4-W4] NF", 273 }, /* nb_kernel324nf */
180 { "Coul(T) + VdW(T) NF", 42 }, /* nb_kernel330nf */
181 { "Coul(T) + VdW(T) [W3] NF", 93 }, /* nb_kernel331nf */
182 { "Coul(T) + VdW(T) [W3-W3] NF", 241 }, /* nb_kernel332nf */
183 { "Coul(T) + VdW(T) [W4] NF", 110 }, /* nb_kernel333nf */
184 { "Coul(T) + VdW(T) [W4-W4] NF", 258 }, /* nb_kernel334nf */
185 { "Generalized Born Coulomb NF", 29 }, /* nb_kernel400nf */
186 { "GB Coulomb + LJ NF", 37 }, /* nb_kernel410nf */
187 { "GB Coulomb + VdW(T) NF", 49 }, /* nb_kernel430nf */
188 { "Free energy innerloop", 150 }, /* free energy, estimate */
189 { "All-vs-All, Coul + LJ", 38 },
190 { "All-vs-All, GB + LJ", 61 },
191 { "Outer nonbonded loop", 10 },
192 { "1,4 nonbonded interactions", 90 },
193 { "Born radii (Still)", 47 },
194 { "Born radii (HCT/OBC)", 183 },
195 { "Born force chain rule", 15 },
196 { "All-vs-All Still radii", 47 },
197 { "All-vs-All HCT/OBC radii", 183 },
198 { "All-vs-All Born chain rule", 15 },
199 { "Calc Weights", 36 },
201 { "Spread Q Bspline", 2 },
203 { "Gather F Bspline", 12 },
205 { "Convolution", 4 },
208 { "Reset In Box", 3 },
214 { "FENE Bonds", 58 },
215 { "Tab. Bonds", 62 },
217 { "G96Angles", 150 },
218 { "Quartic Angles", 160 },
219 { "Tab. Angles", 169 },
221 { "Impropers", 208 },
222 { "RB-Dihedrals", 247 },
223 { "Four. Dihedrals", 247 },
224 { "Tab. Dihedrals", 227 },
225 { "Dist. Restr.", 200 },
226 { "Orient. Restr.", 200 },
227 { "Dihedral Restr.", 200 },
228 { "Pos. Restr.", 50 },
229 { "Angle Restr.", 191 },
230 { "Angle Restr. Z", 164 },
231 { "Morse Potent.", 58 },
232 { "Cubic Bonds", 54 },
234 { "Water Pol.", 62 },
235 { "Thole Pol.", 296 },
238 { "Ext.ens. Update", 54 },
245 { "Constraint-V", 8 },
246 { "Shake-Init", 10 },
247 { "Constraint-Vir", 24 },
249 { "Virtual Site 2", 23 },
250 { "Virtual Site 3", 37 },
251 { "Virtual Site 3fd", 95 },
252 { "Virtual Site 3fad", 176 },
253 { "Virtual Site 3out", 87 },
254 { "Virtual Site 4fd", 110 },
255 { "Virtual Site 4fdn", 254 },
256 { "Virtual Site N", 15 },
257 { "Mixed Generalized Born stuff", 10 }
261 void init_nrnb(t_nrnb
*nrnb
)
265 for(i
=0; (i
<eNRNB
); i
++)
269 void cp_nrnb(t_nrnb
*dest
, t_nrnb
*src
)
273 for(i
=0; (i
<eNRNB
); i
++)
274 dest
->n
[i
]=src
->n
[i
];
277 void add_nrnb(t_nrnb
*dest
, t_nrnb
*s1
, t_nrnb
*s2
)
281 for(i
=0; (i
<eNRNB
); i
++)
282 dest
->n
[i
]=s1
->n
[i
]+s2
->n
[i
];
285 void print_nrnb(FILE *out
, t_nrnb
*nrnb
)
289 for(i
=0; (i
<eNRNB
); i
++)
291 fprintf(out
," %-26s %10.0f.\n",nbdata
[i
].name
,nrnb
->n
[i
]);
294 void _inc_nrnb(t_nrnb
*nrnb
,int enr
,int inc
,char *file
,int line
)
298 printf("nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
299 nbdata
[enr
].name
,enr
,inc
,file
,line
);
303 void print_flop(FILE *out
,t_nrnb
*nrnb
,double *nbfs
,double *mflop
)
306 double mni
,frac
,tfrac
,tflop
;
307 const char *myline
= "-----------------------------------------------------------------------------";
310 for(i
=0; (i
<eNR_NBKERNEL_NR
); i
++) {
311 if (strstr(nbdata
[i
].name
,"W3-W3") != NULL
)
312 *nbfs
+= 9e-6*nrnb
->n
[i
];
313 else if (strstr(nbdata
[i
].name
,"W3") != NULL
)
314 *nbfs
+= 3e-6*nrnb
->n
[i
];
315 else if (strstr(nbdata
[i
].name
,"W4-W4") != NULL
)
316 *nbfs
+= 10e-6*nrnb
->n
[i
];
317 else if (strstr(nbdata
[i
].name
,"W4") != NULL
)
318 *nbfs
+= 4e-6*nrnb
->n
[i
];
320 *nbfs
+= 1e-6*nrnb
->n
[i
];
323 for(i
=0; (i
<eNRNB
); i
++)
324 tflop
+=1e-6*nrnb
->n
[i
]*nbdata
[i
].flop
;
327 fprintf(out
,"No MEGA Flopsen this time\n");
331 fprintf(out
,"\n\tM E G A - F L O P S A C C O U N T I N G\n\n");
335 fprintf(out
," RF=Reaction-Field FE=Free Energy SCFE=Soft-Core/Free Energy\n");
336 fprintf(out
," T=Tabulated W3=SPC/TIP3p W4=TIP4p (single or pairs)\n");
337 fprintf(out
," NF=No Forces\n\n");
339 fprintf(out
," %-32s %16s %15s %7s\n",
340 "Computing:","M-Number","M-Flops","% Flops");
341 fprintf(out
,"%s\n",myline
);
345 for(i
=0; (i
<eNRNB
); i
++) {
346 mni
= 1e-6*nrnb
->n
[i
];
347 *mflop
+= mni
*nbdata
[i
].flop
;
348 frac
= 100.0*mni
*nbdata
[i
].flop
/tflop
;
351 fprintf(out
," %-32s %16.6f %15.3f %6.1f\n",
352 nbdata
[i
].name
,mni
,mni
*nbdata
[i
].flop
,frac
);
355 fprintf(out
,"%s\n",myline
);
356 fprintf(out
," %-32s %16s %15.3f %6.1f\n",
357 "Total","",*mflop
,tfrac
);
358 fprintf(out
,"%s\n\n",myline
);
362 void print_perf(FILE *out
,double nodetime
,double realtime
,int nprocs
,
363 gmx_large_int_t nsteps
,real delta_t
,
364 double nbfs
,double mflop
)
370 if (nodetime
== 0.0) {
371 fprintf(out
,"nodetime = 0! Infinite Giga flopses!\n");
377 fprintf(out
,"\tParallel run - timing based on wallclock.\n\n");
380 if ((nodetime
> 0) && (realtime
> 0)) {
381 fprintf(out
,"%12s %10s %10s %8s\n","","NODE (s)","Real (s)","(%)");
382 fprintf(out
,"%12s %10.3f %10.3f %8.1f\n","Time:",
383 nodetime
, realtime
, 100.0*nodetime
/realtime
);
385 fprintf(out
,"%12s %10s","","");
386 pr_difftime(out
,nodetime
);
389 mflop
= mflop
/nodetime
;
390 runtime
= nsteps
*delta_t
;
391 fprintf(out
,"%12s %10s %10s %10s %10s\n",
392 "","(Mnbf/s)",(mflop
> 1000) ? "(GFlops)" : "(MFlops)",
393 "(ns/day)","(hour/ns)");
394 fprintf(out
,"%12s %10.3f %10.3f %10.3f %10.3f\n","Performance:",
395 nbfs
/nodetime
,(mflop
> 1000) ? (mflop
/1000) : mflop
,
396 runtime
*24*3.6/nodetime
,1000*nodetime
/(3600*runtime
));
398 fprintf(out
,"%12s %10s %10s %14s\n",
399 "","(Mnbf/s)",(mflop
> 1000) ? "(GFlops)" : "(MFlops)",
401 fprintf(out
,"%12s %10.3f %10.3f %14.1f\n","Performance:",
402 nbfs
/nodetime
,(mflop
> 1000) ? (mflop
/1000) : mflop
,
403 nsteps
*3600.0/nodetime
);
408 int cost_nrnb(int enr
)
410 return nbdata
[enr
].flop
;
413 const char *nrnb_str(int enr
)
415 return nbdata
[enr
].name
;
418 static const int force_index
[]={
419 eNR_BONDS
, eNR_ANGLES
, eNR_PROPER
, eNR_IMPROPER
,
420 eNR_RB
, eNR_DISRES
, eNR_ORIRES
, eNR_POSRES
,
421 eNR_NS
, eNR_NBKERNEL_OUTER
423 #define NFORCE_INDEX asize(force_index)
425 static const int constr_index
[]={
426 eNR_SHAKE
, eNR_SHAKE_RIJ
, eNR_SETTLE
, eNR_UPDATE
, eNR_PCOUPL
,
427 eNR_CONSTR_VIR
,eNR_CONSTR_V
429 #define NCONSTR_INDEX asize(constr_index)
431 static double pr_av(FILE *log
,t_commrec
*cr
,
432 double fav
,double ftot
[],const char *title
)
439 fav
/= cr
->nnodes
- cr
->npmenodes
;
440 fprintf(log
,"\n %-26s",title
);
441 for(i
=0; (i
<cr
->nnodes
); i
++) {
442 dperc
=(100.0*ftot
[i
])/fav
;
445 fprintf(log
,"%3d ",perc
);
449 fprintf(log
,"%6d%%\n\n",perc
);
457 void pr_load(FILE *log
,t_commrec
*cr
,t_nrnb nrnb
[])
460 double dperc
,unb
,uf
,us
;
466 snew(ftot
,cr
->nnodes
);
467 snew(stot
,cr
->nnodes
);
469 for(i
=0; (i
<cr
->nnodes
); i
++) {
470 add_nrnb(av
,av
,&(nrnb
[i
]));
471 /* Cost due to forces */
472 for(j
=0; (j
<eNR_NBKERNEL_NR
); j
++)
473 ftot
[i
]+=nrnb
[i
].n
[j
]*cost_nrnb(j
);
474 for(j
=0; (j
<NFORCE_INDEX
); j
++)
475 ftot
[i
]+=nrnb
[i
].n
[force_index
[j
]]*cost_nrnb(force_index
[j
]);
477 for(j
=0; (j
<NCONSTR_INDEX
); j
++) {
478 stot
[i
]+=nrnb
[i
].n
[constr_index
[j
]]*cost_nrnb(constr_index
[j
]);
481 for(j
=0; (j
<eNRNB
); j
++)
482 av
->n
[j
]=av
->n
[j
]/(double)(cr
->nnodes
- cr
->npmenodes
);
484 fprintf(log
,"\nDetailed load balancing info in percentage of average\n");
486 fprintf(log
," Type NODE:");
487 for(i
=0; (i
<cr
->nnodes
); i
++)
488 fprintf(log
,"%3d ",i
);
489 fprintf(log
,"Scaling\n");
490 fprintf(log
,"---------------------------");
491 for(i
=0; (i
<cr
->nnodes
); i
++)
493 fprintf(log
,"-------\n");
495 for(j
=0; (j
<eNRNB
); j
++) {
498 fprintf(log
," %-26s",nrnb_str(j
));
499 for(i
=0; (i
<cr
->nnodes
); i
++) {
500 dperc
=(100.0*nrnb
[i
].n
[j
])/av
->n
[j
];
503 fprintf(log
,"%3d ",perc
);
507 fprintf(log
,"%6d%%\n",perc
);
514 for(i
=0; (i
<cr
->nnodes
); i
++) {
518 uf
=pr_av(log
,cr
,fav
,ftot
,"Total Force");
519 us
=pr_av(log
,cr
,sav
,stot
,"Total Constr.");
521 unb
=(uf
*fav
+us
*sav
)/(fav
+sav
);
524 fprintf(log
,"\nTotal Scaling: %.0f%% of max performance\n\n",unb
);