Don't use POSIX fnmatch() for pattern matching.
[gromacs/qmmm-gamess-us.git] / src / gmxlib / nrnb.c
blobf081fdee1b4519f86441ab52f05e093a9196d6d9
1 /*
2 *
3 * This source code is part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * GROningen Mixture of Alchemy and Childrens' Stories
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
39 #include <string.h>
40 #include "sysstuff.h"
41 #include "gmx_fatal.h"
42 #include "names.h"
43 #include "macros.h"
44 #include "nrnb.h"
45 #include "main.h"
46 #include "smalloc.h"
47 #include "copyrite.h"
49 typedef struct {
50 const char *name;
51 int flop;
52 } t_nrnb_data;
55 static const t_nrnb_data nbdata[eNRNB] = {
56 { "LJ", 33 }, /* nb_kernel010 */
57 { "Buckingham", 61 }, /* nb_kernel020 */
58 { "VdW(T)", 54 }, /* nb_kernel030 */
59 { "Coulomb", 27 }, /* nb_kernel100 */
60 { "Coulomb [W3]", 80 }, /* nb_kernel101 */
61 { "Coulomb [W3-W3]", 234 }, /* nb_kernel102 */
62 { "Coulomb [W4]", 80 }, /* nb_kernel103 */
63 { "Coulomb [W4-W4]", 234 }, /* nb_kernel104 */
64 { "Coulomb + LJ", 38 }, /* nb_kernel110 */
65 { "Coulomb + LJ [W3]", 91 }, /* nb_kernel111 */
66 { "Coulomb + LJ [W3-W3]", 245 }, /* nb_kernel112 */
67 { "Coulomb + LJ [W4]", 113 }, /* nb_kernel113 */
68 { "Coulomb + LJ [W4-W4]", 267 }, /* nb_kernel114 */
69 { "Coulomb + Bham ", 64 }, /* nb_kernel120 */
70 { "Coulomb + Bham [W3]", 117 }, /* nb_kernel121 */
71 { "Coulomb + Bham [W3-W3]", 271 }, /* nb_kernel122 */
72 { "Coulomb + Bham [W4]", 141 }, /* nb_kernel123 */
73 { "Coulomb + Bham [W4-W4]", 295 }, /* nb_kernel124 */
74 { "Coulomb + VdW(T) ", 59 }, /* nb_kernel130 */
75 { "Coulomb + VdW(T) [W3]", 112 }, /* nb_kernel131 */
76 { "Coulomb + VdW(T) [W3-W3]", 266 }, /* nb_kernel132 */
77 { "Coulomb + VdW(T) [W4]", 134 }, /* nb_kernel133 */
78 { "Coulomb + VdW(T) [W4-W4]", 288 }, /* nb_kernel134 */
79 { "RF Coul", 33 }, /* nb_kernel200 */
80 { "RF Coul [W3]", 98 }, /* nb_kernel201 */
81 { "RF Coul [W3-W3]", 288 }, /* nb_kernel202 */
82 { "RF Coul [W4]", 98 }, /* nb_kernel203 */
83 { "RF Coul [W4-W4]", 288 }, /* nb_kernel204 */
84 { "RF Coul + LJ", 44 }, /* nb_kernel210 */
85 { "RF Coul + LJ [W3]", 109 }, /* nb_kernel211 */
86 { "RF Coul + LJ [W3-W3]", 299 }, /* nb_kernel212 */
87 { "RF Coul + LJ [W4]", 131 }, /* nb_kernel213 */
88 { "RF Coul + LJ [W4-W4]", 321 }, /* nb_kernel214 */
89 { "RF Coul + Bham ", 70 }, /* nb_kernel220 */
90 { "RF Coul + Bham [W3]", 135 }, /* nb_kernel221 */
91 { "RF Coul + Bham [W3-W3]", 325 }, /* nb_kernel222 */
92 { "RF Coul + Bham [W4]", 159 }, /* nb_kernel223 */
93 { "RF Coul + Bham [W4-W4]", 349 }, /* nb_kernel224 */
94 { "RF Coul + VdW(T) ", 65 }, /* nb_kernel230 */
95 { "RF Coul + VdW(T) [W3]", 130 }, /* nb_kernel231 */
96 { "RF Coul + VdW(T) [W3-W3]", 320 }, /* nb_kernel232 */
97 { "RF Coul + VdW(T) [W4]", 152 }, /* nb_kernel233 */
98 { "RF Coul + VdW(T) [W4-W4]", 342 }, /* nb_kernel234 */
99 { "Coul(T)", 42 }, /* nb_kernel300 */
100 { "Coul(T) [W3]", 125 }, /* nb_kernel301 */
101 { "Coul(T) [W3-W3]", 369 }, /* nb_kernel302 */
102 { "Coul(T) [W4]", 125 }, /* nb_kernel303 */
103 { "Coul(T) [W4-W4]", 369 }, /* nb_kernel304 */
104 { "Coul(T) + LJ", 55 }, /* nb_kernel310 */
105 { "Coul(T) + LJ [W3]", 138 }, /* nb_kernel311 */
106 { "Coul(T) + LJ [W3-W3]", 382 }, /* nb_kernel312 */
107 { "Coul(T) + LJ [W4]", 158 }, /* nb_kernel313 */
108 { "Coul(T) + LJ [W4-W4]", 402 }, /* nb_kernel314 */
109 { "Coul(T) + Bham", 81 }, /* nb_kernel320 */
110 { "Coul(T) + Bham [W3]", 164 }, /* nb_kernel321 */
111 { "Coul(T) + Bham [W3-W3]", 408 }, /* nb_kernel322 */
112 { "Coul(T) + Bham [W4]", 186 }, /* nb_kernel323 */
113 { "Coul(T) + Bham [W4-W4]", 430 }, /* nb_kernel324 */
114 { "Coul(T) + VdW(T)", 68 }, /* nb_kernel330 */
115 { "Coul(T) + VdW(T) [W3]", 151 }, /* nb_kernel331 */
116 { "Coul(T) + VdW(T) [W3-W3]", 395 }, /* nb_kernel332 */
117 { "Coul(T) + VdW(T) [W4]", 179 }, /* nb_kernel333 */
118 { "Coul(T) + VdW(T) [W4-W4]", 423 }, /* nb_kernel334 */
119 { "Generalized Born Coulomb", 48 }, /* nb_kernel400 */
120 { "GB Coulomb + LJ", 61 }, /* nb_kernel410 */
121 { "GB Coulomb + VdW(T)", 79 }, /* nb_kernel430 */
122 { "LJ NF", 19 }, /* nb_kernel010nf */
123 { "Buckingham NF", 48 }, /* nb_kernel020nf */
124 { "VdW(T) NF", 33 }, /* nb_kernel030nf */
125 { "Coulomb NF", 16 }, /* nb_kernel100nf */
126 { "Coulomb [W3] NF", 47 }, /* nb_kernel101nf */
127 { "Coulomb [W3-W3] NF", 135 }, /* nb_kernel102nf */
128 { "Coulomb [W4] NF", 47 }, /* nb_kernel103nf */
129 { "Coulomb [W4-W4] NF", 135 }, /* nb_kernel104nf */
130 { "Coulomb + LJ NF", 24 }, /* nb_kernel110nf */
131 { "Coulomb + LJ [W3] NF", 55 }, /* nb_kernel111nf */
132 { "Coulomb + LJ [W3-W3] NF", 143 }, /* nb_kernel112nf */
133 { "Coulomb + LJ [W4] NF", 66 }, /* nb_kernel113nf */
134 { "Coulomb + LJ [W4-W4] NF", 154 }, /* nb_kernel114nf */
135 { "Coulomb + Bham NF", 51 }, /* nb_kernel120nf */
136 { "Coulomb + Bham [W3] NF", 82 }, /* nb_kernel121nf */
137 { "Coulomb + Bham [W3-W3] NF", 170 }, /* nb_kernel122nf */
138 { "Coulomb + Bham [W4] NF", 95 }, /* nb_kernel123nf */
139 { "Coulomb + Bham [W4-W4] NF", 183 }, /* nb_kernel124nf */
140 { "Coulomb + VdW(T) NF", 36 }, /* nb_kernel130nf */
141 { "Coulomb + VdW(T) [W3] NF", 67 }, /* nb_kernel131nf */
142 { "Coulomb + VdW(T) [W3-W3] NF", 155 }, /* nb_kernel132nf */
143 { "Coulomb + VdW(T) [W4] NF", 80 }, /* nb_kernel133nf */
144 { "Coulomb + VdW(T) [W4-W4] NF", 168 }, /* nb_kernel134nf */
145 { "RF Coul NF", 19 }, /* nb_kernel200nf */
146 { "RF Coul [W3] NF", 56 }, /* nb_kernel201nf */
147 { "RF Coul [W3-W3] NF", 162 }, /* nb_kernel202nf */
148 { "RF Coul [W4] NF", 56 }, /* nb_kernel203nf */
149 { "RF Coul [W4-W4] NF", 162 }, /* nb_kernel204nf */
150 { "RF Coul + LJ NF", 27 }, /* nb_kernel210nf */
151 { "RF Coul + LJ [W3] NF", 64 }, /* nb_kernel211nf */
152 { "RF Coul + LJ [W3-W3] NF", 170 }, /* nb_kernel212nf */
153 { "RF Coul + LJ [W4] NF", 75 }, /* nb_kernel213nf */
154 { "RF Coul + LJ [W4-W4] NF", 181 }, /* nb_kernel214nf */
155 { "RF Coul + Bham NF", 54 }, /* nb_kernel220nf */
156 { "RF Coul + Bham [W3] NF", 91 }, /* nb_kernel221nf */
157 { "RF Coul + Bham [W3-W3] NF", 197 }, /* nb_kernel222nf */
158 { "RF Coul + Bham [W4] NF", 104 }, /* nb_kernel223nf */
159 { "RF Coul + Bham [W4-W4] NF", 210 }, /* nb_kernel224nf */
160 { "RF Coul + VdW(T) NF", 39 }, /* nb_kernel230nf */
161 { "RF Coul + VdW(T) [W3] NF", 76 }, /* nb_kernel231nf */
162 { "RF Coul + VdW(T) [W3-W3] NF", 182 }, /* nb_kernel232nf */
163 { "RF Coul + VdW(T) [W4] NF", 89 }, /* nb_kernel233nf */
164 { "RF Coul + VdW(T) [W4-W4] NF", 195 }, /* nb_kernel234nf */
165 { "Coul(T) NF", 26 }, /* nb_kernel300nf */
166 { "Coul(T) [W3] NF", 77 }, /* nb_kernel301nf */
167 { "Coul(T) [W3-W3] NF", 225 }, /* nb_kernel302nf */
168 { "Coul(T) [W4] NF", 77 }, /* nb_kernel303nf */
169 { "Coul(T) [W4-W4] NF", 225 }, /* nb_kernel304nf */
170 { "Coul(T) + LJ NF", 34 }, /* nb_kernel310nf */
171 { "Coul(T) + LJ [W3] NF", 85 }, /* nb_kernel311nf */
172 { "Coul(T) + LJ [W3-W3] NF", 233 }, /* nb_kernel312nf */
173 { "Coul(T) + LJ [W4] NF", 96 }, /* nb_kernel313nf */
174 { "Coul(T) + LJ [W4-W4] NF", 244 }, /* nb_kernel314nf */
175 { "Coul(T) + Bham NF", 61 }, /* nb_kernel320nf */
176 { "Coul(T) + Bham [W3] NF", 112 }, /* nb_kernel321nf */
177 { "Coul(T) + Bham [W3-W3] NF", 260 }, /* nb_kernel322nf */
178 { "Coul(T) + Bham [W4] NF", 125 }, /* nb_kernel323nf */
179 { "Coul(T) + Bham [W4-W4] NF", 273 }, /* nb_kernel324nf */
180 { "Coul(T) + VdW(T) NF", 42 }, /* nb_kernel330nf */
181 { "Coul(T) + VdW(T) [W3] NF", 93 }, /* nb_kernel331nf */
182 { "Coul(T) + VdW(T) [W3-W3] NF", 241 }, /* nb_kernel332nf */
183 { "Coul(T) + VdW(T) [W4] NF", 110 }, /* nb_kernel333nf */
184 { "Coul(T) + VdW(T) [W4-W4] NF", 258 }, /* nb_kernel334nf */
185 { "Generalized Born Coulomb NF", 29 }, /* nb_kernel400nf */
186 { "GB Coulomb + LJ NF", 37 }, /* nb_kernel410nf */
187 { "GB Coulomb + VdW(T) NF", 49 }, /* nb_kernel430nf */
188 { "Free energy innerloop", 150 }, /* free energy, estimate */
189 { "All-vs-All, Coul + LJ", 38 },
190 { "All-vs-All, GB + LJ", 61 },
191 { "Outer nonbonded loop", 10 },
192 { "1,4 nonbonded interactions", 90 },
193 { "Born radii (Still)", 47 },
194 { "Born radii (HCT/OBC)", 183 },
195 { "Born force chain rule", 15 },
196 { "All-vs-All Still radii", 47 },
197 { "All-vs-All HCT/OBC radii", 183 },
198 { "All-vs-All Born chain rule", 15 },
199 { "Calc Weights", 36 },
200 { "Spread Q", 6 },
201 { "Spread Q Bspline", 2 },
202 { "Gather F", 23 },
203 { "Gather F Bspline", 12 },
204 { "3D-FFT", 8 },
205 { "Convolution", 4 },
206 { "Solve PME", 64 },
207 { "NS-Pairs", 21 },
208 { "Reset In Box", 3 },
209 { "Shift-X", 6 },
210 { "CG-CoM", 3 },
211 { "Sum Forces", 1 },
212 { "Bonds", 59 },
213 { "G96Bonds", 44 },
214 { "FENE Bonds", 58 },
215 { "Tab. Bonds", 62 },
216 { "Angles", 168 },
217 { "G96Angles", 150 },
218 { "Quartic Angles", 160 },
219 { "Tab. Angles", 169 },
220 { "Propers", 229 },
221 { "Impropers", 208 },
222 { "RB-Dihedrals", 247 },
223 { "Four. Dihedrals", 247 },
224 { "Tab. Dihedrals", 227 },
225 { "Dist. Restr.", 200 },
226 { "Orient. Restr.", 200 },
227 { "Dihedral Restr.", 200 },
228 { "Pos. Restr.", 50 },
229 { "Angle Restr.", 191 },
230 { "Angle Restr. Z", 164 },
231 { "Morse Potent.", 58 },
232 { "Cubic Bonds", 54 },
233 { "Walls", 31 },
234 { "Water Pol.", 62 },
235 { "Thole Pol.", 296 },
236 { "Virial", 18 },
237 { "Update", 31 },
238 { "Ext.ens. Update", 54 },
239 { "Stop-CM", 10 },
240 { "P-Coupling", 6 },
241 { "Calc-Ekin", 27 },
242 { "Lincs", 60 },
243 { "Lincs-Mat", 4 },
244 { "Shake", 30 },
245 { "Constraint-V", 8 },
246 { "Shake-Init", 10 },
247 { "Constraint-Vir", 24 },
248 { "Settle", 323 },
249 { "Virtual Site 2", 23 },
250 { "Virtual Site 3", 37 },
251 { "Virtual Site 3fd", 95 },
252 { "Virtual Site 3fad", 176 },
253 { "Virtual Site 3out", 87 },
254 { "Virtual Site 4fd", 110 },
255 { "Virtual Site 4fdn", 254 },
256 { "Virtual Site N", 15 },
257 { "Mixed Generalized Born stuff", 10 }
261 void init_nrnb(t_nrnb *nrnb)
263 int i;
265 for(i=0; (i<eNRNB); i++)
266 nrnb->n[i]=0.0;
269 void cp_nrnb(t_nrnb *dest, t_nrnb *src)
271 int i;
273 for(i=0; (i<eNRNB); i++)
274 dest->n[i]=src->n[i];
277 void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
279 int i;
281 for(i=0; (i<eNRNB); i++)
282 dest->n[i]=s1->n[i]+s2->n[i];
285 void print_nrnb(FILE *out, t_nrnb *nrnb)
287 int i;
289 for(i=0; (i<eNRNB); i++)
290 if (nrnb->n[i] > 0)
291 fprintf(out," %-26s %10.0f.\n",nbdata[i].name,nrnb->n[i]);
294 void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
296 nrnb->n[enr]+=inc;
297 #ifdef DEBUG_NRNB
298 printf("nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
299 nbdata[enr].name,enr,inc,file,line);
300 #endif
303 void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
305 int i;
306 double mni,frac,tfrac,tflop;
307 const char *myline = "-----------------------------------------------------------------------------";
309 *nbfs = 0.0;
310 for(i=0; (i<eNR_NBKERNEL_NR); i++) {
311 if (strstr(nbdata[i].name,"W3-W3") != NULL)
312 *nbfs += 9e-6*nrnb->n[i];
313 else if (strstr(nbdata[i].name,"W3") != NULL)
314 *nbfs += 3e-6*nrnb->n[i];
315 else if (strstr(nbdata[i].name,"W4-W4") != NULL)
316 *nbfs += 10e-6*nrnb->n[i];
317 else if (strstr(nbdata[i].name,"W4") != NULL)
318 *nbfs += 4e-6*nrnb->n[i];
319 else
320 *nbfs += 1e-6*nrnb->n[i];
322 tflop=0;
323 for(i=0; (i<eNRNB); i++)
324 tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
326 if (tflop == 0) {
327 fprintf(out,"No MEGA Flopsen this time\n");
328 return;
330 if (out) {
331 fprintf(out,"\n\tM E G A - F L O P S A C C O U N T I N G\n\n");
334 if (out) {
335 fprintf(out," RF=Reaction-Field FE=Free Energy SCFE=Soft-Core/Free Energy\n");
336 fprintf(out," T=Tabulated W3=SPC/TIP3p W4=TIP4p (single or pairs)\n");
337 fprintf(out," NF=No Forces\n\n");
339 fprintf(out," %-32s %16s %15s %7s\n",
340 "Computing:","M-Number","M-Flops","% Flops");
341 fprintf(out,"%s\n",myline);
343 *mflop=0.0;
344 tfrac=0.0;
345 for(i=0; (i<eNRNB); i++) {
346 mni = 1e-6*nrnb->n[i];
347 *mflop += mni*nbdata[i].flop;
348 frac = 100.0*mni*nbdata[i].flop/tflop;
349 tfrac += frac;
350 if (out && mni != 0)
351 fprintf(out," %-32s %16.6f %15.3f %6.1f\n",
352 nbdata[i].name,mni,mni*nbdata[i].flop,frac);
354 if (out) {
355 fprintf(out,"%s\n",myline);
356 fprintf(out," %-32s %16s %15.3f %6.1f\n",
357 "Total","",*mflop,tfrac);
358 fprintf(out,"%s\n\n",myline);
362 void print_perf(FILE *out,double nodetime,double realtime,int nprocs,
363 gmx_large_int_t nsteps,real delta_t,
364 double nbfs,double mflop)
366 real runtime;
368 fprintf(out,"\n");
370 if (nodetime == 0.0) {
371 fprintf(out,"nodetime = 0! Infinite Giga flopses!\n");
374 if (nprocs > 1)
376 nodetime = realtime;
377 fprintf(out,"\tParallel run - timing based on wallclock.\n\n");
380 if ((nodetime > 0) && (realtime > 0)) {
381 fprintf(out,"%12s %10s %10s %8s\n","","NODE (s)","Real (s)","(%)");
382 fprintf(out,"%12s %10.3f %10.3f %8.1f\n","Time:",
383 nodetime, realtime, 100.0*nodetime/realtime);
384 if (nodetime > 60) {
385 fprintf(out,"%12s %10s","","");
386 pr_difftime(out,nodetime);
388 if (delta_t > 0) {
389 mflop = mflop/nodetime;
390 runtime = nsteps*delta_t;
391 fprintf(out,"%12s %10s %10s %10s %10s\n",
392 "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
393 "(ns/day)","(hour/ns)");
394 fprintf(out,"%12s %10.3f %10.3f %10.3f %10.3f\n","Performance:",
395 nbfs/nodetime,(mflop > 1000) ? (mflop/1000) : mflop,
396 runtime*24*3.6/nodetime,1000*nodetime/(3600*runtime));
397 } else {
398 fprintf(out,"%12s %10s %10s %14s\n",
399 "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
400 "(steps/hour)");
401 fprintf(out,"%12s %10.3f %10.3f %14.1f\n","Performance:",
402 nbfs/nodetime,(mflop > 1000) ? (mflop/1000) : mflop,
403 nsteps*3600.0/nodetime);
408 int cost_nrnb(int enr)
410 return nbdata[enr].flop;
413 const char *nrnb_str(int enr)
415 return nbdata[enr].name;
418 static const int force_index[]={
419 eNR_BONDS, eNR_ANGLES, eNR_PROPER, eNR_IMPROPER,
420 eNR_RB, eNR_DISRES, eNR_ORIRES, eNR_POSRES,
421 eNR_NS, eNR_NBKERNEL_OUTER
423 #define NFORCE_INDEX asize(force_index)
425 static const int constr_index[]={
426 eNR_SHAKE, eNR_SHAKE_RIJ, eNR_SETTLE, eNR_UPDATE, eNR_PCOUPL,
427 eNR_CONSTR_VIR,eNR_CONSTR_V
429 #define NCONSTR_INDEX asize(constr_index)
431 static double pr_av(FILE *log,t_commrec *cr,
432 double fav,double ftot[],const char *title)
434 int i,perc;
435 double dperc,unb;
437 unb=0;
438 if (fav > 0) {
439 fav /= cr->nnodes - cr->npmenodes;
440 fprintf(log,"\n %-26s",title);
441 for(i=0; (i<cr->nnodes); i++) {
442 dperc=(100.0*ftot[i])/fav;
443 unb=max(unb,dperc);
444 perc=dperc;
445 fprintf(log,"%3d ",perc);
447 if (unb > 0) {
448 perc=10000.0/unb;
449 fprintf(log,"%6d%%\n\n",perc);
451 else
452 fprintf(log,"\n\n");
454 return unb;
457 void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
459 int i,j,perc;
460 double dperc,unb,uf,us;
461 double *ftot,fav;
462 double *stot,sav;
463 t_nrnb *av;
465 snew(av,1);
466 snew(ftot,cr->nnodes);
467 snew(stot,cr->nnodes);
468 init_nrnb(av);
469 for(i=0; (i<cr->nnodes); i++) {
470 add_nrnb(av,av,&(nrnb[i]));
471 /* Cost due to forces */
472 for(j=0; (j<eNR_NBKERNEL_NR); j++)
473 ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
474 for(j=0; (j<NFORCE_INDEX); j++)
475 ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
476 /* Due to shake */
477 for(j=0; (j<NCONSTR_INDEX); j++) {
478 stot[i]+=nrnb[i].n[constr_index[j]]*cost_nrnb(constr_index[j]);
481 for(j=0; (j<eNRNB); j++)
482 av->n[j]=av->n[j]/(double)(cr->nnodes - cr->npmenodes);
484 fprintf(log,"\nDetailed load balancing info in percentage of average\n");
486 fprintf(log," Type NODE:");
487 for(i=0; (i<cr->nnodes); i++)
488 fprintf(log,"%3d ",i);
489 fprintf(log,"Scaling\n");
490 fprintf(log,"---------------------------");
491 for(i=0; (i<cr->nnodes); i++)
492 fprintf(log,"----");
493 fprintf(log,"-------\n");
495 for(j=0; (j<eNRNB); j++) {
496 unb=100.0;
497 if (av->n[j] > 0) {
498 fprintf(log," %-26s",nrnb_str(j));
499 for(i=0; (i<cr->nnodes); i++) {
500 dperc=(100.0*nrnb[i].n[j])/av->n[j];
501 unb=max(unb,dperc);
502 perc=dperc;
503 fprintf(log,"%3d ",perc);
505 if (unb > 0) {
506 perc=10000.0/unb;
507 fprintf(log,"%6d%%\n",perc);
509 else
510 fprintf(log,"\n");
513 fav=sav=0;
514 for(i=0; (i<cr->nnodes); i++) {
515 fav+=ftot[i];
516 sav+=stot[i];
518 uf=pr_av(log,cr,fav,ftot,"Total Force");
519 us=pr_av(log,cr,sav,stot,"Total Constr.");
521 unb=(uf*fav+us*sav)/(fav+sav);
522 if (unb > 0) {
523 unb=10000.0/unb;
524 fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);