src/mdlib/gmx_wallcycle.c
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2008, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <string.h>
#include "gmx_wallcycle.h"
#include "gmx_cyclecounter.h"
#include "smalloc.h"
#include "gmx_fatal.h"

#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
#ifdef GMX_THREADS
#include "tmpi.h"
#endif
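
/* Overview (illustrative sketch, not taken from a specific caller):
 * a typical mdrun-style caller uses this module roughly as
 *
 *     gmx_wallcycle_t wc = wallcycle_init(fplog, 0, cr);
 *     wallcycle_start(wc, ewcFORCE);
 *     ... do the force calculation ...
 *     wallcycle_stop(wc, ewcFORCE);
 *     ...
 *     wallcycle_sum(cr, wc, cycles);   // cycles is a double[ewcNR]
 *     wallcycle_print(fplog, nnodes, npme, realtime, wc, cycles);
 *     wallcycle_destroy(wc);
 *
 * wallcycle_init returns NULL when no cycle counter is available, and all
 * other entry points accept wc == NULL, so timing can be disabled cleanly.
 */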
typedef struct
{
    int          n;
    gmx_cycles_t c;
    gmx_cycles_t start;
    gmx_cycles_t last;
} wallcc_t;

typedef struct gmx_wallcycle
{
    wallcc_t        *wcc;
    /* variables for testing/debugging */
    gmx_bool        wc_barrier;
    wallcc_t        *wcc_all;
    int             wc_depth;
    int             ewc_prev;
    gmx_cycles_t    cycle_prev;
    gmx_large_int_t reset_counters;
#ifdef GMX_MPI
    MPI_Comm        mpi_comm_mygroup;
#endif
} gmx_wallcycle_t_t;

/* Each name should not exceed 19 characters */
static const char *wcn[ewcNR] =
{ "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load", "DD comm. bounds", "Vsite constr.", "Send X to PME", "Comm. coord.", "Neighbor search", "Born radii", "Force", "Wait + Comm. F",
  "PME mesh", "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME solve", "Wait + Comm. X/F", "Wait + Recv. PME F", "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies",
  "Enforced rotation", "Test" };

gmx_bool wallcycle_have_counter(void)
{
    return gmx_cycles_have_counter();
}

gmx_wallcycle_t wallcycle_init(FILE *fplog,int resetstep,t_commrec *cr)
{
    gmx_wallcycle_t wc;

    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc,1);

    wc->wc_barrier     = FALSE;
    wc->wcc_all        = NULL;
    wc->wc_depth       = 0;
    wc->ewc_prev       = -1;
    wc->reset_counters = resetstep;

#ifdef GMX_MPI
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier       = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }
#endif

    snew(wc->wcc,ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
#ifndef GMX_THREADS
        if (fplog)
        {
            fprintf(fplog,"\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all,ewcNR*ewcNR);
#else
        gmx_fatal(FARGS, "GMX_CYCLE_ALL is incompatible with threaded code");
#endif
    }

    return wc;
}

void wallcycle_destroy(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return;
    }
    if (wc->wcc != NULL)
    {
        sfree(wc->wcc);
    }
    if (wc->wcc_all != NULL)
    {
        sfree(wc->wcc_all);
    }
    sfree(wc);
}

static void wallcycle_all_start(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->ewc_prev   = ewc;
    wc->cycle_prev = cycle;
}

static void wallcycle_all_stop(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
{
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
}

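/* When GMX_CYCLE_ALL is enabled (wcc_all != NULL), wallcycle_start and
 * wallcycle_stop also track the nesting depth of the counters. Stopping a
 * counter at depth 2 records it as the "previous" event via
 * wallcycle_all_start; starting the next counter at depth 3 then charges the
 * cycles elapsed in between to the (previous, current) pair in wcc_all, so
 * code running between the regular counters is accounted for as well.
 */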
void wallcycle_start(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle;

    if (wc == NULL)
    {
        return;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    wc->wcc[ewc].start = cycle;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth++;
        if (ewc == ewcRUN)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 3)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
    }
}

double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle,last;

    if (wc == NULL)
    {
        return 0;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    last  = cycle - wc->wcc[ewc].start;
    wc->wcc[ewc].c += last;
    wc->wcc[ewc].n++;
    if (wc->wcc_all)
    {
        wc->wc_depth--;
        if (ewc == ewcRUN)
        {
            wallcycle_all_stop(wc,ewc,cycle);
        }
        else if (wc->wc_depth == 2)
        {
            wallcycle_all_start(wc,ewc,cycle);
        }
    }

    return last;
}

void wallcycle_reset_all(gmx_wallcycle_t wc)
{
    int i;

    if (wc == NULL)
    {
        return;
    }

    for(i=0; i<ewcNR; i++)
    {
        wc->wcc[i].n     = 0;
        wc->wcc[i].c     = 0;
        wc->wcc[i].start = 0;
        wc->wcc[i].last  = 0;
    }
}

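/* wallcycle_sum removes double counting before the reduction: DD load
 * balancing and DD bounds communication are subtracted from the domain
 * decomposition count, and when there are no separate PME nodes the PME mesh
 * cycles are subtracted from the force count (with separate PME nodes, their
 * Wait + Comm. time is derived instead). The per-node results are then
 * combined over cr->mpi_comm_mysim: call counts with MPI_MAX, cycle counts
 * with MPI_SUM, so cycles[] afterwards holds totals summed over all nodes.
 */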
void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc,double cycles[])
{
    wallcc_t *wcc;
    double   cycles_n[ewcNR],buf[ewcNR],*cyc_all,*buf_all;
    int      i;

    if (wc == NULL)
    {
        return;
    }

    wcc = wc->wcc;

    if (wcc[ewcDDCOMMLOAD].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMLOAD].c;
    }
    if (wcc[ewcDDCOMMBOUND].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMBOUND].c;
    }
    if (cr->npmenodes == 0)
    {
        /* All nodes do PME (or no PME at all) */
        if (wcc[ewcPMEMESH].n > 0)
        {
            wcc[ewcFORCE].c -= wcc[ewcPMEMESH].c;
        }
    }
    else
    {
        /* There are PME-only nodes */
        if (wcc[ewcPMEMESH].n > 0)
        {
            /* This must be a PME only node, calculate the Wait + Comm. time */
            wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH].c;
        }
    }

    /* Store the cycles in a double buffer for summing */
    for(i=0; i<ewcNR; i++)
    {
        cycles_n[i] = (double)wcc[i].n;
        cycles[i]   = (double)wcc[i].c;
    }

#ifdef GMX_MPI
    if (cr->nnodes > 1)
    {
        MPI_Allreduce(cycles_n,buf,ewcNR,MPI_DOUBLE,MPI_MAX,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR; i++)
        {
            wcc[i].n = (int)(buf[i] + 0.5);
        }
        MPI_Allreduce(cycles,buf,ewcNR,MPI_DOUBLE,MPI_SUM,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR; i++)
        {
            cycles[i] = buf[i];
        }

        if (wc->wcc_all != NULL)
        {
            snew(cyc_all,ewcNR*ewcNR);
            snew(buf_all,ewcNR*ewcNR);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                cyc_all[i] = wc->wcc_all[i].c;
            }
            MPI_Allreduce(cyc_all,buf_all,ewcNR*ewcNR,MPI_DOUBLE,MPI_SUM,
                          cr->mpi_comm_mysim);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                wc->wcc_all[i].c = buf_all[i];
            }
            sfree(buf_all);
            sfree(cyc_all);
        }
    }
#endif
}

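/* print_cycles writes one row of the accounting table: counter name, number
 * of nodes, call count, cycles in units of 1e9 (G-Cycles), the time obtained
 * as cycles*c2t (Seconds column), and the percentage of the total cycle
 * count. Rows with zero cycles are skipped.
 */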
static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
                         int n, double c, double tot)
{
    char num[11];

    if (c > 0)
    {
        if (n > 0)
        {
            sprintf(num,"%10d",n);
        }
        else
        {
            sprintf(num,"          ");
        }
        fprintf(fplog," %-19s %4d %10s %12.3f %10.1f %5.1f\n",
                name,nnodes,num,c*1e-9,c*c2t,100*c/tot);
    }
}

static gmx_bool subdivision(int ewc)
{
    return (ewc >= ewcPME_REDISTXF && ewc <= ewcPME_SOLVE);
}

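/* In wallcycle_print, tot is the node-summed Run cycle count, and
 * c2t = nnodes*realtime/tot is the factor used to convert the node-summed
 * cycle counts in cycles[] into the Seconds column. The main table lists the
 * top-level counters; the PME mesh subdivisions (redist, spread/gather,
 * 3D-FFT, solve) are printed in a separate block when PME mesh cycles were
 * recorded.
 */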
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                     gmx_wallcycle_t wc, double cycles[])
{
    double c2t,tot,sum;
    int    i,j,npp;
    char   buf[STRLEN];
    const char *myline = "-----------------------------------------------------------------------";

    if (wc == NULL)
    {
        return;
    }

    if (npme > 0)
    {
        npp = nnodes - npme;
    }
    else
    {
        npp  = nnodes;
        npme = nnodes;
    }
    tot = cycles[ewcRUN];
    /* Conversion factor from cycles to seconds */
    if (tot > 0)
    {
        c2t = nnodes*realtime/tot;
    }
    else
    {
        c2t = 0;
    }

    fprintf(fplog,"\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");

    fprintf(fplog," Computing:         Nodes     Number     G-Cycles    Seconds     %c\n",'%');
    fprintf(fplog,"%s\n",myline);
    sum = 0;
    for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
    {
        if (!subdivision(i))
        {
            print_cycles(fplog,c2t,wcn[i],
                         (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
                         wc->wcc[i].n,cycles[i],tot);
            sum += cycles[i];
        }
    }
    if (wc->wcc_all != NULL)
    {
        for(i=0; i<ewcNR; i++)
        {
            for(j=0; j<ewcNR; j++)
            {
                sprintf(buf,"%-9s",wcn[i]);
                buf[9] = ' ';
                sprintf(buf+10,"%-9s",wcn[j]);
                buf[19] = '\0';
                print_cycles(fplog,c2t,buf,
                             (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
                             wc->wcc_all[i*ewcNR+j].n,
                             wc->wcc_all[i*ewcNR+j].c,
                             tot);
            }
        }
    }
    print_cycles(fplog,c2t,"Rest",npp,0,tot-sum,tot);
    fprintf(fplog,"%s\n",myline);
    print_cycles(fplog,c2t,"Total",nnodes,0,tot,tot);
    fprintf(fplog,"%s\n",myline);

    if (wc->wcc[ewcPMEMESH].n > 0)
    {
        fprintf(fplog,"%s\n",myline);
        for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
        {
            if (subdivision(i))
            {
                print_cycles(fplog,c2t,wcn[i],
                             (i>=ewcPMEMESH || i<=ewcPME_SOLVE) ? npme : npp,
                             wc->wcc[i].n,cycles[i],tot);
            }
        }
        fprintf(fplog,"%s\n",myline);
    }

    if (cycles[ewcMoveE] > tot*0.05)
    {
        sprintf(buf,
                "NOTE: %d %% of the run time was spent communicating energies,\n"
                "      you might want to use the -gcom option of mdrun\n",
                (int)(100*cycles[ewcMoveE]/tot+0.5));
        if (fplog)
        {
            fprintf(fplog,"\n%s\n",buf);
        }
        /* Only the sim master calls this function, so always print to stderr */
        fprintf(stderr,"\n%s\n",buf);
    }
}

extern gmx_large_int_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return -1;
    }
    return wc->reset_counters;
}

extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_large_int_t reset_counters)
{
    if (wc == NULL)
        return;

    wc->reset_counters = reset_counters;
}