/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * This source code is part of
 *
 * GROningen MAchine for Chemical Simulations
 *
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2008, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 */
42 #include "gmx_wallcycle.h"
43 #include "gmx_cyclecounter.h"
45 #include "gmx_fatal.h"
/* One entry per cycle counter: number of start/stop pairs, accumulated
 * cycle count, and the cycle reading at the most recent start (fields as
 * used throughout this file). */
typedef struct
{
    int          n;
    gmx_cycles_t c;
    gmx_cycles_t start;
} wallcc_t;

typedef struct gmx_wallcycle
{
    wallcc_t        *wcc;
    /* variables for testing/debugging */
    gmx_bool        wc_barrier;
    wallcc_t        *wcc_all;
    int             wc_depth;
    int             ewc_prev;
    gmx_cycles_t    cycle_prev;
    gmx_large_int_t reset_counters;
#ifdef GMX_MPI
    MPI_Comm        mpi_comm_mygroup;
#endif
} gmx_wallcycle_t_t;
/* Each name should not exceed 19 characters */
static const char *wcn[ewcNR] =
{
    "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
    "DD comm. bounds", "Vsite constr.", "Send X to PME", "Comm. coord.",
    "Neighbor search", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
    "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME solve",
    "Wait + Comm. X/F", "Wait + Recv. PME F", "Vsite spread", "Write traj.",
    "Update", "Constraints", "Comm. energies", "Enforced rotation", "Test"
};
gmx_bool wallcycle_have_counter(void)
{
    return gmx_cycles_have_counter();
}
gmx_wallcycle_t wallcycle_init(FILE *fplog, int resetstep, t_commrec *cr)
{
    gmx_wallcycle_t wc;

    if (!wallcycle_have_counter())
    {
        return NULL;
    }

    snew(wc, 1);

    wc->wc_barrier     = FALSE;
    wc->wcc_all        = NULL;
    wc->wc_depth       = 0;
    wc->ewc_prev       = -1;
    wc->reset_counters = resetstep;

#ifdef GMX_MPI
    if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog, "\nWill call MPI_Barrier before each cycle start/stop call\n\n");
        }
        wc->wc_barrier       = TRUE;
        wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
    }
#endif

    snew(wc->wcc, ewcNR);
    if (getenv("GMX_CYCLE_ALL") != NULL)
    {
/*#ifndef GMX_THREADS*/
        if (fplog)
        {
            fprintf(fplog, "\nWill time all the code during the run\n\n");
        }
        snew(wc->wcc_all, ewcNR*ewcNR);
/*#else*/
        /*gmx_fatal(FARGS, "GMX_CYCLE_ALL is incompatible with threaded code");*/
/*#endif*/
    }

    return wc;
}
void wallcycle_destroy(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return;
    }

    if (wc->wcc != NULL)
    {
        sfree(wc->wcc);
    }
    if (wc->wcc_all != NULL)
    {
        sfree(wc->wcc_all);
    }
    sfree(wc);
}
static void wallcycle_all_start(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
{
    wc->ewc_prev   = ewc;
    wc->cycle_prev = cycle;
}
static void wallcycle_all_stop(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
{
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
    wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
}
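
/* Note on indexing: with GMX_CYCLE_ALL set, wcc_all is a flattened
 * ewcNR x ewcNR matrix. Cell [prev*ewcNR + ewc] accumulates the cycles
 * between the point where counter 'prev' was last recorded
 * (wallcycle_all_start) and the point where counter 'ewc' is reached now;
 * this is how "all the code", including stretches between the regular
 * counters, gets timed.
 */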
void wallcycle_start(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle;

    if (wc == NULL)
    {
        return;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    wc->wcc[ewc].start = cycle;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth++;
        if (ewc == ewcRUN)
        {
            wallcycle_all_start(wc, ewc, cycle);
        }
        else if (wc->wc_depth == 3)
        {
            wallcycle_all_stop(wc, ewc, cycle);
        }
    }
}
double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
{
    gmx_cycles_t cycle, last;

    if (wc == NULL)
    {
        return 0;
    }

#ifdef GMX_MPI
    if (wc->wc_barrier)
    {
        MPI_Barrier(wc->mpi_comm_mygroup);
    }
#endif

    cycle = gmx_cycles_read();
    last  = cycle - wc->wcc[ewc].start;
    wc->wcc[ewc].c += last;
    wc->wcc[ewc].n++;
    if (wc->wcc_all != NULL)
    {
        wc->wc_depth--;
        if (ewc == ewcRUN)
        {
            wallcycle_all_stop(wc, ewc, cycle);
        }
        else if (wc->wc_depth == 2)
        {
            wallcycle_all_start(wc, ewc, cycle);
        }
    }

    return last;
}
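
/* Minimal sketch of the depth bookkeeping above (call sequence assumed for
 * illustration; ewcSTEP stands for whatever counter wraps a single step):
 *
 *     wallcycle_start(wc, ewcRUN);    // depth 1, all_start(ewcRUN)
 *     wallcycle_start(wc, ewcSTEP);   // depth 2
 *     wallcycle_start(wc, ewcFORCE);  // depth 3: all_stop closes prev slice
 *     wallcycle_stop(wc, ewcFORCE);   // depth 2: all_start opens next slice
 *     wallcycle_stop(wc, ewcSTEP);    // depth 1
 *     wallcycle_stop(wc, ewcRUN);     // depth 0, all_stop(ewcRUN)
 */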
void wallcycle_reset_all(gmx_wallcycle_t wc)
{
    int i;

    if (wc == NULL)
    {
        return;
    }

    for(i=0; i<ewcNR; i++)
    {
        wc->wcc[i].n     = 0;
        wc->wcc[i].c     = 0;
        wc->wcc[i].start = 0;
    }
}
void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc, double cycles[])
{
    wallcc_t *wcc;
    double   cycles_n[ewcNR], buf[ewcNR], *cyc_all, *buf_all;
    int      i;

    if (wc == NULL)
    {
        return;
    }

    wcc = wc->wcc;

    if (wcc[ewcDDCOMMLOAD].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMLOAD].c;
    }
    if (wcc[ewcDDCOMMBOUND].n > 0)
    {
        wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMBOUND].c;
    }
    if (cr->npmenodes == 0)
    {
        /* All nodes do PME (or no PME at all) */
        if (wcc[ewcPMEMESH].n > 0)
        {
            wcc[ewcFORCE].c -= wcc[ewcPMEMESH].c;
        }
    }
    else
    {
        /* There are PME-only nodes */
        if (wcc[ewcPMEMESH].n > 0)
        {
            /* This must be a PME-only node, calculate the Wait + Comm. time */
            wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH].c;
        }
    }

    /* Store the cycles in a double buffer for summing */
    for(i=0; i<ewcNR; i++)
    {
        cycles_n[i] = (double)wcc[i].n;
        cycles[i]   = (double)wcc[i].c;
    }

#ifdef GMX_MPI
    if (cr->nnodes > 1)
    {
        MPI_Allreduce(cycles_n, buf, ewcNR, MPI_DOUBLE, MPI_MAX,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR; i++)
        {
            wcc[i].n = (int)(buf[i] + 0.5);
        }
        MPI_Allreduce(cycles, buf, ewcNR, MPI_DOUBLE, MPI_SUM,
                      cr->mpi_comm_mysim);
        for(i=0; i<ewcNR; i++)
        {
            cycles[i] = buf[i];
        }

        if (wc->wcc_all != NULL)
        {
            snew(cyc_all, ewcNR*ewcNR);
            snew(buf_all, ewcNR*ewcNR);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                cyc_all[i] = wc->wcc_all[i].c;
            }
            MPI_Allreduce(cyc_all, buf_all, ewcNR*ewcNR, MPI_DOUBLE, MPI_SUM,
                          cr->mpi_comm_mysim);
            for(i=0; i<ewcNR*ewcNR; i++)
            {
                wc->wcc_all[i].c = buf_all[i];
            }
            sfree(buf_all);
            sfree(cyc_all);
        }
    }
#endif
}
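
/* Reduction semantics above: call counts are combined with MPI_MAX (each
 * rank should have made the same number of calls, so the maximum is just a
 * robust representative), while cycle counts are combined with MPI_SUM so
 * the printed numbers are totals over all ranks. Two-rank example (values
 * invented):
 *
 *     rank 0: wcc[ewcFORCE] = { n = 100, c = 4.0e9 }
 *     rank 1: wcc[ewcFORCE] = { n = 100, c = 6.0e9 }
 *     after wallcycle_sum(): wcc[ewcFORCE].n = 100, cycles[ewcFORCE] = 1.0e10
 */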
static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
                         int n, double c, double tot)
{
    char num[11];

    if (c > 0)
    {
        if (n > 0)
        {
            sprintf(num, "%10d", n);
        }
        else
        {
            sprintf(num, "          ");
        }
        fprintf(fplog, " %-19s %4d %10s %12.3f %10.1f %5.1f\n",
                name, nnodes, num, c*1e-9, c*c2t, 100*c/tot);
    }
}
static gmx_bool subdivision(int ewc)
{
    return (ewc >= ewcPME_REDISTXF && ewc <= ewcPME_SOLVE);
}
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                     gmx_wallcycle_t wc, double cycles[])
{
    double c2t, tot, sum;
    int    i, j, npp;
    char   buf[STRLEN];
    const char *myline = "-----------------------------------------------------------------------";

    if (wc == NULL)
    {
        return;
    }

    npp = nnodes - npme;
    tot = cycles[ewcRUN];
    /* Conversion factor from cycles to seconds */
    if (tot > 0)
    {
        c2t = nnodes*realtime/tot;
    }
    else
    {
        c2t = 0;
    }

    fprintf(fplog, "\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");

    fprintf(fplog, " Computing:         Nodes     Number     G-Cycles    Seconds     %c\n", '%');
    fprintf(fplog, "%s\n", myline);
    sum = 0;
    for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
    {
        if (!subdivision(i))
        {
            print_cycles(fplog, c2t, wcn[i],
                         (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
                         wc->wcc[i].n, cycles[i], tot);
            sum += cycles[i];
        }
    }
    if (wc->wcc_all != NULL)
    {
        for(i=0; i<ewcNR; i++)
        {
            for(j=0; j<ewcNR; j++)
            {
                sprintf(buf, "%-9s", wcn[i]);
                buf[9] = ' ';
                sprintf(buf+10, "%-9s", wcn[j]);
                buf[19] = '\0';
                print_cycles(fplog, c2t, buf,
                             (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
                             wc->wcc_all[i*ewcNR+j].n,
                             wc->wcc_all[i*ewcNR+j].c,
                             tot);
            }
        }
    }
    print_cycles(fplog, c2t, "Rest", npp, 0, tot-sum, tot);
    fprintf(fplog, "%s\n", myline);
    print_cycles(fplog, c2t, "Total", nnodes, 0, tot, tot);
    fprintf(fplog, "%s\n", myline);

    if (wc->wcc[ewcPMEMESH].n > 0)
    {
        fprintf(fplog, "%s\n", myline);
        for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
        {
            if (subdivision(i))
            {
                print_cycles(fplog, c2t, wcn[i],
                             (i>=ewcPMEMESH && i<=ewcPME_SOLVE) ? npme : npp,
                             wc->wcc[i].n, cycles[i], tot);
            }
        }
        fprintf(fplog, "%s\n", myline);
    }

    if (cycles[ewcMoveE] > tot*0.05)
    {
        sprintf(buf,
                "NOTE: %d %% of the run time was spent communicating energies,\n"
                "      you might want to use the -gcom option of mdrun\n",
                (int)(100*cycles[ewcMoveE]/tot+0.5));
        if (fplog)
        {
            fprintf(fplog, "\n%s\n", buf);
        }
        /* Only the sim master calls this function, so always print to stderr */
        fprintf(stderr, "\n%s\n", buf);
    }
}
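
/* Illustration of the table printed above (all numbers invented): with
 * c2t = nnodes*realtime/tot, each row shows name, node count, call count,
 * giga-cycles, seconds and the percentage of the total run, e.g.
 *
 *  Computing:         Nodes     Number     G-Cycles    Seconds     %
 * -----------------------------------------------------------------------
 *  Force                  4       5000      400.000       40.0    40.0
 *  PME mesh               4       5000      300.000       30.0    30.0
 * -----------------------------------------------------------------------
 *  Total                  8                1000.000      100.0   100.0
 * -----------------------------------------------------------------------
 */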
extern gmx_large_int_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
{
    if (wc == NULL)
    {
        return -1;
    }

    return wc->reset_counters;
}

extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_large_int_t reset_counters)
{
    if (wc == NULL)
    {
        return;
    }

    wc->reset_counters = reset_counters;
}