Enforced rotation: fixed torque calculation for FLEX potential when using mass-weighting
[gromacs/adressmacs.git] / src / tools / gmx_tune_pme.c
blobed6e9bb407b59468b18ada313746108f29b50ab7
1 /*
2 *
3 * This source code is part of
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2008, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
31 * And Hey:
32 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>
36 #endif
39 #include <time.h>
40 #ifdef HAVE_SYS_TIME_H
41 #include <sys/time.h>
42 #endif
46 #include "statutil.h"
47 #include "typedefs.h"
48 #include "smalloc.h"
49 #include "vec.h"
50 #include "copyrite.h"
51 #include "statutil.h"
52 #include "tpxio.h"
53 #include "string2.h"
54 #include "readinp.h"
55 #include "calcgrid.h"
56 #include "checkpoint.h"
57 #include "gmx_ana.h"
58 #include "names.h"
62 enum {
63 ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
66 /* Enum for situations that can occur during log file parsing, the
67 * corresponding string entries can be found in do_the_tests() in
68 * const char* ParseLog[] */
69 enum {
70 eParselogOK,
71 eParselogNotFound,
72 eParselogNoPerfData,
73 eParselogTerm,
74 eParselogResetProblem,
75 eParselogNoDDGrid,
76 eParselogTPXVersion,
77 eParselogNotParallel,
78 eParselogFatal,
79 eParselogNr
83 typedef struct
85 int nPMEnodes; /* number of PME only nodes used in this test */
86 int nx, ny, nz; /* DD grid */
87 int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
88 double *Gcycles; /* This can contain more than one value if doing multiple tests */
89 double Gcycles_Av;
90 float *ns_per_day;
91 float ns_per_day_Av;
92 float *PME_f_load; /* PME mesh/force load average*/
93 float PME_f_load_Av; /* Average average ;) ... */
94 char *mdrun_cmd_line; /* Mdrun command line used for this test */
95 } t_perf;
98 typedef struct
100 int nr_inputfiles; /* The number of tpr and mdp input files */
101 gmx_large_int_t orig_sim_steps; /* Number of steps to be done in the real simulation */
102 real *rcoulomb; /* The coulomb radii [0...nr_inputfiles] */
103 real *rvdw; /* The vdW radii */
104 real *rlist; /* Neighbourlist cutoff radius */
105 real *rlistlong;
106 int *nkx, *nky, *nkz;
107 real *fsx, *fsy, *fsz; /* Fourierspacing in x,y,z dimension */
108 } t_inputinfo;
111 static void sep_line(FILE *fp)
113 fprintf(fp, "\n------------------------------------------------------------\n");
117 /* Wrapper for system calls */
118 static int gmx_system_call(char *command)
120 #ifdef GMX_NO_SYSTEM
121 gmx_fatal(FARGS,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command);
122 #else
123 return ( system(command) );
124 #endif
128 /* Check if string starts with substring */
129 static gmx_bool str_starts(const char *string, const char *substring)
131 return ( strncmp(string, substring, strlen(substring)) == 0);
135 static void cleandata(t_perf *perfdata, int test_nr)
137 perfdata->Gcycles[test_nr] = 0.0;
138 perfdata->ns_per_day[test_nr] = 0.0;
139 perfdata->PME_f_load[test_nr] = 0.0;
141 return;
145 static gmx_bool is_equal(real a, real b)
147 real diff, eps=1.0e-7;
150 diff = a - b;
152 if (diff < 0.0) diff = -diff;
154 if (diff < eps)
155 return TRUE;
156 else
157 return FALSE;
161 static void finalize(const char *fn_out)
163 char buf[STRLEN];
164 FILE *fp;
167 fp = fopen(fn_out,"r");
168 fprintf(stdout,"\n\n");
170 while( fgets(buf,STRLEN-1,fp) != NULL )
172 fprintf(stdout,"%s",buf);
174 fclose(fp);
175 fprintf(stdout,"\n\n");
176 thanx(stderr);
180 enum {eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr};
182 static int parse_logfile(const char *logfile, const char *errfile,
183 t_perf *perfdata, int test_nr, int presteps, gmx_large_int_t cpt_steps,
184 int nnodes)
186 FILE *fp;
187 char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
188 const char matchstrdd[]="Domain decomposition grid";
189 const char matchstrcr[]="resetting all time and cycle counters";
190 const char matchstrbal[]="Average PME mesh/force load:";
191 const char matchstring[]="R E A L C Y C L E A N D T I M E A C C O U N T I N G";
192 const char errSIG[]="signal, stopping at the next";
193 int iFound;
194 int procs;
195 float dum1,dum2,dum3;
196 int npme;
197 gmx_large_int_t resetsteps=-1;
198 gmx_bool bFoundResetStr = FALSE;
199 gmx_bool bResetChecked = FALSE;
202 if (!gmx_fexist(logfile))
204 fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
205 cleandata(perfdata, test_nr);
206 return eParselogNotFound;
209 fp = fopen(logfile, "r");
210 perfdata->PME_f_load[test_nr] = -1.0;
211 perfdata->guessPME = -1;
213 iFound = eFoundNothing;
214 if (1 == nnodes)
215 iFound = eFoundDDStr; /* Skip some case statements */
217 while (fgets(line, STRLEN, fp) != NULL)
219 /* Remove leading spaces */
220 ltrim(line);
222 /* Check for TERM and INT signals from user: */
223 if ( strstr(line, errSIG) != NULL )
225 fclose(fp);
226 cleandata(perfdata, test_nr);
227 return eParselogTerm;
230 /* Check whether cycle resetting worked */
231 if (presteps > 0 && !bFoundResetStr)
233 if (strstr(line, matchstrcr) != NULL)
235 sprintf(dumstring, "Step %s", gmx_large_int_pfmt);
236 sscanf(line, dumstring, &resetsteps);
237 bFoundResetStr = TRUE;
238 if (resetsteps == presteps+cpt_steps)
240 bResetChecked = TRUE;
242 else
244 sprintf(dumstring , gmx_large_int_pfmt, resetsteps);
245 sprintf(dumstring2, gmx_large_int_pfmt, presteps+cpt_steps);
246 fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
247 " though they were supposed to be reset at step %s!\n",
248 dumstring, dumstring2);
253 /* Look for strings that appear in a certain order in the log file: */
254 switch(iFound)
256 case eFoundNothing:
257 /* Look for domain decomp grid and separate PME nodes: */
258 if (str_starts(line, matchstrdd))
260 sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
261 &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
262 if (perfdata->nPMEnodes == -1)
263 perfdata->guessPME = npme;
264 else if (perfdata->nPMEnodes != npme)
265 gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
266 iFound = eFoundDDStr;
268 /* Catch a few errors that might have occured: */
269 else if (str_starts(line, "There is no domain decomposition for"))
271 return eParselogNoDDGrid;
273 else if (str_starts(line, "reading tpx file"))
275 return eParselogTPXVersion;
277 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
279 return eParselogNotParallel;
281 break;
282 case eFoundDDStr:
283 /* Look for PME mesh/force balance (not necessarily present, though) */
284 if (str_starts(line, matchstrbal))
285 sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
286 /* Look for matchstring */
287 if (str_starts(line, matchstring))
288 iFound = eFoundAccountingStr;
289 break;
290 case eFoundAccountingStr:
291 /* Already found matchstring - look for cycle data */
292 if (str_starts(line, "Total "))
294 sscanf(line,"Total %d %lf",&procs,&(perfdata->Gcycles[test_nr]));
295 iFound = eFoundCycleStr;
297 break;
298 case eFoundCycleStr:
299 /* Already found cycle data - look for remaining performance info and return */
300 if (str_starts(line, "Performance:"))
302 sscanf(line,"%s %f %f %f %f", dumstring, &dum1, &dum2, &(perfdata->ns_per_day[test_nr]), &dum3);
303 fclose(fp);
304 if (bResetChecked || presteps == 0)
305 return eParselogOK;
306 else
307 return eParselogResetProblem;
309 break;
311 } /* while */
313 /* Check why there is no performance data in the log file.
314 * Did a fatal errors occur? */
315 if (gmx_fexist(errfile))
317 fp = fopen(errfile, "r");
318 while (fgets(line, STRLEN, fp) != NULL)
320 if ( str_starts(line, "Fatal error:") )
322 if (fgets(line, STRLEN, fp) != NULL)
323 fprintf(stderr, "\nWARNING: A fatal error has occured during this benchmark:\n"
324 "%s\n", line);
325 fclose(fp);
326 cleandata(perfdata, test_nr);
327 return eParselogFatal;
330 fclose(fp);
332 else
334 fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
337 /* Giving up ... we could not find out why there is no performance data in
338 * the log file. */
339 fprintf(stdout, "No performance data in log file.\n");
340 fclose(fp);
341 cleandata(perfdata, test_nr);
343 return eParselogNoPerfData;
347 static gmx_bool analyze_data(
348 FILE *fp,
349 const char *fn,
350 t_perf **perfdata,
351 int nnodes,
352 int ntprs,
353 int ntests,
354 int nrepeats,
355 t_inputinfo *info,
356 int *index_tpr, /* OUT: Nr of mdp file with best settings */
357 int *npme_optimal) /* OUT: Optimal number of PME nodes */
359 int i,j,k;
360 int line=0, line_win=-1;
361 int k_win=-1, i_win=-1, winPME;
362 double s=0.0; /* standard deviation */
363 t_perf *pd;
364 char strbuf[STRLEN];
365 char str_PME_f_load[13];
366 gmx_bool bCanUseOrigTPR;
369 if (nrepeats > 1)
371 sep_line(fp);
372 fprintf(fp, "Summary of successful runs:\n");
373 fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
374 if (nnodes > 1)
375 fprintf(fp, " DD grid");
376 fprintf(fp, "\n");
380 for (k=0; k<ntprs; k++)
382 for (i=0; i<ntests; i++)
384 /* Select the right dataset: */
385 pd = &(perfdata[k][i]);
387 pd->Gcycles_Av = 0.0;
388 pd->PME_f_load_Av = 0.0;
389 pd->ns_per_day_Av = 0.0;
391 if (pd->nPMEnodes == -1)
392 sprintf(strbuf, "(%3d)", pd->guessPME);
393 else
394 sprintf(strbuf, " ");
396 /* Get the average run time of a setting */
397 for (j=0; j<nrepeats; j++)
399 pd->Gcycles_Av += pd->Gcycles[j];
400 pd->PME_f_load_Av += pd->PME_f_load[j];
402 pd->Gcycles_Av /= nrepeats;
403 pd->PME_f_load_Av /= nrepeats;
405 for (j=0; j<nrepeats; j++)
407 if (pd->ns_per_day[j] > 0.0)
408 pd->ns_per_day_Av += pd->ns_per_day[j];
409 else
411 /* Somehow the performance number was not aquired for this run,
412 * therefor set the average to some negative value: */
413 pd->ns_per_day_Av = -1.0f*nrepeats;
414 break;
417 pd->ns_per_day_Av /= nrepeats;
419 /* Nicer output: */
420 if (pd->PME_f_load_Av > 0.0)
421 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
422 else
423 sprintf(str_PME_f_load, "%s", " - ");
426 /* We assume we had a successful run if both averages are positive */
427 if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
429 /* Output statistics if repeats were done */
430 if (nrepeats > 1)
432 /* Calculate the standard deviation */
433 s = 0.0;
434 for (j=0; j<nrepeats; j++)
435 s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
436 s /= (nrepeats - 1);
437 s = sqrt(s);
439 fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
440 line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
441 pd->ns_per_day_Av, str_PME_f_load);
442 if (nnodes > 1)
443 fprintf(fp, " %3d %3d %3d", pd->nx, pd->ny, pd->nz);
444 fprintf(fp, "\n");
446 /* Store the index of the best run found so far in 'winner': */
447 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
449 k_win = k;
450 i_win = i;
451 line_win = line;
453 line++;
458 if (k_win == -1)
459 gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
461 sep_line(fp);
463 winPME = perfdata[k_win][i_win].nPMEnodes;
464 if (winPME == -1)
465 sprintf(strbuf, "%s", "the automatic number of");
466 else
467 sprintf(strbuf, "%d", winPME);
468 fprintf(fp, "Best performance was achieved with %s PME nodes", strbuf);
469 if (nrepeats > 1)
470 fprintf(fp, " (see line %d)", line_win);
471 fprintf(fp, "\n");
473 /* Only mention settings if they were modified: */
474 bCanUseOrigTPR = TRUE;
475 if ( !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]) )
477 fprintf(fp, "Optimized PME settings:\n"
478 " New Coulomb radius: %f nm (was %f nm)\n",
479 info->rcoulomb[k_win], info->rcoulomb[0]);
480 bCanUseOrigTPR = FALSE;
483 if ( !is_equal(info->rvdw[k_win], info->rvdw[0]) )
485 fprintf(fp, " New Van der Waals radius: %f nm (was %f nm)\n",
486 info->rvdw[k_win], info->rvdw[0]);
487 bCanUseOrigTPR = FALSE;
490 if ( ! (info->nkx[k_win] == info->nkx[0] &&
491 info->nky[k_win] == info->nky[0] &&
492 info->nkz[k_win] == info->nkz[0] ) )
494 fprintf(fp, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n",
495 info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
496 info->nkx[0], info->nky[0], info->nkz[0]);
497 bCanUseOrigTPR = FALSE;
499 if (bCanUseOrigTPR && ntprs > 1)
500 fprintf(fp, "and original PME settings.\n");
502 fflush(fp);
504 /* Return the index of the mdp file that showed the highest performance
505 * and the optimal number of PME nodes */
506 *index_tpr = k_win;
507 *npme_optimal = winPME;
509 return bCanUseOrigTPR;
513 /* Get the commands we need to set up the runs from environment variables */
514 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char cmd_np[],
515 char *cmd_mdrun[], int repeats)
517 char *command=NULL;
518 char *cp;
519 char *cp2;
520 char line[STRLEN];
521 FILE *fp;
522 const char def_mpirun[] = "mpirun";
523 const char def_mdrun[] = "mdrun";
524 const char filename[] = "benchtest.log";
525 const char match_mpi[] = "NNODES=";
526 const char match_mdrun[]= "Program: ";
527 const char empty_mpirun[] = "";
528 gmx_bool bMdrun = FALSE;
529 gmx_bool bMPI = FALSE;
532 /* Get the commands we need to set up the runs from environment variables */
533 if (!bThreads)
535 if ( (cp = getenv("MPIRUN")) != NULL)
536 *cmd_mpirun = strdup(cp);
537 else
538 *cmd_mpirun = strdup(def_mpirun);
540 else
542 *cmd_mpirun = strdup(empty_mpirun);
545 if ( (cp = getenv("MDRUN" )) != NULL )
546 *cmd_mdrun = strdup(cp);
547 else
548 *cmd_mdrun = strdup(def_mdrun);
551 /* If no simulations have to be performed, we are done here */
552 if (repeats <= 0)
553 return;
555 /* Run a small test to see whether mpirun + mdrun work */
556 fprintf(stdout, "Making sure that mdrun can be executed. ");
557 if (bThreads)
559 snew(command, strlen(*cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
560 sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", *cmd_mdrun, cmd_np, filename);
562 else
564 snew(command, strlen(*cmd_mpirun) + strlen(cmd_np) + strlen(*cmd_mdrun) + strlen(filename) + 50);
565 sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", *cmd_mpirun, cmd_np, *cmd_mdrun, filename);
567 fprintf(stdout, "Trying '%s' ... ", command);
568 make_backup(filename);
569 gmx_system_call(command);
571 /* Check if we find the characteristic string in the output: */
572 if (!gmx_fexist(filename))
573 gmx_fatal(FARGS, "Output from test run could not be found.");
575 fp = fopen(filename, "r");
576 /* We need to scan the whole output file, since sometimes the queuing system
577 * also writes stuff to stdout/err */
578 while ( !feof(fp) )
580 cp2=fgets(line, STRLEN, fp);
581 if (cp2!=NULL)
583 if ( str_starts(line, match_mdrun) )
584 bMdrun = TRUE;
585 if ( str_starts(line, match_mpi) )
586 bMPI = TRUE;
589 fclose(fp);
591 if (bThreads)
593 if (bMPI)
595 gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
596 "(%s)\n"
597 "seems to have been compiled with MPI instead.",
598 *cmd_mdrun);
601 else
603 if (bMdrun && !bMPI)
605 gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
606 "(%s)\n"
607 "seems to have been compiled without MPI support.",
608 *cmd_mdrun);
612 if (!bMdrun)
614 gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
615 filename);
618 fprintf(stdout, "passed.\n");
620 /* Clean up ... */
621 remove(filename);
622 sfree(command);
626 static void launch_simulation(
627 gmx_bool bLaunch, /* Should the simulation be launched? */
628 FILE *fp, /* General log file */
629 gmx_bool bThreads, /* whether to use threads */
630 char *cmd_mpirun, /* Command for mpirun */
631 char *cmd_np, /* Switch for -np or -nt or empty */
632 char *cmd_mdrun, /* Command for mdrun */
633 char *args_for_mdrun, /* Arguments for mdrun */
634 const char *simulation_tpr, /* This tpr will be simulated */
635 int nnodes, /* Number of nodes to run on */
636 int nPMEnodes) /* Number of PME nodes to use */
638 char *command;
641 /* Make enough space for the system call command,
642 * (100 extra chars for -npme ... etc. options should suffice): */
643 snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
645 /* Note that the -passall options requires args_for_mdrun to be at the end
646 * of the command line string */
647 if (bThreads)
649 sprintf(command, "%s%s-npme %d -s %s %s",
650 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
652 else
654 sprintf(command, "%s%s%s -npme %d -s %s %s",
655 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
658 fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch? "Using":"Please use", command);
659 sep_line(fp);
660 fflush(fp);
662 /* Now the real thing! */
663 if (bLaunch)
665 fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
666 sep_line(stdout);
667 fflush(stdout);
668 gmx_system_call(command);
669 thanx(fp);
674 static void modify_PMEsettings(
675 gmx_large_int_t simsteps, /* Set this value as number of time steps */
676 const char *fn_best_tpr, /* tpr file with the best performance */
677 const char *fn_sim_tpr) /* name of tpr file to be launched */
679 t_inputrec *ir;
680 t_state state;
681 gmx_mtop_t mtop;
682 char buf[200];
684 snew(ir,1);
685 read_tpx_state(fn_best_tpr,ir,&state,NULL,&mtop);
687 /* Set nsteps to the right value */
688 ir->nsteps = simsteps;
690 /* Write the tpr file which will be launched */
691 sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, gmx_large_int_pfmt);
692 fprintf(stdout,buf,ir->nsteps);
693 fflush(stdout);
694 write_tpx_state(fn_sim_tpr,ir,&state,&mtop);
696 sfree(ir);
700 typedef struct
702 int nkx, nky, nkz; /* Fourier grid */
703 real fac; /* actual scaling factor */
704 real fs; /* Fourierspacing */
705 } t_pmegrid;
708 static void copy_grid(t_pmegrid *ingrid, t_pmegrid *outgrid)
710 outgrid->nkx = ingrid->nkx;
711 outgrid->nky = ingrid->nky;
712 outgrid->nkz = ingrid->nkz;
713 outgrid->fac = ingrid->fac;
714 outgrid->fs = ingrid->fs;
717 /* Removes entry 'index' from the t_pmegrid list */
718 static void remove_from_list(
719 t_pmegrid gridlist[],
720 int *nlist, /* Length of the list */
721 int index) /* Index to remove from the list */
723 int j;
726 for (j = index; j < (*nlist - 1); j++)
728 copy_grid(&gridlist[j+1], &gridlist[j]);
730 *nlist = *nlist - 1;
734 /* Returns the index of the least necessary grid in the list.
736 * This is the one where the following conditions hold for the scaling factor:
737 * 1. this grid has the smallest distance to its neighboring grid (distance
738 * measured by fac) -> this condition is true for two grids at the same time
739 * 2. this grid (of the two) has the smaller distance to the other neighboring
740 * grid
742 * The extreme grids (the ones with the smallest and largest
743 * scaling factor) are never thrown away.
745 static int get_throwaway_index(
746 t_pmegrid gridlist[],
747 int nlist) /* Length of the list */
749 int i,index = -1;
750 real dist,mindist,d_left,d_right;
753 /* Find the two factors with the smallest mutual distance */
754 mindist = GMX_FLOAT_MAX;
755 for (i = 1; i < nlist; i++)
757 dist = fabs(gridlist[i].fac - gridlist[i-1].fac);
758 if (dist < mindist)
760 index = i;
761 mindist = dist;
764 /* index and index-1 have the smallest mutual distance */
765 if (index == 1)
767 /* Never return the first index, i.e. index == 0 */
770 else
772 d_left = fabs(gridlist[index-1].fac - gridlist[index-2].fac);
773 d_right = fabs(gridlist[index+1].fac - gridlist[index ].fac);
775 /* Return the left index if its distance to its left neighbor is shorter
776 * than the distance of the right index to its right neighbor */
777 if (d_left < d_right)
778 index--;
780 /* Never return the last index */
781 if (index == nlist-1)
782 index--;
784 return index;
788 static void make_grid_list(
789 real fmin, /* minimum scaling factor (downscaling fac) */
790 real fmax, /* maximum scaling factor (upscaling fac) */
791 int ntprs, /* Number of tpr files to test */
792 matrix box, /* The box */
793 t_pmegrid *griduse[], /* List of grids that have to be tested */
794 int *npmegrid, /* Number of grids in the list */
795 real fs) /* Requested fourierspacing at a scaling factor
796 of unity */
798 real req_fac,act_fac=0,act_fs,eps;
799 rvec box_size;
800 int i,jmin=0,d,ngridall=0;
801 int nkx=0,nky=0,nkz=0;
802 int nkx_old=0,nky_old=0,nkz_old=0;
803 t_pmegrid *gridall;
804 int gridalloc,excess;
807 /* Determine length of triclinic box vectors */
808 for(d=0; d<DIM; d++)
810 box_size[d] = 0;
811 for(i=0;i<DIM;i++)
812 box_size[d] += box[d][i]*box[d][i];
813 box_size[d] = sqrt(box_size[d]);
816 gridalloc = 25;
817 snew(gridall, gridalloc);
819 fprintf(stdout, "Possible PME grid settings (apart from input file settings):\n");
820 fprintf(stdout, " nkx nky nkz max spacing scaling factor\n");
822 /* eps should provide a fine enough spacing not to miss any grid */
823 if (fmax != fmin)
824 eps = (fmax-fmin)/(100.0*(ntprs-1));
825 else
826 eps = 1.0/max( (*griduse)[0].nkz, max( (*griduse)[0].nkx, (*griduse)[0].nky ) );
828 for (req_fac = fmin; act_fac < fmax; req_fac += eps)
830 nkx=0;
831 nky=0;
832 nkz=0;
833 calc_grid(NULL,box,fs*req_fac,&nkx,&nky,&nkz);
834 act_fs = max(box_size[XX]/nkx,max(box_size[YY]/nky,box_size[ZZ]/nkz));
835 act_fac = act_fs/fs;
836 if ( ! ( nkx==nkx_old && nky==nky_old && nkz==nkz_old ) /* Exclude if grid is already in list */
837 && ! ( nkx==(*griduse)[0].nkx && nky==(*griduse)[0].nky && nkz==(*griduse)[0].nkz ) ) /* Exclude input file grid */
839 /* We found a new grid that will do */
840 nkx_old = nkx;
841 nky_old = nky;
842 nkz_old = nkz;
843 gridall[ngridall].nkx = nkx;
844 gridall[ngridall].nky = nky;
845 gridall[ngridall].nkz = nkz;
846 gridall[ngridall].fac = act_fac;
847 gridall[ngridall].fs = act_fs;
848 fprintf(stdout, "%5d%5d%5d %12f %12f\n",nkx,nky,nkz,act_fs,act_fac);
849 ngridall++;
850 if (ngridall >= gridalloc)
852 gridalloc += 25;
853 srenew(gridall, gridalloc);
858 /* Return the actual number of grids that can be tested. We cannot test more
859 * than the number of grids we found plus 1 (the input file) */
860 *npmegrid = min(ngridall+1,ntprs);
862 /* excess is the number of grids we have to get rid of */
863 excess = ngridall+1 - ntprs;
865 /* If we found less grids than tpr files were requested, simply test all
866 * the grid there are ... */
867 if (excess < 0)
869 fprintf(stdout, "NOTE: You requested %d tpr files, but apart from the input file grid only the\n"
870 " above listed %d suitable PME grids were found. Will test all suitable settings.\n",
871 ntprs, ngridall);
873 else
875 if (2 == ntprs)
877 /* We can only keep the input tpr file plus one extra tpr file.
878 * We make that choice depending on the values of -upfac and -downfac */
879 if (fmin < 1.0)
881 /* Keep the one with the -downfac as scaling factor. This is already
882 * stored in gridall[0] */
885 else
887 /* Keep the one with -upfac as scaling factor */
888 copy_grid(&(gridall[ngridall-1]), &(gridall[0]));
892 else
894 /* From the grid list throw away the unnecessary ones (keep the extremes) */
895 for (i = 0; i < excess; i++)
897 /* Get the index of the least necessary grid from the list ... */
898 jmin = get_throwaway_index(gridall, ngridall);
899 /* ... and remove the grid from the list */
900 remove_from_list(gridall, &ngridall, jmin);
905 /* The remaining list contains the grids we want to test */
906 for (i=1; i < *npmegrid; i++)
907 copy_grid(&(gridall[i-1]), &((*griduse)[i]));
909 sfree(gridall);
913 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
915 /* Make additional TPR files with more computational load for the
916 * direct space processors: */
917 static void make_benchmark_tprs(
918 const char *fn_sim_tpr, /* READ : User-provided tpr file */
919 char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */
920 gmx_large_int_t benchsteps, /* Number of time steps for benchmark runs */
921 gmx_large_int_t statesteps, /* Step counter in checkpoint file */
922 real upfac, /* Scale rcoulomb inbetween downfac and upfac */
923 real downfac,
924 int ntprs, /* No. of TPRs to write, each with a different rcoulomb and fourierspacing */
925 real fourierspacing, /* Basic fourierspacing from tpr input file */
926 t_inputinfo *info, /* Contains information about mdp file options */
927 FILE *fp) /* Write the output here */
929 int i,j,d;
930 t_inputrec *ir;
931 t_state state;
932 gmx_mtop_t mtop;
933 real nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials: */
934 char buf[200];
935 rvec box_size;
936 gmx_bool bNote = FALSE;
937 t_pmegrid *pmegrid=NULL; /* Grid settings for the PME grids to test */
938 int npmegrid=1; /* Number of grids that can be tested,
939 * normally = ntpr but could be less */
942 sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
943 ntprs>1? "s":"", gmx_large_int_pfmt, benchsteps>1?"s":"");
944 fprintf(stdout, buf, benchsteps);
945 if (statesteps > 0)
947 sprintf(buf, " (adding %s steps from checkpoint file)", gmx_large_int_pfmt);
948 fprintf(stdout, buf, statesteps);
949 benchsteps += statesteps;
951 fprintf(stdout, ".\n");
954 snew(ir,1);
955 read_tpx_state(fn_sim_tpr,ir,&state,NULL,&mtop);
957 /* Check if some kind of PME was chosen */
958 if (EEL_PME(ir->coulombtype) == FALSE)
959 gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
960 EELTYPE(eelPME));
962 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
963 if ( (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist) )
965 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
966 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
968 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
969 else if (ir->rcoulomb > ir->rlist)
971 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
972 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
975 /* Reduce the number of steps for the benchmarks */
976 info->orig_sim_steps = ir->nsteps;
977 ir->nsteps = benchsteps;
979 /* For PME-switch potentials, keep the radial distance of the buffer region */
980 nlist_buffer = ir->rlist - ir->rcoulomb;
982 /* Determine length of triclinic box vectors */
983 for(d=0; d<DIM; d++)
985 box_size[d] = 0;
986 for(i=0;i<DIM;i++)
987 box_size[d] += state.box[d][i]*state.box[d][i];
988 box_size[d] = sqrt(box_size[d]);
991 /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
992 info->fsx[0] = box_size[XX]/ir->nkx;
993 info->fsy[0] = box_size[YY]/ir->nky;
994 info->fsz[0] = box_size[ZZ]/ir->nkz;
996 /* Put the input grid as first entry into the grid list */
997 snew(pmegrid, ntprs);
998 pmegrid[0].fac = 1.00;
999 pmegrid[0].nkx = ir->nkx;
1000 pmegrid[0].nky = ir->nky;
1001 pmegrid[0].nkz = ir->nkz;
1002 pmegrid[0].fs = max(box_size[ZZ]/ir->nkz, max(box_size[XX]/ir->nkx, box_size[YY]/ir->nky));
1004 /* If no value for the fourierspacing was provided on the command line, we
1005 * use the reconstruction from the tpr file */
1006 if (fourierspacing <= 0)
1008 fourierspacing = pmegrid[0].fs;
1011 fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
1013 /* Print information about settings of which some are potentially modified: */
1014 fprintf(fp, " Coulomb type : %s\n", EELTYPE(ir->coulombtype));
1015 fprintf(fp, " Grid spacing x y z : %f %f %f\n",
1016 box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
1017 fprintf(fp, " Van der Waals type : %s\n", EVDWTYPE(ir->vdwtype));
1018 if (EVDW_SWITCHED(ir->vdwtype))
1019 fprintf(fp, " rvdw_switch : %f nm\n", ir->rvdw_switch);
1020 if (EPME_SWITCHED(ir->coulombtype))
1021 fprintf(fp, " rlist : %f nm\n", ir->rlist);
1022 if (ir->rlistlong != max_cutoff(ir->rvdw,ir->rcoulomb))
1023 fprintf(fp, " rlistlong : %f nm\n", ir->rlistlong);
1025 /* Print a descriptive line about the tpr settings tested */
1026 fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
1027 fprintf(fp, " No. scaling rcoulomb");
1028 fprintf(fp, " nkx nky nkz");
1029 fprintf(fp, " spacing");
1030 if (evdwCUT == ir->vdwtype)
1031 fprintf(fp, " rvdw");
1032 if (EPME_SWITCHED(ir->coulombtype))
1033 fprintf(fp, " rlist");
1034 if ( ir->rlistlong != max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb)) )
1035 fprintf(fp, " rlistlong");
1036 fprintf(fp, " tpr file\n");
1039 /* Get useful PME grids if requested, the actual number of entries is
1040 * returned in npmegrid */
1041 if (ntprs > 1)
1042 make_grid_list(downfac, upfac, ntprs, state.box, &pmegrid, &npmegrid, fourierspacing);
1044 /* Loop to create the requested number of tpr input files */
1045 for (j = 0; j < npmegrid; j++)
1047 /* The first one is the provided tpr file, just need to modify the number
1048 * of steps, so skip the following block */
1049 if (j > 0)
1051 /* Scale the Coulomb radius */
1052 ir->rcoulomb = info->rcoulomb[0]*pmegrid[j].fac;
1054 /* Adjust other radii since various conditions neet to be fulfilled */
1055 if (eelPME == ir->coulombtype)
1057 /* plain PME, rcoulomb must be equal to rlist */
1058 ir->rlist = ir->rcoulomb;
1060 else
1062 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
1063 ir->rlist = ir->rcoulomb + nlist_buffer;
1066 if (evdwCUT == ir->vdwtype)
1068 /* For vdw cutoff, rvdw >= rlist */
1069 ir->rvdw = max(info->rvdw[0], ir->rlist);
1072 ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
1074 /* Copy the optimal grid dimensions to ir */
1075 ir->nkx = pmegrid[j].nkx;
1076 ir->nky = pmegrid[j].nky;
1077 ir->nkz = pmegrid[j].nkz;
1079 } /* end of "if (j != 0)" */
1081 /* for j==0: Save the original settings
1082 * for j >0: Save modified radii and fourier grids */
1083 info->rcoulomb[j] = ir->rcoulomb;
1084 info->rvdw[j] = ir->rvdw;
1085 info->nkx[j] = ir->nkx;
1086 info->nky[j] = ir->nky;
1087 info->nkz[j] = ir->nkz;
1088 info->rlist[j] = ir->rlist;
1089 info->rlistlong[j] = ir->rlistlong;
1090 info->fsx[j] = pmegrid[j].fs;
1091 info->fsy[j] = pmegrid[j].fs;
1092 info->fsz[j] = pmegrid[j].fs;
1094 /* Write the benchmark tpr file */
1095 strncpy(fn_bench_tprs[j],fn_sim_tpr,strlen(fn_sim_tpr)-strlen(".tpr"));
1096 sprintf(buf, "_bench%.2d.tpr", j);
1097 strcat(fn_bench_tprs[j], buf);
1098 fprintf(stdout,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
1099 fprintf(stdout, gmx_large_int_pfmt, ir->nsteps);
1100 if (j > 0)
1101 fprintf(stdout,", scaling factor %f\n", pmegrid[j].fac);
1102 else
1103 fprintf(stdout,", unmodified settings\n");
1105 write_tpx_state(fn_bench_tprs[j],ir,&state,&mtop);
1107 /* Write information about modified tpr settings to log file */
1108 if (j==0)
1109 fprintf(fp, "%4d%10s%10f", j, "-input-", ir->rcoulomb);
1110 else
1111 fprintf(fp, "%4d%10f%10f", j, pmegrid[j].fac, ir->rcoulomb);
1112 fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
1113 fprintf(fp, " %9f ", info->fsx[j]);
1114 if (evdwCUT == ir->vdwtype)
1115 fprintf(fp, "%10f", ir->rvdw);
1116 if (EPME_SWITCHED(ir->coulombtype))
1117 fprintf(fp, "%10f", ir->rlist);
1118 if ( info->rlistlong[0] != max_cutoff(info->rlist[0],max_cutoff(info->rvdw[0],info->rcoulomb[0])) )
1119 fprintf(fp, "%10f", ir->rlistlong);
1120 fprintf(fp, " %-14s\n",fn_bench_tprs[j]);
1122 /* Make it clear to the user that some additional settings were modified */
1123 if ( !is_equal(ir->rvdw , info->rvdw[0])
1124 || !is_equal(ir->rlistlong, info->rlistlong[0]) )
1126 bNote = TRUE;
1129 if (bNote)
1130 fprintf(fp, "\nNote that in addition to rcoulomb and the fourier grid\n"
1131 "also other input settings were changed (see table above).\n"
1132 "Please check if the modified settings are appropriate.\n");
1133 fflush(stdout);
1134 fflush(fp);
1135 sfree(ir);
1139 /* Whether these files are written depends on tpr (or mdp) settings,
1140 * not on mdrun command line options! */
1141 static gmx_bool tpr_triggers_file(const char *opt)
1143 if ( (0 == strcmp(opt, "-ro"))
1144 || (0 == strcmp(opt, "-ra"))
1145 || (0 == strcmp(opt, "-rt"))
1146 || (0 == strcmp(opt, "-rs"))
1147 || (0 == strcmp(opt, "-pf"))
1148 || (0 == strcmp(opt, "-px")) )
1149 return TRUE;
1150 else
1151 return FALSE;
1155 /* Rename the files we want to keep to some meaningful filename and
1156 * delete the rest */
1157 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
1158 int nPMEnodes, int nr, gmx_bool bKeepStderr)
1160 char numstring[STRLEN];
1161 char newfilename[STRLEN];
1162 const char *fn=NULL;
1163 int i;
1164 const char *opt;
1167 fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
1169 for (i=0; i<nfile; i++)
1171 opt = (char *)fnm[i].opt;
1172 if ( strcmp(opt, "-p") == 0 )
1174 /* do nothing; keep this file */
1177 else if (strcmp(opt, "-bg") == 0)
1179 /* Give the log file a nice name so one can later see which parameters were used */
1180 numstring[0] = '\0';
1181 if (nr > 0)
1182 sprintf(numstring, "_%d", nr);
1183 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile,fnm), k, nnodes, nPMEnodes, numstring);
1184 if (gmx_fexist(opt2fn("-bg",nfile,fnm)))
1186 fprintf(stdout, "renaming log file to %s\n", newfilename);
1187 make_backup(newfilename);
1188 rename(opt2fn("-bg",nfile,fnm), newfilename);
1191 else if (strcmp(opt, "-err") == 0)
1193 /* This file contains the output of stderr. We want to keep it in
1194 * cases where there have been problems. */
1195 fn = opt2fn(opt, nfile, fnm);
1196 numstring[0] = '\0';
1197 if (nr > 0)
1198 sprintf(numstring, "_%d", nr);
1199 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
1200 if (gmx_fexist(fn))
1202 if (bKeepStderr)
1204 fprintf(stdout, "Saving stderr output in %s\n", newfilename);
1205 make_backup(newfilename);
1206 rename(fn, newfilename);
1208 else
1210 fprintf(stdout, "Deleting %s\n", fn);
1211 remove(fn);
1215 /* Delete the files which are created for each benchmark run: (options -b*) */
1216 else if ( ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt,nfile,fnm) || !is_optional(&fnm[i])) )
1217 || tpr_triggers_file(opt) )
1219 fn = opt2fn(opt, nfile, fnm);
1220 if (gmx_fexist(fn))
1222 fprintf(stdout, "Deleting %s\n", fn);
1223 remove(fn);
1230 /* Returns the largest common factor of n1 and n2 */
1231 static int largest_common_factor(int n1, int n2)
1233 int factor, nmax;
1235 nmax = min(n1, n2);
1236 for (factor=nmax; factor > 0; factor--)
1238 if ( 0==(n1 % factor) && 0==(n2 % factor) )
1240 return(factor);
1243 return 0; /* one for the compiler */
1246 enum {eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr};
1248 /* Create a list of numbers of PME nodes to test */
1249 static void make_npme_list(
1250 const char *npmevalues_opt, /* Make a complete list with all
1251 * possibilities or a short list that keeps only
1252 * reasonable numbers of PME nodes */
1253 int *nentries, /* Number of entries we put in the nPMEnodes list */
1254 int *nPMEnodes[], /* Each entry contains the value for -npme */
1255 int nnodes, /* Total number of nodes to do the tests on */
1256 int minPMEnodes, /* Minimum number of PME nodes */
1257 int maxPMEnodes) /* Maximum number of PME nodes */
1259 int i,npme,npp;
1260 int min_factor=1; /* We request that npp and npme have this minimal
1261 * largest common factor (depends on npp) */
1262 int nlistmax; /* Max. list size */
1263 int nlist; /* Actual number of entries in list */
1264 int eNPME=0;
1267 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1268 if ( 0 == strcmp(npmevalues_opt, "all") )
1270 eNPME = eNpmeAll;
1272 else if ( 0 == strcmp(npmevalues_opt, "subset") )
1274 eNPME = eNpmeSubset;
1276 else if ( 0 == strcmp(npmevalues_opt, "auto") )
1278 if (nnodes <= 64)
1279 eNPME = eNpmeAll;
1280 else if (nnodes < 128)
1281 eNPME = eNpmeReduced;
1282 else
1283 eNPME = eNpmeSubset;
1285 else
1287 gmx_fatal(FARGS, "Unknown option for -npme in make_npme_list");
1290 /* Calculate how many entries we could possibly have (in case of -npme all) */
1291 if (nnodes > 2)
1293 nlistmax = maxPMEnodes - minPMEnodes + 3;
1294 if (0 == minPMEnodes)
1295 nlistmax--;
1297 else
1298 nlistmax = 1;
1300 /* Now make the actual list which is at most of size nlist */
1301 snew(*nPMEnodes, nlistmax);
1302 nlist = 0; /* start counting again, now the real entries in the list */
1303 for (i = 0; i < nlistmax - 2; i++)
1305 npme = maxPMEnodes - i;
1306 npp = nnodes-npme;
1307 switch (eNPME)
1309 case eNpmeAll:
1310 min_factor = 1;
1311 break;
1312 case eNpmeReduced:
1313 min_factor = 2;
1314 break;
1315 case eNpmeSubset:
1316 /* For 2d PME we want a common largest factor of at least the cube
1317 * root of the number of PP nodes */
1318 min_factor = (int) pow(npp, 1.0/3.0);
1319 break;
1320 default:
1321 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1322 break;
1324 if (largest_common_factor(npp, npme) >= min_factor)
1326 (*nPMEnodes)[nlist] = npme;
1327 nlist++;
1330 /* We always test 0 PME nodes and the automatic number */
1331 *nentries = nlist + 2;
1332 (*nPMEnodes)[nlist ] = 0;
1333 (*nPMEnodes)[nlist+1] = -1;
1335 fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1336 for (i=0; i<*nentries-1; i++)
1337 fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1338 fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1342 /* Allocate memory to store the performance data */
1343 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1345 int i, j, k;
1348 for (k=0; k<ntprs; k++)
1350 snew(perfdata[k], datasets);
1351 for (i=0; i<datasets; i++)
1353 for (j=0; j<repeats; j++)
1355 snew(perfdata[k][i].Gcycles , repeats);
1356 snew(perfdata[k][i].ns_per_day, repeats);
1357 snew(perfdata[k][i].PME_f_load, repeats);
1364 static void do_the_tests(
1365 FILE *fp, /* General g_tune_pme output file */
1366 char **tpr_names, /* Filenames of the input files to test */
1367 int maxPMEnodes, /* Max fraction of nodes to use for PME */
1368 int minPMEnodes, /* Min fraction of nodes to use for PME */
1369 const char *npmevalues_opt, /* Which -npme values should be tested */
1370 t_perf **perfdata, /* Here the performace data is stored */
1371 int *pmeentries, /* Entries in the nPMEnodes list */
1372 int repeats, /* Repeat each test this often */
1373 int nnodes, /* Total number of nodes = nPP + nPME */
1374 int nr_tprs, /* Total number of tpr files to test */
1375 gmx_bool bThreads, /* Threads or MPI? */
1376 char *cmd_mpirun, /* mpirun command string */
1377 char *cmd_np, /* "-np", "-n", whatever mpirun needs */
1378 char *cmd_mdrun, /* mdrun command string */
1379 char *cmd_args_bench, /* arguments for mdrun in a string */
1380 const t_filenm *fnm, /* List of filenames from command line */
1381 int nfile, /* Number of files specified on the cmdl. */
1382 int sim_part, /* For checkpointing */
1383 int presteps, /* DLB equilibration steps, is checked */
1384 gmx_large_int_t cpt_steps) /* Time step counter in the checkpoint */
1386 int i,nr,k,ret,count=0,totaltests;
1387 int *nPMEnodes=NULL;
1388 t_perf *pd=NULL;
1389 int cmdline_length;
1390 char *command, *cmd_stub;
1391 char buf[STRLEN];
1392 gmx_bool bResetProblem=FALSE;
1395 /* This string array corresponds to the eParselog enum type at the start
1396 * of this file */
1397 const char* ParseLog[] = {"OK.",
1398 "Logfile not found!",
1399 "No timings, logfile truncated?",
1400 "Run was terminated.",
1401 "Counters were not reset properly.",
1402 "No DD grid found for these settings.",
1403 "TPX version conflict!",
1404 "mdrun was not started in parallel!",
1405 "A fatal error occured!" };
1406 char str_PME_f_load[13];
1409 /* Allocate space for the mdrun command line. 100 extra characters should
1410 be more than enough for the -npme etcetera arguments */
1411 cmdline_length = strlen(cmd_mpirun)
1412 + strlen(cmd_np)
1413 + strlen(cmd_mdrun)
1414 + strlen(cmd_args_bench)
1415 + strlen(tpr_names[0]) + 100;
1416 snew(command , cmdline_length);
1417 snew(cmd_stub, cmdline_length);
1419 /* Construct the part of the command line that stays the same for all tests: */
1420 if (bThreads)
1422 sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1424 else
1426 sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1429 /* Create a list of numbers of PME nodes to test */
1430 make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1431 nnodes, minPMEnodes, maxPMEnodes);
1433 if (0 == repeats)
1435 fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1436 fclose(fp);
1437 finalize(opt2fn("-p", nfile, fnm));
1438 exit(0);
1441 /* Allocate one dataset for each tpr input file: */
1442 init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1444 /*****************************************/
1445 /* Main loop over all tpr files to test: */
1446 /*****************************************/
1447 totaltests = nr_tprs*(*pmeentries)*repeats;
1448 for (k=0; k<nr_tprs;k++)
1450 fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1451 fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
1452 /* Loop over various numbers of PME nodes: */
1453 for (i = 0; i < *pmeentries; i++)
1455 pd = &perfdata[k][i];
1457 /* Loop over the repeats for each scenario: */
1458 for (nr = 0; nr < repeats; nr++)
1460 pd->nPMEnodes = nPMEnodes[i];
1462 /* Add -npme and -s to the command line and save it. Note that
1463 * the -passall (if set) options requires cmd_args_bench to be
1464 * at the end of the command line string */
1465 snew(pd->mdrun_cmd_line, cmdline_length);
1466 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1467 cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1469 /* Do a benchmark simulation: */
1470 if (repeats > 1)
1471 sprintf(buf, ", pass %d/%d", nr+1, repeats);
1472 else
1473 buf[0]='\0';
1474 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1475 (100.0*count)/totaltests,
1476 k+1, nr_tprs, i+1, *pmeentries, buf);
1477 make_backup(opt2fn("-err",nfile,fnm));
1478 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err",nfile,fnm));
1479 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1480 gmx_system_call(command);
1482 /* Collect the performance data from the log file; also check stderr
1483 * for fatal errors */
1484 ret = parse_logfile(opt2fn("-bg",nfile,fnm), opt2fn("-err",nfile,fnm),
1485 pd, nr, presteps, cpt_steps, nnodes);
1486 if ((presteps > 0) && (ret == eParselogResetProblem))
1487 bResetProblem = TRUE;
1489 if (-1 == pd->nPMEnodes)
1490 sprintf(buf, "(%3d)", pd->guessPME);
1491 else
1492 sprintf(buf, " ");
1494 /* Nicer output */
1495 if (pd->PME_f_load[nr] > 0.0)
1496 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1497 else
1498 sprintf(str_PME_f_load, "%s", " - ");
1500 /* Write the data we got to disk */
1501 fprintf(fp, "%4d%s %12.3f %12.3f %s %s", pd->nPMEnodes,
1502 buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1503 if (! (ret==eParselogOK || ret==eParselogNoDDGrid || ret==eParselogNotFound) )
1504 fprintf(fp, " Check %s file for problems.", ret==eParselogFatal? "err":"log");
1505 fprintf(fp, "\n");
1506 fflush(fp);
1507 count++;
1509 /* Do some cleaning up and delete the files we do not need any more */
1510 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret==eParselogFatal);
1512 /* If the first run with this number of processors already failed, do not try again: */
1513 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1515 fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1516 count += repeats-(nr+1);
1517 break;
1519 } /* end of repeats loop */
1520 } /* end of -npme loop */
1521 } /* end of tpr file loop */
1522 if (bResetProblem)
1524 sep_line(fp);
1525 fprintf(fp, "WARNING: The cycle and time step counters could not be reset\n"
1526 "properly. The reason could be that mpirun did not manage to\n"
1527 "export the environment variable GMX_RESET_COUNTER. You might\n"
1528 "have to give a special switch to mpirun for that.\n"
1529 "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
1530 "value normally provided by -presteps.");
1531 sep_line(fp);
1533 sfree(command);
1534 sfree(cmd_stub);
1538 static void check_input(
1539 int nnodes,
1540 int repeats,
1541 int *ntprs,
1542 real *upfac,
1543 real *downfac,
1544 real maxPMEfraction,
1545 real minPMEfraction,
1546 real fourierspacing,
1547 gmx_large_int_t bench_nsteps,
1548 const t_filenm *fnm,
1549 int nfile,
1550 int sim_part,
1551 int presteps,
1552 int npargs,
1553 t_pargs *pa)
1555 /* Make sure the input file exists */
1556 if (!gmx_fexist(opt2fn("-s",nfile,fnm)))
1557 gmx_fatal(FARGS, "File %s not found.", opt2fn("-s",nfile,fnm));
1559 /* Make sure that the checkpoint file is not overwritten by the benchmark runs */
1560 if ( (0 == strcmp(opt2fn("-cpi",nfile,fnm), opt2fn("-cpo",nfile,fnm)) ) && (sim_part > 1) )
1561 gmx_fatal(FARGS, "Checkpoint input and output file must not be identical,\nbecause then the input file might change during the benchmarks.");
1563 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1564 if (repeats < 0)
1565 gmx_fatal(FARGS, "Number of repeats < 0!");
1567 /* Check number of nodes */
1568 if (nnodes < 1)
1569 gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1571 /* Automatically choose -ntpr if not set */
1572 if (*ntprs < 1)
1574 if (nnodes < 16)
1575 *ntprs = 1;
1576 else
1577 *ntprs = 3;
1578 fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs==1?"":"s");
1580 else
1582 if (1 == *ntprs)
1583 fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1586 if ( is_equal(*downfac,*upfac) && (*ntprs > 2) && !(fourierspacing > 0.0))
1588 fprintf(stderr, "WARNING: Resetting -ntpr to 2 since both scaling factors are the same.\n"
1589 "Please choose upfac unequal to downfac to test various PME grid settings\n");
1590 *ntprs = 2;
1593 /* Check whether max and min fraction are within required values */
1594 if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1595 gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1596 if (minPMEfraction > 0.5 || minPMEfraction < 0)
1597 gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1598 if (maxPMEfraction < minPMEfraction)
1599 gmx_fatal(FARGS, "-max must be larger or equal to -min");
1601 /* Check whether the number of steps - if it was set - has a reasonable value */
1602 if (bench_nsteps < 0)
1603 gmx_fatal(FARGS, "Number of steps must be positive.");
1605 if (bench_nsteps > 10000 || bench_nsteps < 100)
1607 fprintf(stderr, "WARNING: steps=");
1608 fprintf(stderr, gmx_large_int_pfmt, bench_nsteps);
1609 fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100)? "few" : "many");
1612 if (presteps < 0)
1614 gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1617 if (*upfac <= 0.0 || *downfac <= 0.0 || *downfac > *upfac)
1618 gmx_fatal(FARGS, "Both scaling factors must be larger than zero and upper\n"
1619 "scaling limit (%f) must be larger than lower limit (%f).",
1620 *upfac, *downfac);
1622 if (*downfac < 0.75 || *upfac > 1.25)
1623 fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1625 if (fourierspacing < 0)
1626 gmx_fatal(FARGS, "Please choose a positive value for fourierspacing.");
1628 /* Make shure that the scaling factor options are compatible with the number of tprs */
1629 if ( (*ntprs < 3) && ( opt2parg_bSet("-upfac",npargs,pa) || opt2parg_bSet("-downfac",npargs,pa) ) )
1631 if (opt2parg_bSet("-upfac",npargs,pa) && opt2parg_bSet("-downfac",npargs,pa) && !is_equal(*upfac,*downfac))
1633 fprintf(stderr, "NOTE: Specify -ntpr > 2 for both scaling factors to take effect.\n"
1634 "(upfac=%f, downfac=%f)\n", *upfac, *downfac);
1636 if (opt2parg_bSet("-upfac",npargs,pa))
1637 *downfac = *upfac;
1638 if (opt2parg_bSet("-downfac",npargs,pa))
1639 *upfac = *downfac;
1641 if ( (2 == *ntprs) && (opt2parg_bSet("-downfac",npargs,pa)) && !is_equal(*downfac, 1.0))
1643 *upfac = 1.0;
1648 /* Returns TRUE when "opt" is a switch for g_tune_pme itself */
1649 static gmx_bool is_main_switch(char *opt)
1651 if ( (0 == strcmp(opt,"-s" ))
1652 || (0 == strcmp(opt,"-p" ))
1653 || (0 == strcmp(opt,"-launch" ))
1654 || (0 == strcmp(opt,"-r" ))
1655 || (0 == strcmp(opt,"-ntpr" ))
1656 || (0 == strcmp(opt,"-max" ))
1657 || (0 == strcmp(opt,"-min" ))
1658 || (0 == strcmp(opt,"-upfac" ))
1659 || (0 == strcmp(opt,"-downfac" ))
1660 || (0 == strcmp(opt,"-four" ))
1661 || (0 == strcmp(opt,"-steps" ))
1662 || (0 == strcmp(opt,"-simsteps" ))
1663 || (0 == strcmp(opt,"-resetstep"))
1664 || (0 == strcmp(opt,"-so" ))
1665 || (0 == strcmp(opt,"-npstring" ))
1666 || (0 == strcmp(opt,"-npme" ))
1667 || (0 == strcmp(opt,"-passall" )) )
1668 return TRUE;
1670 return FALSE;
1674 /* Returns TRUE when "opt" is needed at launch time */
1675 static gmx_bool is_launch_option(char *opt, gmx_bool bSet)
1677 if (bSet)
1678 return TRUE;
1679 else
1680 return FALSE;
1684 /* Returns TRUE when "opt" is needed at launch time */
1685 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1687 /* We need all options that were set on the command line
1688 * and that do not start with -b */
1689 if (0 == strncmp(opt,"-b", 2))
1690 return FALSE;
1692 if (bSet)
1693 return TRUE;
1694 else
1695 return FALSE;
1699 /* Returns TRUE when "opt" gives an option needed for the benchmarks runs */
1700 static gmx_bool is_bench_option(char *opt, gmx_bool bSet)
1702 /* If option is set, we might need it for the benchmarks.
1703 * This includes -cpi */
1704 if (bSet)
1706 if ( (0 == strcmp(opt, "-append" ))
1707 || (0 == strcmp(opt, "-maxh" ))
1708 || (0 == strcmp(opt, "-deffnm" ))
1709 || (0 == strcmp(opt, "-resethway"))
1710 || (0 == strcmp(opt, "-cpnum" )) )
1711 return FALSE;
1712 else
1713 return TRUE;
1715 else
1716 return FALSE;
1720 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1721 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1723 /* All options starting with "-b" are for _b_enchmark files exclusively */
1724 if (0 == strncmp(opt,"-b", 2))
1726 if (!bOptional || bSet)
1727 return TRUE;
1728 else
1729 return FALSE;
1731 else
1733 if (bIsOutput)
1734 return FALSE;
1735 else
1736 if (bSet) /* These are additional input files like -cpi -ei */
1737 return TRUE;
1738 else
1739 return FALSE;
1744 /* Adds 'buf' to 'str' */
1745 static void add_to_string(char **str, char *buf)
1747 int len;
1750 len = strlen(*str) + strlen(buf) + 1;
1751 srenew(*str, len);
1752 strcat(*str, buf);
1756 /* Create the command line for the benchmark as well as for the real run */
1757 static void create_command_line_snippets(
1758 gmx_bool bThreads,
1759 int presteps,
1760 int nfile,
1761 t_filenm fnm[],
1762 int npargs,
1763 t_pargs *pa,
1764 const char *procstring, /* How to pass the number of processors to $MPIRUN */
1765 char *cmd_np[], /* Actual command line snippet, e.g. '-np <N>' */
1766 char *cmd_args_bench[], /* command line arguments for benchmark runs */
1767 char *cmd_args_launch[], /* command line arguments for simulation run */
1768 char extra_args[]) /* Add this to the end of the command line */
1770 int i;
1771 char *opt;
1772 const char *name;
1773 char *np_or_nt;
1774 #define BUFLENGTH 255
1775 char buf[BUFLENGTH];
1776 char strbuf[BUFLENGTH];
1777 char strbuf2[BUFLENGTH];
1780 if (bThreads)
1781 np_or_nt=strdup("-nt");
1782 else
1783 np_or_nt=strdup("-np");
1785 /* strlen needs at least '\0' as a string: */
1786 snew(*cmd_args_bench ,1);
1787 snew(*cmd_args_launch,1);
1788 *cmd_args_launch[0]='\0';
1789 *cmd_args_bench[0] ='\0';
1792 /*******************************************/
1793 /* 1. Process other command line arguments */
1794 /*******************************************/
1795 for (i=0; i<npargs; i++)
1797 /* What command line switch are we currently processing: */
1798 opt = (char *)pa[i].option;
1799 /* Skip options not meant for mdrun */
1800 if (!is_main_switch(opt))
1802 /* Print it to a string buffer, strip away trailing whitespaces that pa_val also returns: */
1803 sprintf(strbuf2, "%s", pa_val(&pa[i],buf,BUFLENGTH));
1804 rtrim(strbuf2);
1805 sprintf(strbuf, "%s %s ", opt, strbuf2);
1806 /* We need the -np (or -nt) switch in a separate buffer - whether or not it was set! */
1807 if (0 == strcmp(opt,np_or_nt))
1809 if (strcmp(procstring, "none")==0 && !bThreads)
1811 /* Omit -np <N> entirely */
1812 snew(*cmd_np, 2);
1813 sprintf(*cmd_np, " ");
1815 else
1817 /* This is the normal case with -np <N> */
1818 snew(*cmd_np, strlen(procstring)+strlen(strbuf2)+4);
1819 sprintf(*cmd_np, " %s %s ", bThreads? "-nt" : procstring, strbuf2);
1822 else
1824 if (is_bench_option(opt,pa[i].bSet))
1825 add_to_string(cmd_args_bench, strbuf);
1827 if (is_launch_option(opt,pa[i].bSet))
1828 add_to_string(cmd_args_launch, strbuf);
1832 if (presteps > 0)
1834 /* Add equilibration steps to benchmark options */
1835 sprintf(strbuf, "-resetstep %d ", presteps);
1836 add_to_string(cmd_args_bench, strbuf);
1839 /********************/
1840 /* 2. Process files */
1841 /********************/
1842 for (i=0; i<nfile; i++)
1844 opt = (char *)fnm[i].opt;
1845 name = opt2fn(opt,nfile,fnm);
1847 /* Strbuf contains the options, now let's sort out where we need that */
1848 sprintf(strbuf, "%s %s ", opt, name);
1850 /* Skip options not meant for mdrun */
1851 if (!is_main_switch(opt))
1854 if ( is_bench_file(opt, opt2bSet(opt,nfile,fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1856 /* All options starting with -b* need the 'b' removed,
1857 * therefore overwrite strbuf */
1858 if (0 == strncmp(opt, "-b", 2))
1859 sprintf(strbuf, "-%s %s ", &opt[2], name);
1861 add_to_string(cmd_args_bench, strbuf);
1864 if ( is_launch_file(opt,opt2bSet(opt,nfile,fnm)) )
1865 add_to_string(cmd_args_launch, strbuf);
1869 add_to_string(cmd_args_bench , extra_args);
1870 add_to_string(cmd_args_launch, extra_args);
1871 #undef BUFLENGTH
1875 /* Set option opt */
1876 static void setopt(const char *opt,int nfile,t_filenm fnm[])
1878 int i;
1880 for(i=0; (i<nfile); i++)
1881 if (strcmp(opt,fnm[i].opt)==0)
1882 fnm[i].flag |= ffSET;
1886 static void couple_files_options(int nfile, t_filenm fnm[])
1888 int i;
1889 gmx_bool bSet,bBench;
1890 char *opt;
1891 char buf[20];
1894 for (i=0; i<nfile; i++)
1896 opt = (char *)fnm[i].opt;
1897 bSet = ((fnm[i].flag & ffSET) != 0);
1898 bBench = (0 == strncmp(opt,"-b", 2));
1900 /* Check optional files */
1901 /* If e.g. -eo is set, then -beo also needs to be set */
1902 if (is_optional(&fnm[i]) && bSet && !bBench)
1904 sprintf(buf, "-b%s", &opt[1]);
1905 setopt(buf,nfile,fnm);
1907 /* If -beo is set, then -eo also needs to be! */
1908 if (is_optional(&fnm[i]) && bSet && bBench)
1910 sprintf(buf, "-%s", &opt[2]);
1911 setopt(buf,nfile,fnm);
1917 static double gettime()
1919 #ifdef HAVE_GETTIMEOFDAY
1920 struct timeval t;
1921 struct timezone tz = { 0,0 };
1922 double seconds;
1924 gettimeofday(&t,&tz);
1926 seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
1928 return seconds;
1929 #else
1930 double seconds;
1932 seconds = time(NULL);
1934 return seconds;
1935 #endif
1939 #define BENCHSTEPS (1000)
1941 int gmx_tune_pme(int argc,char *argv[])
1943 const char *desc[] = {
1944 "For a given number [TT]-np[tt] or [TT]-nt[tt] of processors/threads, this program systematically",
1945 "times mdrun with various numbers of PME-only nodes and determines",
1946 "which setting is fastest. It will also test whether performance can",
1947 "be enhanced by shifting load from the reciprocal to the real space",
1948 "part of the Ewald sum. ",
1949 "Simply pass your [TT].tpr[tt] file to g_tune_pme together with other options",
1950 "for mdrun as needed.[PAR]",
1951 "Which executables are used can be set in the environment variables",
1952 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
1953 "will be used as defaults. Note that for certain MPI frameworks you",
1954 "need to provide a machine- or hostfile. This can also be passed",
1955 "via the MPIRUN variable, e.g.",
1956 "'export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"'[PAR]",
1957 "Please call g_tune_pme with the normal options you would pass to",
1958 "mdrun and add [TT]-np[tt] for the number of processors to perform the",
1959 "tests on, or [TT]-nt[tt] for the number of threads. You can also add [TT]-r[tt]",
1960 "to repeat each test several times to get better statistics. [PAR]",
1961 "g_tune_pme can test various real space / reciprocal space workloads",
1962 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
1963 "written with enlarged cutoffs and smaller fourier grids respectively.",
1964 "Typically, the first test (no. 0) will be with the settings from the input",
1965 "[TT].tpr[tt] file; the last test (no. [TT]ntpr[tt]) will have cutoffs multiplied",
1966 "by (and at the same time fourier grid dimensions divided by) the scaling",
1967 "factor [TT]-fac[tt] (default 1.2). The remaining [TT].tpr[tt] files will have equally",
1968 "spaced values inbetween these extremes. Note that you can set [TT]-ntpr[tt] to 1",
1969 "if you just want to find the optimal number of PME-only nodes; in that case",
1970 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
1971 "For the benchmark runs, the default of 1000 time steps should suffice for most",
1972 "MD systems. The dynamic load balancing needs about 100 time steps",
1973 "to adapt to local load imbalances, therefore the time step counters",
1974 "are by default reset after 100 steps. For large systems",
1975 "(>1M atoms) you may have to set [TT]-resetstep[tt] to a higher value.",
1976 "From the 'DD' load imbalance entries in the md.log output file you",
1977 "can tell after how many steps the load is sufficiently balanced.[PAR]"
1978 "Example call: [TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
1979 "After calling mdrun several times, detailed performance information",
1980 "is available in the output file perf.out. ",
1981 "Note that during the benchmarks a couple of temporary files are written",
1982 "(options -b*), these will be automatically deleted after each test.[PAR]",
1983 "If you want the simulation to be started automatically with the",
1984 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
1987 int nnodes =1;
1988 int repeats=2;
1989 int pmeentries=0; /* How many values for -npme do we actually test for each tpr file */
1990 real maxPMEfraction=0.50;
1991 real minPMEfraction=0.25;
1992 int maxPMEnodes, minPMEnodes;
1993 real downfac=1.0,upfac=1.2;
1994 int ntprs=0;
1995 real fs=0.0; /* 0 indicates: not set by the user */
1996 gmx_large_int_t bench_nsteps=BENCHSTEPS;
1997 gmx_large_int_t new_sim_nsteps=-1; /* -1 indicates: not set by the user */
1998 gmx_large_int_t cpt_steps=0; /* Step counter in .cpt input file */
1999 int presteps=100; /* Do a full cycle reset after presteps steps */
2000 gmx_bool bOverwrite=FALSE, bKeepTPR;
2001 gmx_bool bLaunch=FALSE;
2002 gmx_bool bPassAll=FALSE;
2003 char *ExtraArgs=NULL;
2004 char **tpr_names=NULL;
2005 const char *simulation_tpr=NULL;
2006 int best_npme, best_tpr;
2007 int sim_part = 1; /* For benchmarks with checkpoint files */
2009 /* Default program names if nothing else is found */
2010 char *cmd_mpirun=NULL, *cmd_mdrun=NULL;
2011 char *cmd_args_bench, *cmd_args_launch;
2012 char *cmd_np=NULL;
2014 t_perf **perfdata=NULL;
2015 t_inputinfo *info;
2016 int i;
2017 FILE *fp;
2018 t_commrec *cr;
2020 /* Print out how long the tuning took */
2021 double seconds;
2023 static t_filenm fnm[] = {
2024 /* g_tune_pme */
2025 { efOUT, "-p", "perf", ffWRITE },
2026 { efLOG, "-err", "errors", ffWRITE },
2027 { efTPX, "-so", "tuned", ffWRITE },
2028 /* mdrun: */
2029 { efTPX, NULL, NULL, ffREAD },
2030 { efTRN, "-o", NULL, ffWRITE },
2031 { efXTC, "-x", NULL, ffOPTWR },
2032 { efCPT, "-cpi", NULL, ffOPTRD },
2033 { efCPT, "-cpo", NULL, ffOPTWR },
2034 { efSTO, "-c", "confout", ffWRITE },
2035 { efEDR, "-e", "ener", ffWRITE },
2036 { efLOG, "-g", "md", ffWRITE },
2037 { efXVG, "-dhdl", "dhdl", ffOPTWR },
2038 { efXVG, "-field", "field", ffOPTWR },
2039 { efXVG, "-table", "table", ffOPTRD },
2040 { efXVG, "-tablep", "tablep", ffOPTRD },
2041 { efXVG, "-tableb", "table", ffOPTRD },
2042 { efTRX, "-rerun", "rerun", ffOPTRD },
2043 { efXVG, "-tpi", "tpi", ffOPTWR },
2044 { efXVG, "-tpid", "tpidist", ffOPTWR },
2045 { efEDI, "-ei", "sam", ffOPTRD },
2046 { efEDO, "-eo", "sam", ffOPTWR },
2047 { efGCT, "-j", "wham", ffOPTRD },
2048 { efGCT, "-jo", "bam", ffOPTWR },
2049 { efXVG, "-ffout", "gct", ffOPTWR },
2050 { efXVG, "-devout", "deviatie", ffOPTWR },
2051 { efXVG, "-runav", "runaver", ffOPTWR },
2052 { efXVG, "-px", "pullx", ffOPTWR },
2053 { efXVG, "-pf", "pullf", ffOPTWR },
2054 { efXVG, "-ro", "rotation", ffOPTWR },
2055 { efLOG, "-ra", "rotangles",ffOPTWR },
2056 { efLOG, "-rs", "rotslabs", ffOPTWR },
2057 { efLOG, "-rt", "rottorque",ffOPTWR },
2058 { efMTX, "-mtx", "nm", ffOPTWR },
2059 { efNDX, "-dn", "dipole", ffOPTWR },
2060 /* Output files that are deleted after each benchmark run */
2061 { efTRN, "-bo", "bench", ffWRITE },
2062 { efXTC, "-bx", "bench", ffWRITE },
2063 { efCPT, "-bcpo", "bench", ffWRITE },
2064 { efSTO, "-bc", "bench", ffWRITE },
2065 { efEDR, "-be", "bench", ffWRITE },
2066 { efLOG, "-bg", "bench", ffWRITE },
2067 { efEDO, "-beo", "bench", ffOPTWR },
2068 { efXVG, "-bdhdl", "benchdhdl",ffOPTWR },
2069 { efXVG, "-bfield", "benchfld" ,ffOPTWR },
2070 { efXVG, "-btpi", "benchtpi", ffOPTWR },
2071 { efXVG, "-btpid", "benchtpid",ffOPTWR },
2072 { efGCT, "-bjo", "bench", ffOPTWR },
2073 { efXVG, "-bffout", "benchgct", ffOPTWR },
2074 { efXVG, "-bdevout","benchdev", ffOPTWR },
2075 { efXVG, "-brunav", "benchrnav",ffOPTWR },
2076 { efXVG, "-bpx", "benchpx", ffOPTWR },
2077 { efXVG, "-bpf", "benchpf", ffOPTWR },
2078 { efXVG, "-bro", "benchrot", ffOPTWR },
2079 { efLOG, "-bra", "benchrota",ffOPTWR },
2080 { efLOG, "-brs", "benchrots",ffOPTWR },
2081 { efLOG, "-brt", "benchrott",ffOPTWR },
2082 { efMTX, "-bmtx", "benchn", ffOPTWR },
2083 { efNDX, "-bdn", "bench", ffOPTWR }
2086 /* Command line options of mdrun */
2087 gmx_bool bDDBondCheck = TRUE;
2088 gmx_bool bDDBondComm = TRUE;
2089 gmx_bool bVerbose = FALSE;
2090 gmx_bool bCompact = TRUE;
2091 gmx_bool bSepPot = FALSE;
2092 gmx_bool bRerunVSite = FALSE;
2093 gmx_bool bIonize = FALSE;
2094 gmx_bool bConfout = TRUE;
2095 gmx_bool bReproducible = FALSE;
2096 gmx_bool bThreads = FALSE;
2098 int nmultisim=0;
2099 int nstglobalcomm=-1;
2100 int repl_ex_nst=0;
2101 int repl_ex_seed=-1;
2102 int nstepout=100;
2103 int nthreads=1;
2105 const char *ddno_opt[ddnoNR+1] =
2106 { NULL, "interleave", "pp_pme", "cartesian", NULL };
2107 const char *dddlb_opt[] =
2108 { NULL, "auto", "no", "yes", NULL };
2109 const char *procstring[] =
2110 { NULL, "-np", "-n", "none", NULL };
2111 const char *npmevalues_opt[] =
2112 { NULL, "auto", "all", "subset", NULL };
2113 real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
2114 char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
2115 char *deffnm=NULL;
2116 #define STD_CPT_PERIOD (15.0)
2117 real cpt_period=STD_CPT_PERIOD,max_hours=-1;
2118 gmx_bool bAppendFiles=TRUE;
2119 gmx_bool bKeepAndNumCPT=FALSE;
2120 gmx_bool bResetCountersHalfWay=FALSE;
2121 output_env_t oenv=NULL;
2123 t_pargs pa[] = {
2124 /***********************/
2125 /* g_tune_pme options: */
2126 /***********************/
2127 { "-np", FALSE, etINT, {&nnodes},
2128 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
2129 { "-npstring", FALSE, etENUM, {procstring},
2130 "Specify the number of processors to $MPIRUN using this string"},
2131 { "-passall", FALSE, etBOOL, {&bPassAll},
2132 "HIDDENPut arguments unknown to mdrun at the end of the command line. Can e.g. be used for debugging purposes. "},
2133 { "-nt", FALSE, etINT, {&nthreads},
2134 "Number of threads to run the tests on (turns MPI & mpirun off)"},
2135 { "-r", FALSE, etINT, {&repeats},
2136 "Repeat each test this often" },
2137 { "-max", FALSE, etREAL, {&maxPMEfraction},
2138 "Max fraction of PME nodes to test with" },
2139 { "-min", FALSE, etREAL, {&minPMEfraction},
2140 "Min fraction of PME nodes to test with" },
2141 { "-npme", FALSE, etENUM, {npmevalues_opt},
2142 "Benchmark all possible values for -npme or just the subset that is expected to perform well"},
2143 { "-upfac", FALSE, etREAL, {&upfac},
2144 "Upper limit for rcoulomb scaling factor (Note that rcoulomb upscaling results in fourier grid downscaling)" },
2145 { "-downfac", FALSE, etREAL, {&downfac},
2146 "Lower limit for rcoulomb scaling factor" },
2147 { "-ntpr", FALSE, etINT, {&ntprs},
2148 "Number of tpr files to benchmark. Create these many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of tpr files to test" },
2149 { "-four", FALSE, etREAL, {&fs},
2150 "Use this fourierspacing value instead of the grid found in the tpr input file. (Spacing applies to a scaling factor of 1.0 if multiple tpr files are written)" },
2151 { "-steps", FALSE, etGMX_LARGE_INT, {&bench_nsteps},
2152 "Take timings for these many steps in the benchmark runs" },
2153 { "-resetstep",FALSE, etINT, {&presteps},
2154 "Let dlb equilibrate these many steps before timings are taken (reset cycle counters after these many steps)" },
2155 { "-simsteps", FALSE, etGMX_LARGE_INT, {&new_sim_nsteps},
2156 "If non-negative, perform these many steps in the real run (overwrite nsteps from tpr, add cpt steps)" },
2157 { "-launch", FALSE, etBOOL, {&bLaunch},
2158 "Lauch the real simulation after optimization" },
2159 /******************/
2160 /* mdrun options: */
2161 /******************/
2162 { "-deffnm", FALSE, etSTR, {&deffnm},
2163 "Set the default filename for all file options at launch time" },
2164 { "-ddorder", FALSE, etENUM, {ddno_opt},
2165 "DD node order" },
2166 { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
2167 "Check for all bonded interactions with DD" },
2168 { "-ddbondcomm",FALSE, etBOOL, {&bDDBondComm},
2169 "HIDDENUse special bonded atom communication when -rdd > cut-off" },
2170 { "-rdd", FALSE, etREAL, {&rdd},
2171 "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
2172 { "-rcon", FALSE, etREAL, {&rconstr},
2173 "Maximum distance for P-LINCS (nm), 0 is estimate" },
2174 { "-dlb", FALSE, etENUM, {dddlb_opt},
2175 "Dynamic load balancing (with DD)" },
2176 { "-dds", FALSE, etREAL, {&dlb_scale},
2177 "Minimum allowed dlb scaling of the DD cell size" },
2178 { "-ddcsx", FALSE, etSTR, {&ddcsx},
2179 "HIDDENThe DD cell sizes in x" },
2180 { "-ddcsy", FALSE, etSTR, {&ddcsy},
2181 "HIDDENThe DD cell sizes in y" },
2182 { "-ddcsz", FALSE, etSTR, {&ddcsz},
2183 "HIDDENThe DD cell sizes in z" },
2184 { "-gcom", FALSE, etINT, {&nstglobalcomm},
2185 "Global communication frequency" },
2186 { "-v", FALSE, etBOOL, {&bVerbose},
2187 "Be loud and noisy" },
2188 { "-compact", FALSE, etBOOL, {&bCompact},
2189 "Write a compact log file" },
2190 { "-seppot", FALSE, etBOOL, {&bSepPot},
2191 "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
2192 { "-pforce", FALSE, etREAL, {&pforce},
2193 "Print all forces larger than this (kJ/mol nm)" },
2194 { "-reprod", FALSE, etBOOL, {&bReproducible},
2195 "Try to avoid optimizations that affect binary reproducibility" },
2196 { "-cpt", FALSE, etREAL, {&cpt_period},
2197 "Checkpoint interval (minutes)" },
2198 { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
2199 "Keep and number checkpoint files" },
2200 { "-append", FALSE, etBOOL, {&bAppendFiles},
2201 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2202 { "-maxh", FALSE, etREAL, {&max_hours},
2203 "Terminate after 0.99 times this time (hours)" },
2204 { "-multi", FALSE, etINT, {&nmultisim},
2205 "Do multiple simulations in parallel" },
2206 { "-replex", FALSE, etINT, {&repl_ex_nst},
2207 "Attempt replica exchange every # steps" },
2208 { "-reseed", FALSE, etINT, {&repl_ex_seed},
2209 "Seed for replica exchange, -1 is generate a seed" },
2210 { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
2211 "HIDDENRecalculate virtual site coordinates with -rerun" },
2212 { "-ionize", FALSE, etBOOL, {&bIonize},
2213 "Do a simulation including the effect of an X-Ray bombardment on your system" },
2214 { "-confout", FALSE, etBOOL, {&bConfout},
2215 "HIDDENWrite the last configuration with -c and force checkpointing at the last step" },
2216 { "-stepout", FALSE, etINT, {&nstepout},
2217 "HIDDENFrequency of writing the remaining runtime" },
2218 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2219 "HIDDENReset the cycle counters after half the number of steps or halfway -maxh (launch only)" }
2223 #define NFILE asize(fnm)
2225 CopyRight(stderr,argv[0]);
2227 seconds = gettime();
2229 parse_common_args(&argc,argv,PCA_NOEXIT_ON_ARGS,
2230 NFILE,fnm,asize(pa),pa,asize(desc),desc,
2231 0,NULL,&oenv);
2233 /* Store the remaining unparsed command line entries in a string */
2234 snew(ExtraArgs, 1);
2235 ExtraArgs[0] = '\0';
2236 for (i=1; i<argc; i++) /* argc will now be 1 if everything was understood */
2238 add_to_string(&ExtraArgs, argv[i]);
2239 add_to_string(&ExtraArgs, " ");
2241 if ( !bPassAll && (ExtraArgs[0] != '\0') )
2243 fprintf(stderr, "\nWARNING: The following arguments you provided have no effect:\n"
2244 "%s\n"
2245 "Use the -passall option to force them to appear on the command lines\n"
2246 "for the benchmark simulations%s.\n\n",
2247 ExtraArgs, bLaunch? " and at launch time" : "");
2250 if (opt2parg_bSet("-nt",asize(pa),pa))
2252 bThreads=TRUE;
2253 if (opt2parg_bSet("-npstring",asize(pa),pa))
2254 fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2256 if (nnodes > 1)
2257 gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2258 /* and now we just set this; a bit of an ugly hack*/
2259 nnodes=nthreads;
2261 /* Automatically set -beo options if -eo is set etc. */
2262 couple_files_options(NFILE,fnm);
2264 /* Construct the command line arguments for benchmark runs
2265 * as well as for the simulation run
2267 create_command_line_snippets(bThreads,presteps,NFILE,fnm,asize(pa),pa,procstring[0],
2268 &cmd_np, &cmd_args_bench, &cmd_args_launch,
2269 bPassAll? ExtraArgs : (char *)"");
2271 /* Read in checkpoint file if requested */
2272 sim_part = 1;
2273 if(opt2bSet("-cpi",NFILE,fnm))
2275 snew(cr,1);
2276 cr->duty=DUTY_PP; /* makes the following routine happy */
2277 read_checkpoint_simulation_part(opt2fn("-cpi",NFILE,fnm),
2278 &sim_part,&cpt_steps,cr,
2279 FALSE,NFILE,fnm,NULL,NULL);
2280 sfree(cr);
2281 sim_part++;
2282 /* sim_part will now be 1 if no checkpoint file was found */
2283 if (sim_part<=1)
2284 gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi",
2285 NFILE,
2286 fnm));
2289 /* Open performance output file and write header info */
2290 fp = ffopen(opt2fn("-p",NFILE,fnm),"w");
2292 /* Make a quick consistency check of command line parameters */
2293 check_input(nnodes, repeats, &ntprs, &upfac, &downfac, maxPMEfraction,
2294 minPMEfraction, fs, bench_nsteps, fnm, NFILE, sim_part, presteps,
2295 asize(pa),pa);
2297 /* Determine max and min number of PME nodes to test: */
2298 if (nnodes > 2)
2300 maxPMEnodes = floor(maxPMEfraction*nnodes);
2301 minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2302 fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2303 if (maxPMEnodes != minPMEnodes)
2304 fprintf(stdout, "- %d ", maxPMEnodes);
2305 fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2307 else
2309 maxPMEnodes = 0;
2310 minPMEnodes = 0;
2313 /* Get the commands we need to set up the runs from environment variables */
2314 get_program_paths(bThreads, &cmd_mpirun, cmd_np, &cmd_mdrun, repeats);
2316 /* Print some header info to file */
2317 sep_line(fp);
2318 fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
2319 sep_line(fp);
2320 fprintf(fp, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
2321 if (!bThreads)
2323 fprintf(fp, "Number of nodes : %d\n", nnodes);
2324 fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
2325 if ( strcmp(procstring[0], "none") != 0)
2326 fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]);
2327 else
2328 fprintf(fp, "Not setting number of nodes in system call\n");
2330 else
2331 fprintf(fp, "Number of threads : %d\n", nnodes);
2333 fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun);
2334 fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench);
2335 fprintf(fp, "Benchmark steps : ");
2336 fprintf(fp, gmx_large_int_pfmt, bench_nsteps);
2337 fprintf(fp, "\n");
2338 fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2339 if (sim_part > 1)
2341 fprintf(fp, "Checkpoint time step : ");
2342 fprintf(fp, gmx_large_int_pfmt, cpt_steps);
2343 fprintf(fp, "\n");
2345 if (bLaunch)
2346 fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2347 if (!bPassAll && ExtraArgs[0] != '\0')
2348 fprintf(fp, "Unused arguments : %s\n", ExtraArgs);
2349 if (new_sim_nsteps >= 0)
2351 bOverwrite = TRUE;
2352 fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE,fnm));
2353 fprintf(stderr, gmx_large_int_pfmt, new_sim_nsteps+cpt_steps);
2354 fprintf(stderr, " steps.\n");
2355 fprintf(fp, "Simulation steps : ");
2356 fprintf(fp, gmx_large_int_pfmt, new_sim_nsteps);
2357 fprintf(fp, "\n");
2359 if (repeats > 1)
2360 fprintf(fp, "Repeats for each test : %d\n", repeats);
2362 if (fs > 0.0)
2364 fprintf(fp, "Requested grid spacing : %f (This will be the grid spacing at a scaling factor of 1.0)\n", fs);
2367 fprintf(fp, "Input file : %s\n", opt2fn("-s",NFILE,fnm));
2369 /* Allocate memory for the inputinfo struct: */
2370 snew(info, 1);
2371 info->nr_inputfiles = ntprs;
2372 for (i=0; i<ntprs; i++)
2374 snew(info->rcoulomb , ntprs);
2375 snew(info->rvdw , ntprs);
2376 snew(info->rlist , ntprs);
2377 snew(info->rlistlong, ntprs);
2378 snew(info->nkx , ntprs);
2379 snew(info->nky , ntprs);
2380 snew(info->nkz , ntprs);
2381 snew(info->fsx , ntprs);
2382 snew(info->fsy , ntprs);
2383 snew(info->fsz , ntprs);
2385 /* Make alternative tpr files to test: */
2386 snew(tpr_names, ntprs);
2387 for (i=0; i<ntprs; i++)
2388 snew(tpr_names[i], STRLEN);
2390 make_benchmark_tprs(opt2fn("-s",NFILE,fnm), tpr_names, bench_nsteps+presteps,
2391 cpt_steps, upfac, downfac, ntprs, fs, info, fp);
2394 /********************************************************************************/
2395 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2396 /********************************************************************************/
2397 snew(perfdata, ntprs);
2398 do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npmevalues_opt[0], perfdata, &pmeentries,
2399 repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2400 cmd_args_bench, fnm, NFILE, sim_part, presteps, cpt_steps);
2402 fprintf(fp, "\nTuning took%8.1f minutes.\n", (gettime()-seconds)/60.0);
2404 /* Analyse the results and give a suggestion for optimal settings: */
2405 bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2406 repeats, info, &best_tpr, &best_npme);
2408 /* Take the best-performing tpr file and enlarge nsteps to original value */
2409 if ( bKeepTPR && !bOverwrite && !(fs > 0.0) )
2411 simulation_tpr = opt2fn("-s",NFILE,fnm);
2413 else
2415 simulation_tpr = opt2fn("-so",NFILE,fnm);
2416 modify_PMEsettings(bOverwrite? (new_sim_nsteps+cpt_steps) :
2417 info->orig_sim_steps, tpr_names[best_tpr],
2418 simulation_tpr);
2421 /* Now start the real simulation if the user requested it ... */
2422 launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2423 cmd_args_launch, simulation_tpr, nnodes, best_npme);
2424 ffclose(fp);
2426 /* ... or simply print the performance results to screen: */
2427 if (!bLaunch)
2428 finalize(opt2fn("-p", NFILE, fnm));
2430 return 0;