changed reading hint
[gromacs/adressmacs.git] / src / tools / g_analyze.c
blob6190f309773706fba15fb878875af1b7a71a5d3f
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 2.0
12 * Copyright (c) 1991-1999
13 * BIOSON Research Institute, Dept. of Biophysical Chemistry
14 * University of Groningen, The Netherlands
16 * Please refer to:
17 * GROMACS: A message-passing parallel molecular dynamics implementation
18 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
19 * Comp. Phys. Comm. 91, 43-56 (1995)
21 * Also check out our WWW page:
22 * http://md.chem.rug.nl/~gmx
23 * or e-mail to:
24 * gromacs@chem.rug.nl
26 * And Hey:
27 * GRowing Old MAkes el Chrono Sweat
29 static char *SRCID_g_analyze_c = "$Id$";
31 #include <math.h>
32 #include <string.h>
33 #include "statutil.h"
34 #include "sysstuff.h"
35 #include "typedefs.h"
36 #include "smalloc.h"
37 #include "macros.h"
38 #include "fatal.h"
39 #include "vec.h"
40 #include "copyrite.h"
41 #include "futil.h"
42 #include "statutil.h"
43 #include "txtdump.h"
44 #include "gstat.h"
45 #include "xvgr.h"
47 real **read_val(char *fn,bool bHaveT,int nsets_in,
48 int *nset,int *nval,real *t0,real *dt)
50 FILE *fp;
51 char line0[4096],*line,*format;
52 int a,narg,n,sin,set,nchar;
53 double dbl,tend;
54 bool bEndOfSet;
55 real **val;
57 val = NULL;
58 fp = ffopen(fn,"r");
59 for(sin=0; sin<nsets_in; sin++) {
60 if (nsets_in == 1)
61 narg = 0;
62 else
63 narg = bHaveT ? 2 : 1;
64 n = 0;
65 bEndOfSet = FALSE;
66 while (!bEndOfSet && fgets(line0,STRLEN-1,fp)) {
67 line = line0;
68 bEndOfSet = (line[0] == '&');
69 if ((line[0] != '#') && (line[0] != '@') && !bEndOfSet) {
70 a = 0;
71 while (((a<narg) || ((nsets_in==1) && (n==0))) &&
72 (line[0] != '\n') && sscanf(line,"%lf%n",&dbl,&nchar)) {
73 if (sin) {
74 if (!bHaveT || (a>0))
75 set = sin;
76 else
77 set = -1;
78 } else {
79 if (!bHaveT)
80 set = a;
81 else
82 set = a-1;
84 if (n==0) {
85 if (nsets_in == 1)
86 narg++;
87 if (set == -1)
88 *t0 = dbl;
89 else {
90 *nset = set+1;
91 srenew(val,*nset);
92 val[set] = NULL;
95 if (set == -1)
96 tend = dbl;
97 else {
98 if (n % 100 == 0)
99 srenew(val[set],n+100);
100 val[set][n] = (real)dbl;
102 a++;
103 line += nchar;
105 n++;
106 if (a != narg)
107 fprintf(stderr,"Invalid line in %s: '%s'\n",fn,line0);
110 if (sin==0) {
111 *nval = n;
112 if (!bHaveT)
113 *dt = 1.0;
114 else
115 *dt = (real)(tend-*t0)/(n-1.0);
116 } else {
117 if (n < *nval) {
118 fprintf(stderr,"Set %d is shorter (%d) than the previous set (%d)\n",
119 sin+1,n,*nval);
120 *nval = n;
121 fprintf(stderr,"Will use only the first %d points of every set\n",
122 *nval);
126 fclose(fp);
128 return val;
131 void histogram(char *distfile,real binwidth,int n, int nset, real **val)
133 FILE *fp;
134 int i,s;
135 real min,max;
136 int nbin;
137 real *histo;
139 min=val[0][0];
140 max=val[0][0];
141 for(s=0; s<nset; s++)
142 for(i=0; i<n; i++)
143 if (val[s][i] < min)
144 min = val[s][i];
145 else if (val[s][i] > max)
146 max = val[s][i];
148 if (-min > max)
149 max = -min;
150 nbin = (int)(max/binwidth)+1;
151 fprintf(stderr,"Making distributions with %d bins\n",2*nbin+1);
152 snew(histo,2*nbin+1);
153 fp = xvgropen(distfile,"Distribution","","");
154 for(s=0; s<nset; s++) {
155 for(i=0; i<2*nbin+1; i++)
156 histo[i] = 0;
157 for(i=0; i<n; i++)
158 histo[nbin+(int)(floor(val[s][i]/binwidth+0.5))]++;
159 for(i=0; i<2*nbin+1; i++)
160 fprintf(fp," %g %g\n",(i-nbin)*binwidth,(real)histo[i]/(n*binwidth));
161 if (s<nset-1)
162 fprintf(fp,"&\n");
164 fclose(fp);
167 void average(char *avfile,char **avbar_opt,
168 int n, int nset,real **val,real t0,real dt)
170 FILE *fp;
171 int i,s;
172 real av,var,err;
173 char c;
175 c = avbar_opt[0][0];
177 fp = ffopen(avfile,"w");
178 if ((c == 'e') && (nset == 1))
179 c = 'n';
180 if (c != 'n')
181 fprintf(fp,"@TYPE xydy\n");
184 for(i=0; i<n; i++) {
185 av = 0;
186 for(s=0; s<nset; s++)
187 av += val[s][i];
188 av /= nset;
189 fprintf(fp," %g %g",t0+dt*i,av);
190 var = 0;
191 if (c != 'n') {
192 for(s=0; s<nset; s++)
193 var += sqr(val[s][i]-av);
194 if (c == 's')
195 err = sqrt(var/nset);
196 else
197 err = sqrt(var/(nset*(nset-1)));
198 fprintf(fp," %g",err);
200 fprintf(fp,"\n");
202 fclose(fp);
205 void estimate_error(char *eefile,int resol,int n,int nset,
206 real *av,real **val,real dt)
208 FILE *fp;
209 int log2max,rlog2,bs,prev_bs,nb;
210 int s,i,j;
211 real blav,var;
212 char **leg;
214 log2max = (int)(log(n)/log(2));
216 snew(leg,nset);
217 for(s=0; s<nset; s++) {
218 snew(leg[s],STRLEN);
219 sprintf(leg[s],"av %f",av[s]);
222 fp = xvgropen(eefile,"Error estimates","Block size (time)","Error estimate");
223 fprintf(fp,
224 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
225 n*dt,n);
226 xvgr_legend(fp,nset,leg);
227 for(s=0; s<nset; s++)
228 sfree(leg[s]);
229 sfree(leg);
231 for(s=0; s<nset; s++) {
232 prev_bs = 0;
233 for(rlog2=resol*log2max; rlog2>=2*resol; rlog2--) {
234 bs = n*pow(0.5,(real)rlog2/(real)resol);
235 if (bs != prev_bs) {
236 nb = 0;
237 i = 0;
238 var = 0;
239 while (i+bs <= n) {
240 blav=0;
241 for (j=0; j<bs; j++) {
242 blav += val[s][i];
243 i++;
245 var += sqr(av[s] - blav/bs);
246 nb++;
248 fprintf(fp," %g %g\n",bs*dt,sqrt(var/(nb*(nb-1))));
250 prev_bs = bs;
252 if (s < nset)
253 fprintf(fp,"&\n");
256 fclose(fp);
259 int main(int argc,char *argv[])
261 static char *desc[] = {
262 "g_analyze reads an ascii file and analyzes data sets.",
263 "A line in the input file may start with a time",
264 "(see option [TT]-time[tt]) and any number of y values may follow.",
265 "Multiple sets can also be",
266 "read when they are seperated by & (option [TT]-n[tt]),",
267 "in this case only one y value is read from each line.",
268 "All lines starting with # and @ are skipped.",
269 "All analyses can also be done for the derivative of a set",
270 "(option [TT]-d[tt]).[PAR]",
271 "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",
272 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
273 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
274 "Option [TT]-av[tt] produces the average over the sets,",
275 "optionally with error bars ([TT]-errbar[tt]).[PAR]",
276 "Option [TT]-ee[tt] produces error estimates using block averaging.",
277 "A set is divided in a number of blocks and averages are calculated for",
278 "each block. The error for the total average is calculated from the",
279 "variance between the block averages. These errors are plotted as a",
280 "function of the block size. For a good error estimate the block size",
281 "should be at least as large as the correlation time, but possibly much",
282 "larger.[PAR]"
284 static real frac=0.5,binwidth=0.1;
285 static bool bHaveT=TRUE,bDer=FALSE,bSubAv=FALSE,bAverCorr=FALSE;
286 static int nsets_in=1,d=1,resol=8;
288 static char *avbar_opt[] = { NULL, "none", "stddev", "error", NULL };
290 t_pargs pa[] = {
291 { "-time", FALSE, etBOOL, {&bHaveT},
292 "Expect a time in the input" },
293 { "-n", FALSE, etINT, {&nsets_in},
294 "Read # sets seperated by &" },
295 { "-d", FALSE, etBOOL, {&bDer},
296 "Use the derivative" },
297 { "-dp", FALSE, etINT, {&d},
298 "HIDDENThe derivative is the difference over # points" },
299 { "-bw", FALSE, etREAL, {&binwidth},
300 "Binwidth for the distribution" },
301 { "-errbar", FALSE, etENUM, {&avbar_opt},
302 "Error bars for the average" },
303 { "-resol", FALSE, etINT, {&resol},
304 "HIDDENResolution for the block averaging, block size increases with"
305 " a factor 2^(1/#)" },
306 { "-subav", FALSE, etBOOL, {&bSubAv},
307 "Subtract the average before autocorrelating" },
308 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
309 "Calculate one ACF over all sets" }
311 #define NPA asize(pa)
313 FILE *out;
314 int n,nlast,s,nset,i,t;
315 real **val,t0,dt,tot,*av;
316 char *acfile,*msdfile,*distfile,*avfile,*eefile;
318 t_filenm fnm[] = {
319 { efXVG, "-f", "graph", ffREAD },
320 { efXVG, "-ac", "autocorr", ffOPTWR },
321 { efXVG, "-msd", "msd", ffOPTWR },
322 { efXVG, "-dist", "distr", ffOPTWR },
323 { efXVG, "-av", "average", ffOPTWR },
324 { efXVG, "-ee", "errest", ffOPTWR }
326 #define NFILE asize(fnm)
328 int npargs;
329 t_pargs *ppa;
331 npargs = asize(pa);
332 ppa = add_acf_pargs(&npargs,pa);
334 CopyRight(stderr,argv[0]);
335 parse_common_args(&argc,argv,0,TRUE,
336 NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL);
338 acfile = opt2fn_null("-ac",NFILE,fnm);
339 msdfile = opt2fn_null("-msd",NFILE,fnm);
340 distfile = opt2fn_null("-dist",NFILE,fnm);
341 avfile = opt2fn_null("-av",NFILE,fnm);
342 eefile = opt2fn_null("-ee",NFILE,fnm);
343 if (!acfile && !msdfile && !distfile && !avfile && !eefile) {
344 fprintf(stderr,"Please use one of the output file options\n");
345 exit(0);
348 val=read_val(opt2fn("-f",NFILE,fnm),bHaveT,nsets_in,&nset,&n,&t0,&dt);
349 fprintf(stderr,"Read %d sets of %d points, dt = %g\n",nset,n,dt);
350 if (bDer) {
351 fprintf(stderr,"Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n",
352 d,d);
353 n -= d;
354 for(s=0; s<nset; s++)
355 for(i=0; i<n; i++)
356 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
359 snew(av,nset);
360 for(s=0; s<nset; s++) {
361 for(i=0; i<n; i++)
362 av[s] += val[s][i];
363 av[s] /= n;
364 fprintf(stderr,"Average of set %d: %g\n",s+1,av[s]);
367 if (msdfile) {
368 out=xvgropen(msdfile,"Mean square displacement",
369 "time (ps)","MSD (nm\\S2\\N)");
370 nlast = (int)(n*frac);
371 for(s=0; s<nset; s++) {
372 for(t=0; t<=nlast; t++) {
373 if (t % 100 == 0)
374 fprintf(stderr,"\r%d",t);
375 tot=0;
376 for(i=0; i<n-t; i++)
377 tot += sqr(val[s][i]-val[s][i+t]);
378 tot /= (real)(n-t);
379 fprintf(out," %g %8g\n",dt*t,tot);
381 if (s<nset-1)
382 fprintf(out,"&\n");
384 fclose(out);
385 fprintf(stderr,"\r%d, time=%g\n",t-1,(t-1)*dt);
388 if (distfile)
389 histogram(distfile,binwidth,n,nset,val);
391 if (avfile)
392 average(avfile,avbar_opt,n,nset,val,t0,dt);
394 if (eefile)
395 estimate_error(eefile,resol,n,nset,av,val,dt);
397 if (acfile) {
398 if (bSubAv)
399 for(s=0; s<nset; s++)
400 for(i=0; i<n; i++)
401 val[s][i] -= av[s];
402 do_autocorr(acfile,"Autocorrelation",n,nset,val,dt,
403 eacNormal,bAverCorr);
406 return 0;