2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2014,2015,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "statistics.h"
43 #include "gromacs/math/functions.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/utility/fatalerror.h"
46 #include "gromacs/utility/real.h"
47 #include "gromacs/utility/smalloc.h"
49 static int gmx_dnint(double x
)
51 return gmx::roundToInt(x
);
54 typedef struct gmx_stats
{
55 double aa
, a
, b
, sigma_aa
, sigma_a
, sigma_b
, aver
, sigma_aver
, error
;
56 double rmsd
, Rdata
, Rfit
, Rfitaa
, chi2
, chi2aa
;
57 double *x
, *y
, *dx
, *dy
;
62 gmx_stats_t
gmx_stats_init()
68 return static_cast<gmx_stats_t
>(stats
);
71 int gmx_stats_get_npoints(gmx_stats_t gstats
, int *N
)
73 gmx_stats
*stats
= static_cast<gmx_stats
*>(gstats
);
80 void gmx_stats_free(gmx_stats_t gstats
)
82 gmx_stats
*stats
= static_cast<gmx_stats
*>(gstats
);
91 int gmx_stats_add_point(gmx_stats_t gstats
, double x
, double y
,
94 gmx_stats
*stats
= gstats
;
96 if (stats
->np
+1 >= stats
->nalloc
)
98 if (stats
->nalloc
== 0)
100 stats
->nalloc
= 1024;
106 srenew(stats
->x
, stats
->nalloc
);
107 srenew(stats
->y
, stats
->nalloc
);
108 srenew(stats
->dx
, stats
->nalloc
);
109 srenew(stats
->dy
, stats
->nalloc
);
110 for (int i
= stats
->np
; (i
< stats
->nalloc
); i
++)
118 stats
->x
[stats
->np
] = x
;
119 stats
->y
[stats
->np
] = y
;
120 stats
->dx
[stats
->np
] = dx
;
121 stats
->dy
[stats
->np
] = dy
;
128 int gmx_stats_get_point(gmx_stats_t gstats
, real
*x
, real
*y
,
129 real
*dx
, real
*dy
, real level
)
131 gmx_stats
*stats
= gstats
;
135 if ((ok
= gmx_stats_get_rmsd(gstats
, &rmsd
)) != estatsOK
)
140 while ((outlier
== 0) && (stats
->np_c
< stats
->np
))
142 r
= std::abs(stats
->x
[stats
->np_c
] - stats
->y
[stats
->np_c
]);
143 outlier
= static_cast<int>(r
> rmsd
*level
);
148 *x
= stats
->x
[stats
->np_c
];
152 *y
= stats
->y
[stats
->np_c
];
156 *dx
= stats
->dx
[stats
->np_c
];
160 *dy
= stats
->dy
[stats
->np_c
];
173 return estatsNO_POINTS
;
176 int gmx_stats_add_points(gmx_stats_t gstats
, int n
, real
*x
, real
*y
,
179 for (int i
= 0; (i
< n
); i
++)
182 if ((ok
= gmx_stats_add_point(gstats
, x
[i
], y
[i
],
183 (nullptr != dx
) ? dx
[i
] : 0,
184 (nullptr != dy
) ? dy
[i
] : 0)) != estatsOK
)
192 static int gmx_stats_compute(gmx_stats
*stats
, int weight
)
194 double yy
, yx
, xx
, sx
, sy
, dy
, chi2
, chi2aa
, d2
;
195 double ssxx
, ssyy
, ssxy
;
196 double w
, wtot
, yx_nw
, sy_nw
, sx_nw
, yy_nw
, xx_nw
, dx2
, dy2
;
200 if (stats
->computed
== 0)
204 return estatsNO_POINTS
;
214 for (int i
= 0; (i
< N
); i
++)
216 d2
+= gmx::square(stats
->x
[i
]-stats
->y
[i
]);
217 if (((stats
->dy
[i
]) != 0.0) && (weight
== elsqWEIGHT_Y
))
219 w
= 1/gmx::square(stats
->dy
[i
]);
228 xx
+= w
*gmx::square(stats
->x
[i
]);
229 xx_nw
+= gmx::square(stats
->x
[i
]);
231 yy
+= w
*gmx::square(stats
->y
[i
]);
232 yy_nw
+= gmx::square(stats
->y
[i
]);
234 yx
+= w
*stats
->y
[i
]*stats
->x
[i
];
235 yx_nw
+= stats
->y
[i
]*stats
->x
[i
];
238 sx_nw
+= stats
->x
[i
];
241 sy_nw
+= stats
->y
[i
];
244 /* Compute average, sigma and error */
245 stats
->aver
= sy_nw
/N
;
246 stats
->sigma_aver
= std::sqrt(yy_nw
/N
- gmx::square(sy_nw
/N
));
247 stats
->error
= stats
->sigma_aver
/std::sqrt(static_cast<double>(N
));
249 /* Compute RMSD between x and y */
250 stats
->rmsd
= std::sqrt(d2
/N
);
252 /* Correlation coefficient for data */
258 ssxx
= N
*(xx_nw
- gmx::square(sx_nw
));
259 ssyy
= N
*(yy_nw
- gmx::square(sy_nw
));
260 ssxy
= N
*(yx_nw
- (sx_nw
*sy_nw
));
261 stats
->Rdata
= std::sqrt(gmx::square(ssxy
)/(ssxx
*ssyy
));
263 /* Compute straight line through datapoints, either with intercept
264 zero (result in aa) or with intercept variable (results in a
272 stats
->a
= (yx
-sx
*sy
)/(xx
-sx
*sx
);
273 stats
->b
= (sy
)-(stats
->a
)*(sx
);
275 /* Compute chi2, deviation from a line y = ax+b. Also compute
276 chi2aa which returns the deviation from a line y = ax. */
279 for (int i
= 0; (i
< N
); i
++)
281 if (stats
->dy
[i
] > 0)
289 chi2aa
+= gmx::square((stats
->y
[i
]-(stats
->aa
*stats
->x
[i
]))/dy
);
290 chi2
+= gmx::square((stats
->y
[i
]-(stats
->a
*stats
->x
[i
]+stats
->b
))/dy
);
294 stats
->chi2
= std::sqrt(chi2
/(N
-2));
295 stats
->chi2aa
= std::sqrt(chi2aa
/(N
-2));
297 /* Look up equations! */
300 stats
->sigma_a
= std::sqrt(stats
->chi2
/((N
-2)*dx2
));
301 stats
->sigma_b
= stats
->sigma_a
*std::sqrt(xx
);
302 stats
->Rfit
= std::abs(ssxy
)/std::sqrt(ssxx
*ssyy
);
303 stats
->Rfitaa
= stats
->aa
*std::sqrt(dx2
/dy2
);
321 int gmx_stats_get_ab(gmx_stats_t gstats
, int weight
,
322 real
*a
, real
*b
, real
*da
, real
*db
,
323 real
*chi2
, real
*Rfit
)
325 gmx_stats
*stats
= gstats
;
328 if ((ok
= gmx_stats_compute(stats
, weight
)) != estatsOK
)
342 *da
= stats
->sigma_a
;
346 *db
= stats
->sigma_b
;
360 int gmx_stats_get_a(gmx_stats_t gstats
, int weight
, real
*a
, real
*da
,
361 real
*chi2
, real
*Rfit
)
363 gmx_stats
*stats
= gstats
;
366 if ((ok
= gmx_stats_compute(stats
, weight
)) != estatsOK
)
376 *da
= stats
->sigma_aa
;
380 *chi2
= stats
->chi2aa
;
384 *Rfit
= stats
->Rfitaa
;
390 int gmx_stats_get_average(gmx_stats_t gstats
, real
*aver
)
392 gmx_stats
*stats
= gstats
;
395 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
405 int gmx_stats_get_ase(gmx_stats_t gstats
, real
*aver
, real
*sigma
, real
*error
)
407 gmx_stats
*stats
= gstats
;
410 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
419 if (nullptr != sigma
)
421 *sigma
= stats
->sigma_aver
;
423 if (nullptr != error
)
425 *error
= stats
->error
;
431 int gmx_stats_get_sigma(gmx_stats_t gstats
, real
*sigma
)
433 gmx_stats
*stats
= gstats
;
436 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
441 *sigma
= stats
->sigma_aver
;
446 int gmx_stats_get_error(gmx_stats_t gstats
, real
*error
)
448 gmx_stats
*stats
= gstats
;
451 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
456 *error
= stats
->error
;
461 int gmx_stats_get_corr_coeff(gmx_stats_t gstats
, real
*R
)
463 gmx_stats
*stats
= gstats
;
466 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
476 int gmx_stats_get_rmsd(gmx_stats_t gstats
, real
*rmsd
)
478 gmx_stats
*stats
= gstats
;
481 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
491 int gmx_stats_dump_xy(gmx_stats_t gstats
, FILE *fp
)
493 gmx_stats
*stats
= gstats
;
495 for (int i
= 0; (i
< stats
->np
); i
++)
497 fprintf(fp
, "%12g %12g %12g %12g\n", stats
->x
[i
], stats
->y
[i
],
498 stats
->dx
[i
], stats
->dy
[i
]);
504 int gmx_stats_remove_outliers(gmx_stats_t gstats
, double level
)
506 gmx_stats
*stats
= gstats
;
507 int iter
= 1, done
= 0, ok
;
510 while ((stats
->np
>= 10) && !done
)
512 if ((ok
= gmx_stats_get_rmsd(gstats
, &rmsd
)) != estatsOK
)
517 for (int i
= 0; (i
< stats
->np
); )
519 r
= std::abs(stats
->x
[i
]-stats
->y
[i
]);
522 fprintf(stderr
, "Removing outlier, iter = %d, rmsd = %g, x = %g, y = %g\n",
523 iter
, rmsd
, stats
->x
[i
], stats
->y
[i
]);
526 stats
->x
[i
] = stats
->x
[stats
->np
-1];
527 stats
->y
[i
] = stats
->y
[stats
->np
-1];
528 stats
->dx
[i
] = stats
->dx
[stats
->np
-1];
529 stats
->dy
[i
] = stats
->dy
[stats
->np
-1];
545 int gmx_stats_make_histogram(gmx_stats_t gstats
, real binwidth
, int *nb
,
546 int ehisto
, int normalized
, real
**x
, real
**y
)
548 gmx_stats
*stats
= gstats
;
549 int index
= 0, nbins
= *nb
, *nindex
;
550 double minx
, maxx
, maxy
, miny
, delta
, dd
, minh
;
552 if (((binwidth
<= 0) && (nbins
<= 0)) ||
553 ((binwidth
> 0) && (nbins
> 0)))
555 return estatsINVALID_INPUT
;
559 return estatsNO_POINTS
;
561 minx
= maxx
= stats
->x
[0];
562 miny
= maxy
= stats
->y
[0];
563 for (int i
= 1; (i
< stats
->np
); i
++)
565 miny
= (stats
->y
[i
] < miny
) ? stats
->y
[i
] : miny
;
566 maxy
= (stats
->y
[i
] > maxy
) ? stats
->y
[i
] : maxy
;
567 minx
= (stats
->x
[i
] < minx
) ? stats
->x
[i
] : minx
;
568 maxx
= (stats
->x
[i
] > maxx
) ? stats
->x
[i
] : maxx
;
570 if (ehisto
== ehistoX
)
575 else if (ehisto
== ehistoY
)
582 return estatsINVALID_INPUT
;
587 binwidth
= (delta
)/nbins
;
591 nbins
= gmx_dnint((delta
)/binwidth
+ 0.5);
595 for (int i
= 0; (i
< nbins
); i
++)
597 (*x
)[i
] = minh
+ binwidth
*(i
+0.5);
605 dd
= 1.0/(binwidth
*stats
->np
);
609 for (int i
= 0; (i
< stats
->np
); i
++)
611 if (ehisto
== ehistoY
)
613 index
= static_cast<int>((stats
->y
[i
]-miny
)/binwidth
);
615 else if (ehisto
== ehistoX
)
617 index
= static_cast<int>((stats
->x
[i
]-minx
)/binwidth
);
634 for (int i
= 0; (i
< nbins
); i
++)
638 (*y
)[i
] /= nindex
[i
];
647 static const char *stats_error
[estatsNR
] =
649 "All well in STATS land",
652 "Invalid histogram input",
654 "Not implemented yet"
657 const char *gmx_stats_message(int estats
)
659 if ((estats
>= 0) && (estats
< estatsNR
))
661 return stats_error
[estats
];
665 return stats_error
[estatsERROR
];
669 /* Old convenience functions, should be merged with the core
671 int lsq_y_ax(int n
, real x
[], real y
[], real
*a
)
673 gmx_stats_t lsq
= gmx_stats_init();
677 gmx_stats_add_points(lsq
, n
, x
, y
, nullptr, nullptr);
678 ok
= gmx_stats_get_a(lsq
, elsqWEIGHT_NONE
, a
, &da
, &chi2
, &Rfit
);
684 static int low_lsq_y_ax_b(int n
, const real
*xr
, const double *xd
, real yr
[],
685 real
*a
, real
*b
, real
*r
, real
*chi2
)
687 gmx_stats_t lsq
= gmx_stats_init();
690 for (int i
= 0; (i
< n
); i
++)
698 else if (xr
!= nullptr)
704 gmx_incons("Either xd or xr has to be non-NULL in low_lsq_y_ax_b()");
707 if ((ok
= gmx_stats_add_point(lsq
, pt
, yr
[i
], 0, 0)) != estatsOK
)
713 ok
= gmx_stats_get_ab(lsq
, elsqWEIGHT_NONE
, a
, b
, nullptr, nullptr, chi2
, r
);
719 int lsq_y_ax_b(int n
, real x
[], real y
[], real
*a
, real
*b
, real
*r
, real
*chi2
)
721 return low_lsq_y_ax_b(n
, x
, nullptr, y
, a
, b
, r
, chi2
);
724 int lsq_y_ax_b_xdouble(int n
, double x
[], real y
[], real
*a
, real
*b
,
727 return low_lsq_y_ax_b(n
, nullptr, x
, y
, a
, b
, r
, chi2
);
730 int lsq_y_ax_b_error(int n
, real x
[], real y
[], real dy
[],
731 real
*a
, real
*b
, real
*da
, real
*db
,
734 gmx_stats_t lsq
= gmx_stats_init();
737 for (int i
= 0; (i
< n
); i
++)
739 ok
= gmx_stats_add_point(lsq
, x
[i
], y
[i
], 0, dy
[i
]);
746 ok
= gmx_stats_get_ab(lsq
, elsqWEIGHT_Y
, a
, b
, da
, db
, chi2
, r
);