2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2014,2015,2017,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "statistics.h"
44 #include "gromacs/math/functions.h"
45 #include "gromacs/math/vec.h"
46 #include "gromacs/utility/fatalerror.h"
47 #include "gromacs/utility/real.h"
48 #include "gromacs/utility/smalloc.h"
50 static int gmx_dnint(double x
)
52 return gmx::roundToInt(x
);
55 typedef struct gmx_stats
57 double aa
, a
, b
, sigma_aa
, sigma_a
, sigma_b
, aver
, sigma_aver
, error
;
58 double rmsd
, Rdata
, Rfit
, Rfitaa
, chi2
, chi2aa
;
59 double *x
, *y
, *dx
, *dy
;
64 gmx_stats_t
gmx_stats_init()
70 return static_cast<gmx_stats_t
>(stats
);
73 int gmx_stats_get_npoints(gmx_stats_t gstats
, int* N
)
75 gmx_stats
* stats
= static_cast<gmx_stats
*>(gstats
);
82 void gmx_stats_free(gmx_stats_t gstats
)
84 gmx_stats
* stats
= static_cast<gmx_stats
*>(gstats
);
93 int gmx_stats_add_point(gmx_stats_t gstats
, double x
, double y
, double dx
, double dy
)
95 gmx_stats
* stats
= gstats
;
97 if (stats
->np
+ 1 >= stats
->nalloc
)
99 if (stats
->nalloc
== 0)
101 stats
->nalloc
= 1024;
107 srenew(stats
->x
, stats
->nalloc
);
108 srenew(stats
->y
, stats
->nalloc
);
109 srenew(stats
->dx
, stats
->nalloc
);
110 srenew(stats
->dy
, stats
->nalloc
);
111 for (int i
= stats
->np
; (i
< stats
->nalloc
); i
++)
119 stats
->x
[stats
->np
] = x
;
120 stats
->y
[stats
->np
] = y
;
121 stats
->dx
[stats
->np
] = dx
;
122 stats
->dy
[stats
->np
] = dy
;
129 int gmx_stats_get_point(gmx_stats_t gstats
, real
* x
, real
* y
, real
* dx
, real
* dy
, real level
)
131 gmx_stats
* stats
= gstats
;
135 if ((ok
= gmx_stats_get_rmsd(gstats
, &rmsd
)) != estatsOK
)
140 while ((outlier
== 0) && (stats
->np_c
< stats
->np
))
142 r
= std::abs(stats
->x
[stats
->np_c
] - stats
->y
[stats
->np_c
]);
143 outlier
= static_cast<int>(r
> rmsd
* level
);
148 *x
= stats
->x
[stats
->np_c
];
152 *y
= stats
->y
[stats
->np_c
];
156 *dx
= stats
->dx
[stats
->np_c
];
160 *dy
= stats
->dy
[stats
->np_c
];
173 return estatsNO_POINTS
;
176 int gmx_stats_add_points(gmx_stats_t gstats
, int n
, real
* x
, real
* y
, real
* dx
, real
* dy
)
178 for (int i
= 0; (i
< n
); i
++)
181 if ((ok
= gmx_stats_add_point(gstats
, x
[i
], y
[i
], (nullptr != dx
) ? dx
[i
] : 0,
182 (nullptr != dy
) ? dy
[i
] : 0))
191 static int gmx_stats_compute(gmx_stats
* stats
, int weight
)
193 double yy
, yx
, xx
, sx
, sy
, dy
, chi2
, chi2aa
, d2
;
194 double ssxx
, ssyy
, ssxy
;
195 double w
, wtot
, yx_nw
, sy_nw
, sx_nw
, yy_nw
, xx_nw
, dx2
, dy2
;
199 if (stats
->computed
== 0)
203 return estatsNO_POINTS
;
213 for (int i
= 0; (i
< N
); i
++)
215 d2
+= gmx::square(stats
->x
[i
] - stats
->y
[i
]);
216 if (((stats
->dy
[i
]) != 0.0) && (weight
== elsqWEIGHT_Y
))
218 w
= 1 / gmx::square(stats
->dy
[i
]);
227 xx
+= w
* gmx::square(stats
->x
[i
]);
228 xx_nw
+= gmx::square(stats
->x
[i
]);
230 yy
+= w
* gmx::square(stats
->y
[i
]);
231 yy_nw
+= gmx::square(stats
->y
[i
]);
233 yx
+= w
* stats
->y
[i
] * stats
->x
[i
];
234 yx_nw
+= stats
->y
[i
] * stats
->x
[i
];
236 sx
+= w
* stats
->x
[i
];
237 sx_nw
+= stats
->x
[i
];
239 sy
+= w
* stats
->y
[i
];
240 sy_nw
+= stats
->y
[i
];
243 /* Compute average, sigma and error */
244 stats
->aver
= sy_nw
/ N
;
245 stats
->sigma_aver
= std::sqrt(yy_nw
/ N
- gmx::square(sy_nw
/ N
));
246 stats
->error
= stats
->sigma_aver
/ std::sqrt(static_cast<double>(N
));
248 /* Compute RMSD between x and y */
249 stats
->rmsd
= std::sqrt(d2
/ N
);
251 /* Correlation coefficient for data */
257 ssxx
= N
* (xx_nw
- gmx::square(sx_nw
));
258 ssyy
= N
* (yy_nw
- gmx::square(sy_nw
));
259 ssxy
= N
* (yx_nw
- (sx_nw
* sy_nw
));
260 stats
->Rdata
= std::sqrt(gmx::square(ssxy
) / (ssxx
* ssyy
));
262 /* Compute straight line through datapoints, either with intercept
263 zero (result in aa) or with intercept variable (results in a
270 stats
->aa
= (yx
/ xx
);
271 stats
->a
= (yx
- sx
* sy
) / (xx
- sx
* sx
);
272 stats
->b
= (sy
) - (stats
->a
) * (sx
);
274 /* Compute chi2, deviation from a line y = ax+b. Also compute
275 chi2aa which returns the deviation from a line y = ax. */
278 for (int i
= 0; (i
< N
); i
++)
280 if (stats
->dy
[i
] > 0)
288 chi2aa
+= gmx::square((stats
->y
[i
] - (stats
->aa
* stats
->x
[i
])) / dy
);
289 chi2
+= gmx::square((stats
->y
[i
] - (stats
->a
* stats
->x
[i
] + stats
->b
)) / dy
);
293 stats
->chi2
= std::sqrt(chi2
/ (N
- 2));
294 stats
->chi2aa
= std::sqrt(chi2aa
/ (N
- 2));
296 /* Look up equations! */
297 dx2
= (xx
- sx
* sx
);
298 dy2
= (yy
- sy
* sy
);
299 stats
->sigma_a
= std::sqrt(stats
->chi2
/ ((N
- 2) * dx2
));
300 stats
->sigma_b
= stats
->sigma_a
* std::sqrt(xx
);
301 stats
->Rfit
= std::abs(ssxy
) / std::sqrt(ssxx
* ssyy
);
302 stats
->Rfitaa
= stats
->aa
* std::sqrt(dx2
/ dy2
);
320 int gmx_stats_get_ab(gmx_stats_t gstats
, int weight
, real
* a
, real
* b
, real
* da
, real
* db
, real
* chi2
, real
* Rfit
)
322 gmx_stats
* stats
= gstats
;
325 if ((ok
= gmx_stats_compute(stats
, weight
)) != estatsOK
)
339 *da
= stats
->sigma_a
;
343 *db
= stats
->sigma_b
;
357 int gmx_stats_get_a(gmx_stats_t gstats
, int weight
, real
* a
, real
* da
, real
* chi2
, real
* Rfit
)
359 gmx_stats
* stats
= gstats
;
362 if ((ok
= gmx_stats_compute(stats
, weight
)) != estatsOK
)
372 *da
= stats
->sigma_aa
;
376 *chi2
= stats
->chi2aa
;
380 *Rfit
= stats
->Rfitaa
;
386 int gmx_stats_get_average(gmx_stats_t gstats
, real
* aver
)
388 gmx_stats
* stats
= gstats
;
391 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
401 int gmx_stats_get_ase(gmx_stats_t gstats
, real
* aver
, real
* sigma
, real
* error
)
403 gmx_stats
* stats
= gstats
;
406 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
415 if (nullptr != sigma
)
417 *sigma
= stats
->sigma_aver
;
419 if (nullptr != error
)
421 *error
= stats
->error
;
427 int gmx_stats_get_sigma(gmx_stats_t gstats
, real
* sigma
)
429 gmx_stats
* stats
= gstats
;
432 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
437 *sigma
= stats
->sigma_aver
;
442 int gmx_stats_get_error(gmx_stats_t gstats
, real
* error
)
444 gmx_stats
* stats
= gstats
;
447 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
452 *error
= stats
->error
;
457 int gmx_stats_get_corr_coeff(gmx_stats_t gstats
, real
* R
)
459 gmx_stats
* stats
= gstats
;
462 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
472 int gmx_stats_get_rmsd(gmx_stats_t gstats
, real
* rmsd
)
474 gmx_stats
* stats
= gstats
;
477 if ((ok
= gmx_stats_compute(stats
, elsqWEIGHT_NONE
)) != estatsOK
)
487 int gmx_stats_dump_xy(gmx_stats_t gstats
, FILE* fp
)
489 gmx_stats
* stats
= gstats
;
491 for (int i
= 0; (i
< stats
->np
); i
++)
493 fprintf(fp
, "%12g %12g %12g %12g\n", stats
->x
[i
], stats
->y
[i
], stats
->dx
[i
], stats
->dy
[i
]);
499 int gmx_stats_remove_outliers(gmx_stats_t gstats
, double level
)
501 gmx_stats
* stats
= gstats
;
502 int iter
= 1, done
= 0, ok
;
505 while ((stats
->np
>= 10) && !done
)
507 if ((ok
= gmx_stats_get_rmsd(gstats
, &rmsd
)) != estatsOK
)
512 for (int i
= 0; (i
< stats
->np
);)
514 r
= std::abs(stats
->x
[i
] - stats
->y
[i
]);
515 if (r
> level
* rmsd
)
517 fprintf(stderr
, "Removing outlier, iter = %d, rmsd = %g, x = %g, y = %g\n", iter
,
518 rmsd
, stats
->x
[i
], stats
->y
[i
]);
519 if (i
< stats
->np
- 1)
521 stats
->x
[i
] = stats
->x
[stats
->np
- 1];
522 stats
->y
[i
] = stats
->y
[stats
->np
- 1];
523 stats
->dx
[i
] = stats
->dx
[stats
->np
- 1];
524 stats
->dy
[i
] = stats
->dy
[stats
->np
- 1];
540 int gmx_stats_make_histogram(gmx_stats_t gstats
, real binwidth
, int* nb
, int ehisto
, int normalized
, real
** x
, real
** y
)
542 gmx_stats
* stats
= gstats
;
543 int index
= 0, nbins
= *nb
, *nindex
;
544 double minx
, maxx
, maxy
, miny
, delta
, dd
, minh
;
546 if (((binwidth
<= 0) && (nbins
<= 0)) || ((binwidth
> 0) && (nbins
> 0)))
548 return estatsINVALID_INPUT
;
552 return estatsNO_POINTS
;
554 minx
= maxx
= stats
->x
[0];
555 miny
= maxy
= stats
->y
[0];
556 for (int i
= 1; (i
< stats
->np
); i
++)
558 miny
= (stats
->y
[i
] < miny
) ? stats
->y
[i
] : miny
;
559 maxy
= (stats
->y
[i
] > maxy
) ? stats
->y
[i
] : maxy
;
560 minx
= (stats
->x
[i
] < minx
) ? stats
->x
[i
] : minx
;
561 maxx
= (stats
->x
[i
] > maxx
) ? stats
->x
[i
] : maxx
;
563 if (ehisto
== ehistoX
)
568 else if (ehisto
== ehistoY
)
575 return estatsINVALID_INPUT
;
580 binwidth
= (delta
) / nbins
;
584 nbins
= gmx_dnint((delta
) / binwidth
+ 0.5);
588 for (int i
= 0; (i
< nbins
); i
++)
590 (*x
)[i
] = minh
+ binwidth
* (i
+ 0.5);
598 dd
= 1.0 / (binwidth
* stats
->np
);
602 for (int i
= 0; (i
< stats
->np
); i
++)
604 if (ehisto
== ehistoY
)
606 index
= static_cast<int>((stats
->y
[i
] - miny
) / binwidth
);
608 else if (ehisto
== ehistoX
)
610 index
= static_cast<int>((stats
->x
[i
] - minx
) / binwidth
);
616 if (index
> nbins
- 1)
627 for (int i
= 0; (i
< nbins
); i
++)
631 (*y
)[i
] /= nindex
[i
];
640 static const char* stats_error
[estatsNR
] = { "All well in STATS land", "No points",
641 "Not enough memory", "Invalid histogram input",
642 "Unknown error", "Not implemented yet" };
644 const char* gmx_stats_message(int estats
)
646 if ((estats
>= 0) && (estats
< estatsNR
))
648 return stats_error
[estats
];
652 return stats_error
[estatsERROR
];
656 /* Old convenience functions, should be merged with the core
658 int lsq_y_ax(int n
, real x
[], real y
[], real
* a
)
660 gmx_stats_t lsq
= gmx_stats_init();
664 gmx_stats_add_points(lsq
, n
, x
, y
, nullptr, nullptr);
665 ok
= gmx_stats_get_a(lsq
, elsqWEIGHT_NONE
, a
, &da
, &chi2
, &Rfit
);
671 static int low_lsq_y_ax_b(int n
, const real
* xr
, const double* xd
, real yr
[], real
* a
, real
* b
, real
* r
, real
* chi2
)
673 gmx_stats_t lsq
= gmx_stats_init();
676 for (int i
= 0; (i
< n
); i
++)
684 else if (xr
!= nullptr)
690 gmx_incons("Either xd or xr has to be non-NULL in low_lsq_y_ax_b()");
693 if ((ok
= gmx_stats_add_point(lsq
, pt
, yr
[i
], 0, 0)) != estatsOK
)
699 ok
= gmx_stats_get_ab(lsq
, elsqWEIGHT_NONE
, a
, b
, nullptr, nullptr, chi2
, r
);
705 int lsq_y_ax_b(int n
, real x
[], real y
[], real
* a
, real
* b
, real
* r
, real
* chi2
)
707 return low_lsq_y_ax_b(n
, x
, nullptr, y
, a
, b
, r
, chi2
);
710 int lsq_y_ax_b_xdouble(int n
, double x
[], real y
[], real
* a
, real
* b
, real
* r
, real
* chi2
)
712 return low_lsq_y_ax_b(n
, nullptr, x
, y
, a
, b
, r
, chi2
);
715 int lsq_y_ax_b_error(int n
, real x
[], real y
[], real dy
[], real
* a
, real
* b
, real
* da
, real
* db
, real
* r
, real
* chi2
)
717 gmx_stats_t lsq
= gmx_stats_init();
720 for (int i
= 0; (i
< n
); i
++)
722 ok
= gmx_stats_add_point(lsq
, x
[i
], y
[i
], 0, dy
[i
]);
729 ok
= gmx_stats_get_ab(lsq
, elsqWEIGHT_Y
, a
, b
, da
, db
, chi2
, r
);