2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2008, The GROMACS development team.
6 * Copyright (c) 2010,2014,2015,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \libinternal \file
39 * Declares simple statistics toolbox
41 * \authors David van der Spoel <david.vanderspoel@icm.uu.se>
44 #ifndef GMX_STATISTICS_H
45 #define GMX_STATISTICS_H
49 #include "gromacs/utility/real.h"
//! Abstract container type: a pointer to the statistics structure, which is only forward-declared here
typedef struct gmx_stats* gmx_stats_t;
54 //! Error codes returned by the routines
62 estatsNOT_IMPLEMENTED
,
66 //! Enum for statistical weights
76 //! Enum determining which coordinate to histogram
85 * Initiate a data structure
86 * \return the data structure
88 gmx_stats_t
gmx_stats_init();
91 * Destroy a data structure
92 * \param stats The data structure
94 void gmx_stats_free(gmx_stats_t stats
);
97 * Remove outliers from a straight line, where level in units of
98 * sigma. Level needs to be larger than one obviously.
99 * \param[in] stats The data structure
100 * \param[in] level The sigma level
103 int gmx_stats_remove_outliers(gmx_stats_t stats
, double level
);
106 * Add a point to the data set
107 * \param[in] stats The data structure
108 * \param[in] x The x value
109 * \param[in] y The y value
110 * \param[in] dx The error in the x value
111 * \param[in] dy The error in the y value
114 int gmx_stats_add_point(gmx_stats_t stats
, double x
, double y
, double dx
, double dy
);
117 * Add a series of datapoints at once. The arrays dx and dy may
118 * be NULL in that case zero uncertainties will be assumed.
120 * \param[in] stats The data structure
121 * \param[in] n Number of points
122 * \param[in] x The array of x values
123 * \param[in] y The array of y values
124 * \param[in] dx The error in the x value
125 * \param[in] dy The error in the y value
128 int gmx_stats_add_points(gmx_stats_t stats
, int n
, real
* x
, real
* y
, real
* dx
, real
* dy
);
131 * Delivers data points from the statistics.
133 * Should be used in a while loop. Variables for either
134 * pointer may be NULL, in which case the routine can be used as an
135 * expensive point counter.
136 * Return the data points one by one. Return estatsOK while there are
137 * more points, and returns estatsNOPOINTS when the last point has
139 * If level > 0 then the outliers outside level*sigma are reported
141 * \param[in] stats The data structure
142 * \param[out] x The array of x values
143 * \param[out] y The array of y values
144 * \param[out] dx The error in the x value
145 * \param[out] dy The error in the y value
146 * \param[in] level sigma level (see above)
149 int gmx_stats_get_point(gmx_stats_t stats
, real
* x
, real
* y
, real
* dx
, real
* dy
, real level
);
152 * Fit the data to y = ax + b, possibly weighted, if uncertainties
153 * have been input. da and db may be NULL.
154 * \param[in] stats The data structure
155 * \param[in] weight type of weighting
156 * \param[out] a slope
157 * \param[out] b intercept
158 * \param[out] da sigma in a
159 * \param[out] db sigma in b
160 * \param[out] chi2 normalized quality of fit
161 * \param[out] Rfit correlation coefficient
164 int gmx_stats_get_ab(gmx_stats_t stats
, int weight
, real
* a
, real
* b
, real
* da
, real
* db
, real
* chi2
, real
* Rfit
);
167 * Fit the data to y = ax, possibly weighted, if uncertainties have
168 * have been input. da and db may be NULL.
169 * \param[in] stats The data structure
170 * \param[in] weight type of weighting
171 * \param[out] a slope
172 * \param[out] da sigma in a
173 * \param[out] chi2 normalized quality of fit
174 * \param[out] Rfit correlation coefficient
177 int gmx_stats_get_a(gmx_stats_t stats
, int weight
, real
* a
, real
* da
, real
* chi2
, real
* Rfit
);
180 * Get the correlation coefficient.
181 * \param[in] stats The data structure
182 * \param[out] R the correlation coefficient between the data (x and y) as input to the structure.
185 int gmx_stats_get_corr_coeff(gmx_stats_t stats
, real
* R
);
188 * Get the root mean square deviation.
189 * \param[in] stats The data structure
190 * \param[out] rmsd the root mean square deviation between x and y values.
193 int gmx_stats_get_rmsd(gmx_stats_t stats
, real
* rmsd
);
196 * Get the number of points.
197 * \param[in] stats The data structure
198 * \param[out] N number of data points
201 int gmx_stats_get_npoints(gmx_stats_t stats
, int* N
);
204 * Computes and returns the average value.
205 * \param[in] stats The data structure
206 * \param[out] aver Average value
209 int gmx_stats_get_average(gmx_stats_t stats
, real
* aver
);
212 * Computes and returns the standard deviation.
213 * \param[in] stats The data structure
214 * \param[out] sigma Standard deviation
217 int gmx_stats_get_sigma(gmx_stats_t stats
, real
* sigma
);
220 * Computes and returns the standard error.
221 * \param[in] stats The data structure
222 * \param[out] error Standard error
225 int gmx_stats_get_error(gmx_stats_t stats
, real
* error
);
228 * Pointers may be null, in which case no assignment will be done.
229 * \param[in] stats The data structure
230 * \param[out] aver Average value
231 * \param[out] sigma Standard deviation
232 * \param[out] error Standard error
235 int gmx_stats_get_ase(gmx_stats_t stats
, real
* aver
, real
* sigma
, real
* error
);
238 * Dump the x, y, dx, dy data to a text file
239 * \param[in] stats The data structure
240 * \param[in] fp File pointer
243 int gmx_stats_dump_xy(gmx_stats_t stats
, FILE* fp
);
246 * Make a histogram of the data present.
248 * Uses either binwidth to
249 * determine the number of bins, or nbins to determine the binwidth,
250 * therefore one of these should be zero, but not the other. If *nbins = 0
251 * the number of bins will be returned in this variable. ehisto should be one of
252 * ehistoX or ehistoY. If
253 * normalized is not equal to zero, the integral of the histogram will be
254 * normalized to one. The output is in two arrays, *x and *y, to which
255 * you should pass a pointer. Memory for the arrays will be allocated
256 * as needed. Function returns one of the estats codes.
257 * \param[in] stats The data structure
258 * \param[in] binwidth For the histogram
259 * \param[in,out] nbins Number of bins; if *nbins is 0 on input, the number of bins is returned in this variable
260 * \param[in] ehisto Type (see enum above)
261 * \param[in] normalized see above
262 * \param[out] x see above
263 * \param[out] y see above
266 int gmx_stats_make_histogram(gmx_stats_t stats
,
/*! \brief
 * Return the message belonging to an error code.
 * \param[in] estats error code
 * \return the message string for the given error code
 */
const char* gmx_stats_message(int estats);
280 /****************************************************
281 * Some statistics utilities for convenience: useful when a complete data
282 * set is available already from another source, e.g. an xvg file.
283 ****************************************************/
285 * Fit a straight line y=ax thru the n data points x, y, return the
287 * \param[in] n number of points
288 * \param[in] x data points x
289 * \param[in] y data point y
290 * \param[out] a slope
293 int lsq_y_ax(int n
, real x
[], real y
[], real
* a
);
296 * Fit a straight line y=ax+b thru the n data points x, y.
297 * \param[in] n number of points
298 * \param[in] x data points x
299 * \param[in] y data point y
300 * \param[out] a slope
301 * \param[out] b intercept
302 * \param[out] r correlation coefficient
303 * \param[out] chi2 quality of fit
306 int lsq_y_ax_b(int n
, real x
[], real y
[], real
* a
, real
* b
, real
* r
, real
* chi2
);
308 /*! \copydoc lsq_y_ax_b
310 int lsq_y_ax_b_xdouble(int n
, double x
[], real y
[], real
* a
, real
* b
, real
* r
, real
* chi2
);
313 * Fit a straight line y=ax+b thru the n data points x, y.
314 * \param[in] n number of points
315 * \param[in] x data points x
316 * \param[in] y data point y
317 * \param[in] dy uncertainty in data point y
318 * \param[out] a slope
319 * \param[out] b intercept
320 * \param[out] da error in slope
321 * \param[out] db error in intercept
322 * \param[out] r correlation coefficient
323 * \param[out] chi2 quality of fit
326 int lsq_y_ax_b_error(int n
, real x
[], real y
[], real dy
[], real
* a
, real
* b
, real
* da
, real
* db
, real
* r
, real
* chi2
);