src/gromacs/statistics/statistics.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2008, The GROMACS development team.
   6  * Copyright (c) 2010,2014,2015,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \libinternal \file
  38  * \brief
  39  * Declares simple statistics toolbox
  40  *
  41  * \authors David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \inlibraryapi
  43  */
  44 #ifndef GMX_STATISTICS_H
  45 #define GMX_STATISTICS_H
  46
  47 #include <cstdio>
  48
  49 #include "gromacs/utility/real.h"
  50
  51 //! Abstract container type
  52 typedef struct gmx_stats* gmx_stats_t;
  53
  54 //! Error codes returned by the routines (gmx_stats_message() gives the message for a code)
  55 enum
  56 {
  57     estatsOK,
  58     estatsNO_POINTS,
  59     estatsNO_MEMORY,
  60     estatsERROR,
  61     estatsINVALID_INPUT,
  62     estatsNOT_IMPLEMENTED,
  63     estatsNR
  64 };
  65
  66 //! Enum for statistical weights
  67 enum
  68 {
  69     elsqWEIGHT_NONE,
  70     elsqWEIGHT_X,
  71     elsqWEIGHT_Y,
  72     elsqWEIGHT_XY,
  73     elsqWEIGHT_NR
  74 };
  75
  76 //! Enum determining which coordinate to histogram (ehisto argument of gmx_stats_make_histogram())
  77 enum
  78 {
  79     ehistoX,
  80     ehistoY,
  81     ehistoNR
  82 };
  83
  84 /*! \brief
  85  * Initiate a data structure
  86  * \return the data structure
  87  */
  88 gmx_stats_t gmx_stats_init();
  89
  90 /*! \brief
  91  * Destroy a data structure
  92  * \param stats The data structure
  93  */
  94 void gmx_stats_free(gmx_stats_t stats);
  95
   96 /*! \brief
   97  * Remove outliers from a straight line, where level is in units of
   98  * sigma. Level needs to be larger than one.
  99  * \param[in] stats The data structure
 100  * \param[in] level The sigma level
 101  * \return error code
 102  */
 103 int gmx_stats_remove_outliers(gmx_stats_t stats, double level);
 104
 105 /*! \brief
 106  * Add a point to the data set
 107  * \param[in] stats The data structure
 108  * \param[in] x   The x value
 109  * \param[in] y   The y value
 110  * \param[in] dx  The error in the x value
 111  * \param[in] dy  The error in the y value
 112  * \return error code
 113  */
 114 int gmx_stats_add_point(gmx_stats_t stats, double x, double y, double dx, double dy);
 115
 116 /*! \brief
 117  * Add a series of datapoints at once. The arrays dx and dy may
 118  * be NULL, in which case zero uncertainties will be assumed.
 119  *
 120  * \param[in] stats The data structure
 121  * \param[in] n   Number of points
 122  * \param[in] x   The array of x values
 123  * \param[in] y   The array of y values
 124  * \param[in] dx  The array of errors in the x values (may be NULL)
 125  * \param[in] dy  The array of errors in the y values (may be NULL)
 126  * \return error code
 127  */
 128 int gmx_stats_add_points(gmx_stats_t stats, int n, real* x, real* y, real* dx, real* dy);
 129
 130 /*! \brief
 131  * Delivers data points from the statistics.
 132  *
 133  * Should be used in a while loop. The output
 134  * pointers may be NULL, in which case the routine can be used as an
 135  * expensive point counter.
 136  * Returns the data points one by one. Returns estatsOK while there are
 137  *  more points, and returns estatsNO_POINTS when the last point has
 138  *  been returned.
 139  *  If level > 0 then only the outliers outside level*sigma are
 140  * reported.
 141  * \param[in] stats The data structure
 142  * \param[out] x   The x value of the point
 143  * \param[out] y   The y value of the point
 144  * \param[out] dx  The error in the x value
 145  * \param[out] dy  The error in the y value
 146  * \param[in]  level sigma level (see above)
 147  * \return error code
 148  */
 149 int gmx_stats_get_point(gmx_stats_t stats, real* x, real* y, real* dx, real* dy, real level);
 150
 151 /*! \brief
 152  * Fit the data to y = ax + b, possibly weighted, if uncertainties
 153  * have been input. da and db may be NULL.
 154  * \param[in] stats The data structure
 155  * \param[in] weight type of weighting
 156  * \param[out] a slope
 157  * \param[out] b intercept
 158  * \param[out] da sigma in a
 159  * \param[out] db sigma in b
 160  * \param[out] chi2 normalized quality of fit
 161  * \param[out] Rfit correlation coefficient
 162  * \return error code
 163  */
 164 int gmx_stats_get_ab(gmx_stats_t stats, int weight, real* a, real* b, real* da, real* db, real* chi2, real* Rfit);
 165
 166 /*! \brief
 167  * Fit the data to y = ax, possibly weighted, if uncertainties
 168  * have been input. da may be NULL.
 169  * \param[in] stats The data structure
 170  * \param[in] weight type of weighting
 171  * \param[out] a slope
 172  * \param[out] da sigma in a
 173  * \param[out] chi2 normalized quality of fit
 174  * \param[out] Rfit correlation coefficient
 175  * \return error code
 176  */
 177 int gmx_stats_get_a(gmx_stats_t stats, int weight, real* a, real* da, real* chi2, real* Rfit);
 178
 179 /*! \brief
 180  * Get the correlation coefficient.
 181  * \param[in]  stats The data structure
 182  * \param[out] R the correlation coefficient between the data (x and y) as input to the structure.
 183  * \return error code
 184  */
 185 int gmx_stats_get_corr_coeff(gmx_stats_t stats, real* R);
 186
 187 /*! \brief
 188  * Get the root mean square deviation.
 189  * \param[in]  stats The data structure
 190  * \param[out] rmsd  the root mean square deviation between x and y values.
 191  * \return error code
 192  */
 193 int gmx_stats_get_rmsd(gmx_stats_t stats, real* rmsd);
 194
 195 /*! \brief
 196  * Get the number of points.
 197  * \param[in]  stats The data structure
 198  * \param[out] N     number of data points
 199  * \return error code
 200  */
 201 int gmx_stats_get_npoints(gmx_stats_t stats, int* N);
 202
 203 /*! \brief
 204  * Computes and returns the average value.
 205  * \param[in]  stats The data structure
 206  * \param[out] aver  Average value
 207  * \return error code
 208  */
 209 int gmx_stats_get_average(gmx_stats_t stats, real* aver);
 210
 211 /*! \brief
 212  * Computes and returns the standard deviation.
 213  * \param[in]  stats The data structure
 214  * \param[out] sigma  Standard deviation
 215  * \return error code
 216  */
 217 int gmx_stats_get_sigma(gmx_stats_t stats, real* sigma);
 218
 219 /*! \brief
 220  * Computes and returns the standard error.
 221  * \param[in]  stats The data structure
 222  * \param[out] error Standard error
 223  * \return error code
 224  */
 225 int gmx_stats_get_error(gmx_stats_t stats, real* error);
 226
 227 /*! \brief
 228  * Get average, sigma and error in one call. Pointers may be NULL, in which case no assignment is done.
 229  * \param[in]  stats The data structure
 230  * \param[out] aver  Average value
 231  * \param[out] sigma  Standard deviation
 232  * \param[out] error Standard error
 233  * \return error code
 234  */
 235 int gmx_stats_get_ase(gmx_stats_t stats, real* aver, real* sigma, real* error);
 236
 237 /*! \brief
 238  * Dump the x, y, dx, dy data to a text file
 239  * \param[in]  stats The data structure
 240  * \param[in] fp  File pointer
 241  * \return error code
 242  */
 243 int gmx_stats_dump_xy(gmx_stats_t stats, FILE* fp);
 244
 245 /*! \brief
 246  * Make a histogram of the data present.
 247  *
 248  * Uses either binwidth to
 249  * determine the number of bins, or nbins to determine the binwidth,
 250  * therefore one of these should be zero, but not the other. If *nbins = 0
 251  * the number of bins will be returned in this variable. ehisto should be one of
 252  * ehistoX or ehistoY. If
 253  * normalized is not equal to zero, the integral of the histogram will be
 254  * normalized to one. The output is in two arrays, *x and *y, to which
 255  * you should pass a pointer. Memory for the arrays will be allocated
 256  * as needed. Function returns one of the estats codes.
 257  * \param[in]  stats The data structure
 258  * \param[in] binwidth For the histogram
 259  * \param[in,out] nbins Number of bins (returned here if *nbins = 0 on input)
 260  * \param[in] ehisto   Type (see enum above)
 261  * \param[in] normalized see above
 262  * \param[out] x see above
 263  * \param[out] y see above
 264  * \return error code
 265  */
 266 int gmx_stats_make_histogram(gmx_stats_t stats,
 267                              real        binwidth,
 268                              int*        nbins,
 269                              int         ehisto,
 270                              int         normalized,
 271                              real**      x,
 272                              real**      y);
 273
 274 /*! \brief
 275  * Return message belonging to error code
 276  * \param[in] estats error code
 277  */
 278 const char* gmx_stats_message(int estats);
 279
 280 /****************************************************
 281  * Some statistics utilities for convenience: useful when a complete data
 282  * set is available already from another source, e.g. an xvg file.
 283  ****************************************************/
 284 /*! \brief
 285  * Fit a straight line y=ax through the n data points x, y, return the
 286  * slope in *a.
 287  * \param[in] n number of points
 288  * \param[in] x data points x
 289  * \param[in] y data points y
 290  * \param[out] a slope
 291  * \return error code
 292  */
 293 int lsq_y_ax(int n, real x[], real y[], real* a);
 294
 295 /*! \brief
 296  * Fit a straight line y=ax+b through the n data points x, y.
 297  * \param[in] n number of points
 298  * \param[in] x data points x
 299  * \param[in] y data points y
 300  * \param[out] a slope
 301  * \param[out] b intercept
 302  * \param[out] r correlation coefficient
 303  * \param[out] chi2 quality of fit
 304  * \return error code
 305  */
 306 int lsq_y_ax_b(int n, real x[], real y[], real* a, real* b, real* r, real* chi2);
 307
 308 /*! \copydoc lsq_y_ax_b
 309  */
 310 int lsq_y_ax_b_xdouble(int n, double x[], real y[], real* a, real* b, real* r, real* chi2);
 311
 312 /*! \brief
 313  * Fit a straight line y=ax+b through the n data points x, y.
 314  * \param[in] n number of points
 315  * \param[in] x data points x
 316  * \param[in] y data points y
 317  * \param[in] dy uncertainties in data points y
 318  * \param[out] a slope
 319  * \param[out] b intercept
 320  * \param[out] da error in slope
 321  * \param[out] db error in intercept
 322  * \param[out] r correlation coefficient
 323  * \param[out] chi2 quality of fit
 324  * \return error code
 325  */
 326 int lsq_y_ax_b_error(int n, real x[], real y[], real dy[], real* a, real* b, real* da, real* db, real* r, real* chi2);
 327
 328 #endif