js/statistics/simple_statistics.js

   1 /* global module */
   2 // # simple-statistics
   3 //
   4 // A simple, literate statistics system. The code below uses the
   5 // [Javascript module pattern](http://www.adequatelygood.com/2010/3/JavaScript-Module-Pattern-In-Depth),
   6 // eventually assigning `simple-statistics` to `ss` in browsers or the
   7 // `exports` object for node.js
   8 (function() {
   9     var ss = {};
  10
  11     if (typeof module !== 'undefined') {
  12         // Assign the `ss` object to exports, so that you can require
  13         // it in [node.js](http://nodejs.org/)
  14         module.exports = ss;
  15     } else {
  16         // Otherwise, in a browser, we assign `ss` to the window object,
  17         // so you can simply refer to it as `ss`.
  18         this.ss = ss;
  19     }
  20
  21     // # [Linear Regression](http://en.wikipedia.org/wiki/Linear_regression)
  22     //
  23     // [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
  24     // is a simple way to find a fitted line
  25     // between a set of coordinates.
  26     function linear_regression() {
  27         var linreg = {},
  28             data = [];
  29
  30         // Assign data to the model. Data is assumed to be an array.
  31         linreg.data = function(x) {
  32             if (!arguments.length) return data;
  33             data = x.slice();
  34             return linreg;
  35         };
  36
  37         // Calculate the slope and y-intercept of the regression line
  38         // by calculating the least sum of squares
  39         linreg.mb = function() {
  40             var m, b;
  41
  42             // Store data length in a local variable to reduce
  43             // repeated object property lookups
  44             var data_length = data.length;
  45
  46             //if there's only one point, arbitrarily choose a slope of 0
  47             //and a y-intercept of whatever the y of the initial point is
  48             if (data_length === 1) {
  49                 m = 0;
  50                 b = data[0][1];
  51             } else {
  52                 // Initialize our sums and scope the `m` and `b`
  53                 // variables that define the line.
  54                 var sum_x = 0, sum_y = 0,
  55                     sum_xx = 0, sum_xy = 0;
  56
  57                 // Use local variables to grab point values
  58                 // with minimal object property lookups
  59                 var point, x, y;
  60
  61                 // Gather the sum of all x values, the sum of all
  62                 // y values, and the sum of x^2 and (x*y) for each
  63                 // value.
  64                 //
  65                 // In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy
  66                 for (var i = 0; i < data_length; i++) {
  67                     point = data[i];
  68                     x = point[0];
  69                     y = point[1];
  70
  71                     sum_x += x;
  72                     sum_y += y;
  73
  74                     sum_xx += x * x;
  75                     sum_xy += x * y;
  76                 }
  77
  78                 // `m` is the slope of the regression line
  79                 m = ((data_length * sum_xy) - (sum_x * sum_y)) /
  80                     ((data_length * sum_xx) - (sum_x * sum_x));
  81
  82                 // `b` is the y-intercept of the line.
  83                 b = (sum_y / data_length) - ((m * sum_x) / data_length);
  84             }
  85
  86             // Return both values as an object.
  87             return { m: m, b: b };
  88         };
  89
  90         // a shortcut for simply getting the slope of the regression line
  91         linreg.m = function() {
  92             return linreg.mb().m;
  93         };
  94
  95         // a shortcut for simply getting the y-intercept of the regression
  96         // line.
  97         linreg.b = function() {
  98             return linreg.mb().b;
  99         };
 100
 101         // ## Fitting The Regression Line
 102         //
 103         // This is called after `.data()` and returns the
 104         // equation `y = f(x)` which gives the position
 105         // of the regression line at each point in `x`.
 106         linreg.line = function() {
 107
 108             // Get the slope, `m`, and y-intercept, `b`, of the line.
 109             var mb = linreg.mb(),
 110                 m = mb.m,
 111                 b = mb.b;
 112
 113             // Return a function that computes a `y` value for each
 114             // x value it is given, based on the values of `b` and `a`
 115             // that we just computed.
 116             return function(x) {
 117                 return b + (m * x);
 118             };
 119         };
 120
 121         return linreg;
 122     }
 123
 124     // # [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination)
 125     //
 126     // The r-squared value of data compared with a function `f`
 127     // is the sum of the squared differences between the prediction
 128     // and the actual value.
 129     function r_squared(data, f) {
 130         if (data.length < 2) return 1;
 131
 132         // Compute the average y value for the actual
 133         // data set in order to compute the
 134         // _total sum of squares_
 135         var sum = 0, average;
 136         for (var i = 0; i < data.length; i++) {
 137             sum += data[i][1];
 138         }
 139         average = sum / data.length;
 140
 141         // Compute the total sum of squares - the
 142         // squared difference between each point
 143         // and the average of all points.
 144         var sum_of_squares = 0;
 145         for (var j = 0; j < data.length; j++) {
 146             sum_of_squares += Math.pow(average - data[j][1], 2);
 147         }
 148
 149         // Finally estimate the error: the squared
 150         // difference between the estimate and the actual data
 151         // value at each point.
 152         var err = 0;
 153         for (var k = 0; k < data.length; k++) {
 154             err += Math.pow(data[k][1] - f(data[k][0]), 2);
 155         }
 156
 157         // As the error grows larger, its ratio to the
 158         // sum of squares increases and the r squared
 159         // value grows lower.
 160         return 1 - (err / sum_of_squares);
 161     }
 162
 163
 164     // # [Bayesian Classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier)
 165     //
 166     // This is a naïve bayesian classifier that takes
 167     // singly-nested objects.
 168     function bayesian() {
 169         // The `bayes_model` object is what will be exposed
 170         // by this closure, with all of its extended methods, and will
 171         // have access to all scope variables, like `total_count`.
 172         var bayes_model = {},
 173             // The number of items that are currently
 174             // classified in the model
 175             total_count = 0,
 176             // Every item classified in the model
 177             data = {};
 178
 179         // ## Train
 180         // Train the classifier with a new item, which has a single
 181         // dimension of Javascript literal keys and values.
 182         bayes_model.train = function(item, category) {
 183             // If the data object doesn't have any values
 184             // for this category, create a new object for it.
 185             if (!data[category]) data[category] = {};
 186
 187             // Iterate through each key in the item.
 188             for (var k in item) {
 189                 var v = item[k];
 190                 // Initialize the nested object `data[category][k][item[k]]`
 191                 // with an object of keys that equal 0.
 192                 if (data[category][k] === undefined) data[category][k] = {};
 193                 if (data[category][k][v] === undefined) data[category][k][v] = 0;
 194
 195                 // And increment the key for this key/value combination.
 196                 data[category][k][item[k]]++;
 197             }
 198             // Increment the number of items classified
 199             total_count++;
 200         };
 201
 202         // ## Score
 203         // Generate a score of how well this item matches all
 204         // possible categories based on its attributes
 205         bayes_model.score = function(item) {
 206             // Initialize an empty array of odds per category.
 207             var odds = {}, category;
 208             // Iterate through each key in the item,
 209             // then iterate through each category that has been used
 210             // in previous calls to `.train()`
 211             for (var k in item) {
 212                 var v = item[k];
 213                 for (category in data) {
 214                     // Create an empty object for storing key - value combinations
 215                     // for this category.
 216                     if (odds[category] === undefined) odds[category] = {};
 217
 218                     // If this item doesn't even have a property, it counts for nothing,
 219                     // but if it does have the property that we're looking for from
 220                     // the item to categorize, it counts based on how popular it is
 221                     // versus the whole population.
 222                     if (data[category][k]) {
 223                         odds[category][k + '_' + v] = (data[category][k][v] || 0) / total_count;
 224                     } else {
 225                         odds[category][k + '_' + v] = 0;
 226                     }
 227                 }
 228             }
 229
 230             // Set up a new object that will contain sums of these odds by category
 231             var odds_sums = {};
 232
 233             for (category in odds) {
 234                 // Tally all of the odds for each category-combination pair -
 235                 // the non-existence of a category does not add anything to the
 236                 // score.
 237                 for (var combination in odds[category]) {
 238                     if (odds_sums[category] === undefined) odds_sums[category] = 0;
 239                     odds_sums[category] += odds[category][combination];
 240                 }
 241             }
 242
 243             return odds_sums;
 244         };
 245
 246         // Return the completed model.
 247         return bayes_model;
 248     }
 249
 250     // # sum
 251     //
 252     // is simply the result of adding all numbers
 253     // together, starting from zero.
 254     //
 255     // This runs on `O(n)`, linear time in respect to the array
 256     function sum(x) {
 257         var value = 0;
 258         for (var i = 0; i < x.length; i++) {
 259             value += x[i];
 260         }
 261         return value;
 262     }
 263
 264     // # mean
 265     //
 266     // is the sum over the number of values
 267     //
 268     // This runs on `O(n)`, linear time in respect to the array
 269     function mean(x) {
 270         // The mean of no numbers is null
 271         if (x.length === 0) return null;
 272
 273         return sum(x) / x.length;
 274     }
 275
 276     // # geometric mean
 277     //
 278     // a mean function that is more useful for numbers in different
 279     // ranges.
 280     //
 281     // this is the nth root of the input numbers multiplied by each other
 282     //
 283     // This runs on `O(n)`, linear time in respect to the array
 284     function geometric_mean(x) {
 285         // The mean of no numbers is null
 286         if (x.length === 0) return null;
 287
 288         // the starting value.
 289         var value = 1;
 290
 291         for (var i = 0; i < x.length; i++) {
 292             // the geometric mean is only valid for positive numbers
 293             if (x[i] <= 0) return null;
 294
 295             // repeatedly multiply the value by each number
 296             value *= x[i];
 297         }
 298
 299         return Math.pow(value, 1 / x.length);
 300     }
 301
 302
 303     // # harmonic mean
 304     //
 305     // a mean function typically used to find the average of rates
 306     //
 307     // this is the reciprocal of the arithmetic mean of the reciprocals
 308     // of the input numbers
 309     //
 310     // This runs on `O(n)`, linear time in respect to the array
 311     function harmonic_mean(x) {
 312         // The mean of no numbers is null
 313         if (x.length === 0) return null;
 314
 315         var reciprocal_sum = 0;
 316
 317         for (var i = 0; i < x.length; i++) {
 318             // the harmonic mean is only valid for positive numbers
 319             if (x[i] <= 0) return null;
 320
 321             reciprocal_sum += 1 / x[i];
 322         }
 323
 324         // divide n by the the reciprocal sum
 325         return x.length / reciprocal_sum;
 326     }
 327
 328     // root mean square (RMS)
 329     //
 330     // a mean function used as a measure of the magnitude of a set
 331     // of numbers, regardless of their sign
 332     //
 333     // this is the square root of the mean of the squares of the
 334     // input numbers
 335     //
 336     // This runs on `O(n)`, linear time in respect to the array
 337     function root_mean_square(x) {
 338         if (x.length === 0) return null;
 339
 340         var sum_of_squares = 0;
 341         for (var i = 0; i < x.length; i++) {
 342             sum_of_squares += Math.pow(x[i], 2);
 343         }
 344
 345         return Math.sqrt(sum_of_squares / x.length);
 346     }
 347
 348     // # min
 349     //
 350     // This is simply the minimum number in the set.
 351     //
 352     // This runs on `O(n)`, linear time in respect to the array
 353     function min(x) {
 354         var value;
 355         for (var i = 0; i < x.length; i++) {
 356             // On the first iteration of this loop, min is
 357             // undefined and is thus made the minimum element in the array
 358             if (x[i] < value || value === undefined) value = x[i];
 359         }
 360         return value;
 361     }
 362
 363     // # max
 364     //
 365     // This is simply the maximum number in the set.
 366     //
 367     // This runs on `O(n)`, linear time in respect to the array
 368     function max(x) {
 369         var value;
 370         for (var i = 0; i < x.length; i++) {
 371             // On the first iteration of this loop, max is
 372             // undefined and is thus made the maximum element in the array
 373             if (x[i] > value || value === undefined) value = x[i];
 374         }
 375         return value;
 376     }
 377
 378     // # [variance](http://en.wikipedia.org/wiki/Variance)
 379     //
 380     // is the sum of squared deviations from the mean
 381     //
 382     // depends on `mean()`
 383     function variance(x) {
 384         // The variance of no numbers is null
 385         if (x.length === 0) return null;
 386
 387         var mean_value = mean(x),
 388             deviations = [];
 389
 390         // Make a list of squared deviations from the mean.
 391         for (var i = 0; i < x.length; i++) {
 392             deviations.push(Math.pow(x[i] - mean_value, 2));
 393         }
 394
 395         // Find the mean value of that list
 396         return mean(deviations);
 397     }
 398
 399     // # [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 400     //
 401     // is just the square root of the variance.
 402     //
 403     // depends on `variance()`
 404     function standard_deviation(x) {
 405         // The standard deviation of no numbers is null
 406         if (x.length === 0) return null;
 407
 408         return Math.sqrt(variance(x));
 409     }
 410
 411     // The sum of deviations to the Nth power.
 412     // When n=2 it's the sum of squared deviations.
 413     // When n=3 it's the sum of cubed deviations.
 414     //
 415     // depends on `mean()`
 416     function sum_nth_power_deviations(x, n) {
 417         var mean_value = mean(x),
 418             sum = 0;
 419
 420         for (var i = 0; i < x.length; i++) {
 421             sum += Math.pow(x[i] - mean_value, n);
 422         }
 423
 424         return sum;
 425     }
 426
 427     // # [variance](http://en.wikipedia.org/wiki/Variance)
 428     //
 429     // is the sum of squared deviations from the mean
 430     //
 431     // depends on `sum_nth_power_deviations`
 432     function sample_variance(x) {
 433         // The variance of no numbers is null
 434         if (x.length <= 1) return null;
 435
 436         var sum_squared_deviations_value = sum_nth_power_deviations(x, 2);
 437
 438         // Find the mean value of that list
 439         return sum_squared_deviations_value / (x.length - 1);
 440     }
 441
 442     // # [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 443     //
 444     // is just the square root of the variance.
 445     //
 446     // depends on `sample_variance()`
 447     function sample_standard_deviation(x) {
 448         // The standard deviation of no numbers is null
 449         if (x.length <= 1) return null;
 450
 451         return Math.sqrt(sample_variance(x));
 452     }
 453
 454     // # [covariance](http://en.wikipedia.org/wiki/Covariance)
 455     //
 456     // sample covariance of two datasets:
 457     // how much do the two datasets move together?
 458     // x and y are two datasets, represented as arrays of numbers.
 459     //
 460     // depends on `mean()`
 461     function sample_covariance(x, y) {
 462
 463         // The two datasets must have the same length which must be more than 1
 464         if (x.length <= 1 || x.length != y.length){
 465             return null;
 466         }
 467
 468         // determine the mean of each dataset so that we can judge each
 469         // value of the dataset fairly as the difference from the mean. this
 470         // way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance
 471         // does not suffer because of the difference in absolute values
 472         var xmean = mean(x),
 473             ymean = mean(y),
 474             sum = 0;
 475
 476         // for each pair of values, the covariance increases when their
 477         // difference from the mean is associated - if both are well above
 478         // or if both are well below
 479         // the mean, the covariance increases significantly.
 480         for (var i = 0; i < x.length; i++){
 481             sum += (x[i] - xmean) * (y[i] - ymean);
 482         }
 483
 484         // the covariance is weighted by the length of the datasets.
 485         return sum / (x.length - 1);
 486     }
 487
 488     // # [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence)
 489     //
 490     // Gets a measure of how correlated two datasets are, between -1 and 1
 491     //
 492     // depends on `sample_standard_deviation()` and `sample_covariance()`
 493     function sample_correlation(x, y) {
 494         var cov = sample_covariance(x, y),
 495             xstd = sample_standard_deviation(x),
 496             ystd = sample_standard_deviation(y);
 497
 498         if (cov === null || xstd === null || ystd === null) {
 499             return null;
 500         }
 501
 502         return cov / xstd / ystd;
 503     }
 504
 505     // # [median](http://en.wikipedia.org/wiki/Median)
 506     //
 507     // The middle number of a list. This is often a good indicator of 'the middle'
 508     // when there are outliers that skew the `mean()` value.
 509     function median(x) {
 510         // The median of an empty list is null
 511         if (x.length === 0) return null;
 512
 513         // Sorting the array makes it easy to find the center, but
 514         // use `.slice()` to ensure the original array `x` is not modified
 515         var sorted = x.slice().sort(function (a, b) { return a - b; });
 516
 517         // If the length of the list is odd, it's the central number
 518         if (sorted.length % 2 === 1) {
 519             return sorted[(sorted.length - 1) / 2];
 520         // Otherwise, the median is the average of the two numbers
 521         // at the center of the list
 522         } else {
 523             var a = sorted[(sorted.length / 2) - 1];
 524             var b = sorted[(sorted.length / 2)];
 525             return (a + b) / 2;
 526         }
 527     }
 528
 529     // # [mode](http://bit.ly/W5K4Yt)
 530     //
 531     // The mode is the number that appears in a list the highest number of times.
 532     // There can be multiple modes in a list: in the event of a tie, this
 533     // algorithm will return the most recently seen mode.
 534     //
 535     // This implementation is inspired by [science.js](https://github.com/jasondavies/science.js/blob/master/src/stats/mode.js)
 536     //
 537     // This runs on `O(n)`, linear time in respect to the array
 538     function mode(x) {
 539
 540         // Handle edge cases:
 541         // The median of an empty list is null
 542         if (x.length === 0) return null;
 543         else if (x.length === 1) return x[0];
 544
 545         // Sorting the array lets us iterate through it below and be sure
 546         // that every time we see a new number it's new and we'll never
 547         // see the same number twice
 548         var sorted = x.slice().sort(function (a, b) { return a - b; });
 549
 550         // This assumes it is dealing with an array of size > 1, since size
 551         // 0 and 1 are handled immediately. Hence it starts at index 1 in the
 552         // array.
 553         var last = sorted[0],
 554             // store the mode as we find new modes
 555             value,
 556             // store how many times we've seen the mode
 557             max_seen = 0,
 558             // how many times the current candidate for the mode
 559             // has been seen
 560             seen_this = 1;
 561
 562         // end at sorted.length + 1 to fix the case in which the mode is
 563         // the highest number that occurs in the sequence. the last iteration
 564         // compares sorted[i], which is undefined, to the highest number
 565         // in the series
 566         for (var i = 1; i < sorted.length + 1; i++) {
 567             // we're seeing a new number pass by
 568             if (sorted[i] !== last) {
 569                 // the last number is the new mode since we saw it more
 570                 // often than the old one
 571                 if (seen_this > max_seen) {
 572                     max_seen = seen_this;
 573                     value = last;
 574                 }
 575                 seen_this = 1;
 576                 last = sorted[i];
 577             // if this isn't a new number, it's one more occurrence of
 578             // the potential mode
 579             } else { seen_this++; }
 580         }
 581         return value;
 582     }
 583
 584     // # [t-test](http://en.wikipedia.org/wiki/Student's_t-test)
 585     //
 586     // This is to compute a one-sample t-test, comparing the mean
 587     // of a sample to a known value, x.
 588     //
 589     // in this case, we're trying to determine whether the
 590     // population mean is equal to the value that we know, which is `x`
 591     // here. usually the results here are used to look up a
 592     // [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 593     // a certain level of significance, will let you determine that the
 594     // null hypothesis can or cannot be rejected.
 595     //
 596     // Depends on `standard_deviation()` and `mean()`
 597     function t_test(sample, x) {
 598         // The mean of the sample
 599         var sample_mean = mean(sample);
 600
 601         // The standard deviation of the sample
 602         var sd = standard_deviation(sample);
 603
 604         // Square root the length of the sample
 605         var rootN = Math.sqrt(sample.length);
 606
 607         // Compute the known value against the sample,
 608         // returning the t value
 609         return (sample_mean - x) / (sd / rootN);
 610     }
 611
 612     // # [2-sample t-test](http://en.wikipedia.org/wiki/Student's_t-test)
 613     //
 614     // This is to compute two sample t-test.
 615     // Tests whether "mean(X)-mean(Y) = difference", (
 616     // in the most common case, we often have `difference == 0` to test if two samples
 617     // are likely to be taken from populations with the same mean value) with
 618     // no prior knowledge on standard deviations of both samples
 619     // other than the fact that they have the same standard deviation.
 620     //
 621     // Usually the results here are used to look up a
 622     // [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 623     // a certain level of significance, will let you determine that the
 624     // null hypothesis can or cannot be rejected.
 625     //
 626     // `diff` can be omitted if it equals 0.
 627     //
 628     // [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx)
 629     // a null hypothesis that the two populations that have been sampled into
 630     // `sample_x` and `sample_y` are equal to each other.
 631     //
 632     // Depends on `sample_variance()` and `mean()`
 633     function t_test_two_sample(sample_x, sample_y, difference) {
 634         var n = sample_x.length,
 635             m = sample_y.length;
 636
 637         // If either sample doesn't actually have any values, we can't
 638         // compute this at all, so we return `null`.
 639         if (!n || !m) return null ;
 640
 641         // default difference (mu) is zero
 642         if (!difference) difference = 0;
 643
 644         var meanX = mean(sample_x),
 645             meanY = mean(sample_y);
 646
 647         var weightedVariance = ((n - 1) * sample_variance(sample_x) +
 648             (m - 1) * sample_variance(sample_y)) / (n + m - 2);
 649
 650         return (meanX - meanY - difference) /
 651             Math.sqrt(weightedVariance * (1 / n + 1 / m));
 652     }
 653
 654     // # chunk
 655     //
 656     // Split an array into chunks of a specified size. This function
 657     // has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)
 658     // function, and thus will insert smaller-sized chunks at the end if
 659     // the input size is not divisible by the chunk size.
 660     //
 661     // `sample` is expected to be an array, and `chunkSize` a number.
 662     // The `sample` array can contain any kind of data.
 663     function chunk(sample, chunkSize) {
 664
 665         // a list of result chunks, as arrays in an array
 666         var output = [];
 667
 668         // `chunkSize` must be zero or higher - otherwise the loop below,
 669         // in which we call `start += chunkSize`, will loop infinitely.
 670         // So, we'll detect and return null in that case to indicate
 671         // invalid input.
 672         if (chunkSize <= 0) {
 673             return null;
 674         }
 675
 676         // `start` is the index at which `.slice` will start selecting
 677         // new array elements
 678         for (var start = 0; start < sample.length; start += chunkSize) {
 679
 680             // for each chunk, slice that part of the array and add it
 681             // to the output. The `.slice` function does not change
 682             // the original array.
 683             output.push(sample.slice(start, start + chunkSize));
 684         }
 685         return output;
 686     }
 687
 688     // # shuffle_in_place
 689     //
 690     // A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
 691     // in-place - which means that it will change the order of the original
 692     // array by reference.
 693     function shuffle_in_place(sample, randomSource) {
 694
 695         // a custom random number source can be provided if you want to use
 696         // a fixed seed or another random number generator, like
 697         // [random-js](https://www.npmjs.org/package/random-js)
 698         randomSource = randomSource || Math.random;
 699
 700         // store the current length of the sample to determine
 701         // when no elements remain to shuffle.
 702         var length = sample.length;
 703
 704         // temporary is used to hold an item when it is being
 705         // swapped between indices.
 706         var temporary;
 707
 708         // The index to swap at each stage.
 709         var index;
 710
 711         // While there are still items to shuffle
 712         while (length > 0) {
 713             // chose a random index within the subset of the array
 714             // that is not yet shuffled
 715             index = Math.floor(randomSource() * length--);
 716
 717             // store the value that we'll move temporarily
 718             temporary = sample[length];
 719
 720             // swap the value at `sample[length]` with `sample[index]`
 721             sample[length] = sample[index];
 722             sample[index] = temporary;
 723         }
 724
 725         return sample;
 726     }
 727
 728     // # shuffle
 729     //
 730     // A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
 731     // is a fast way to create a random permutation of a finite set.
 732     function shuffle(sample, randomSource) {
 733         // slice the original array so that it is not modified
 734         sample = sample.slice();
 735
 736         // and then shuffle that shallow-copied array, in place
 737         return shuffle_in_place(sample.slice(), randomSource);
 738     }
 739
 740     // # sample
 741     //
 742     // Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)
 743     // from a given array of `n` elements.
 744     function sample(array, n, randomSource) {
 745         // shuffle the original array using a fisher-yates shuffle
 746         var shuffled = shuffle(array, randomSource);
 747
 748         // and then return a subset of it - the first `n` elements.
 749         return shuffled.slice(0, n);
 750     }
 751
 752     // # quantile
 753     //
 754     // This is a population quantile, since we assume to know the entire
 755     // dataset in this library. Thus I'm trying to follow the
 756     // [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)
 757     // algorithm from wikipedia.
 758     //
 759     // Sample is a one-dimensional array of numbers,
 760     // and p is either a decimal number from 0 to 1 or an array of decimal
 761     // numbers from 0 to 1.
 762     // In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing
 763     // with decimal values.
 764     // When p is an array, the result of the function is also an array containing the appropriate
 765     // quantiles in input order
 766     function quantile(sample, p) {
 767
 768         // We can't derive quantiles from an empty list
 769         if (sample.length === 0) return null;
 770
 771         // Sort a copy of the array. We'll need a sorted array to index
 772         // the values in sorted order.
 773         var sorted = sample.slice().sort(function (a, b) { return a - b; });
 774
 775         if (p.length) {
 776             // Initialize the result array
 777             var results = [];
 778             // For each requested quantile
 779             for (var i = 0; i < p.length; i++) {
 780                 results[i] = quantile_sorted(sorted, p[i]);
 781             }
 782             return results;
 783         } else {
 784             return quantile_sorted(sorted, p);
 785         }
 786     }
 787
 788     // # quantile
 789     //
 790     // This is the internal implementation of quantiles: when you know
 791     // that the order is sorted, you don't need to re-sort it, and the computations
 792     // are much faster.
 793     function quantile_sorted(sample, p) {
 794         var idx = (sample.length) * p;
 795         if (p < 0 || p > 1) {
 796             return null;
 797         } else if (p === 1) {
 798             // If p is 1, directly return the last element
 799             return sample[sample.length - 1];
 800         } else if (p === 0) {
 801             // If p is 0, directly return the first element
 802             return sample[0];
 803         } else if (idx % 1 !== 0) {
 804             // If p is not integer, return the next element in array
 805             return sample[Math.ceil(idx) - 1];
 806         } else if (sample.length % 2 === 0) {
 807             // If the list has even-length, we'll take the average of this number
 808             // and the next value, if there is one
 809             return (sample[idx - 1] + sample[idx]) / 2;
 810         } else {
 811             // Finally, in the simple case of an integer value
 812             // with an odd-length list, return the sample value at the index.
 813             return sample[idx];
 814         }
 815     }
 816
 817     // # [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range)
 818     //
 819     // A measure of statistical dispersion, or how scattered, spread, or
 820     // concentrated a distribution is. It's computed as the difference between
 821     // the third quartile and first quartile.
 822     function iqr(sample) {
 823         // We can't derive quantiles from an empty list
 824         if (sample.length === 0) return null;
 825
 826         // Interquartile range is the span between the upper quartile,
 827         // at `0.75`, and lower quartile, `0.25`
 828         return quantile(sample, 0.75) - quantile(sample, 0.25);
 829     }
 830
 831     // # [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation)
 832     //
 833     // The Median Absolute Deviation (MAD) is a robust measure of statistical
 834     // dispersion. It is more resilient to outliers than the standard deviation.
 835     function mad(x) {
 836         // The mad of nothing is null
 837         if (!x || x.length === 0) return null;
 838
 839         var median_value = median(x),
 840             median_absolute_deviations = [];
 841
 842         // Make a list of absolute deviations from the median
 843         for (var i = 0; i < x.length; i++) {
 844             median_absolute_deviations.push(Math.abs(x[i] - median_value));
 845         }
 846
 847         // Find the median value of that list
 848         return median(median_absolute_deviations);
 849     }
 850
 851     // ## Compute Matrices for Jenks
 852     //
 853     // Compute the matrices required for Jenks breaks. These matrices
 854     // can be used for any classing of data with `classes <= n_classes`
 855     function jenksMatrices(data, n_classes) {
 856
 857         // in the original implementation, these matrices are referred to
 858         // as `LC` and `OP`
 859         //
 860         // * lower_class_limits (LC): optimal lower class limits
 861         // * variance_combinations (OP): optimal variance combinations for all classes
 862         var lower_class_limits = [],
 863             variance_combinations = [],
 864             // loop counters
 865             i, j,
 866             // the variance, as computed at each step in the calculation
 867             variance = 0;
 868
 869         // Initialize and fill each matrix with zeroes
 870         for (i = 0; i < data.length + 1; i++) {
 871             var tmp1 = [], tmp2 = [];
 872             // despite these arrays having the same values, we need
 873             // to keep them separate so that changing one does not change
 874             // the other
 875             for (j = 0; j < n_classes + 1; j++) {
 876                 tmp1.push(0);
 877                 tmp2.push(0);
 878             }
 879             lower_class_limits.push(tmp1);
 880             variance_combinations.push(tmp2);
 881         }
 882
 883         for (i = 1; i < n_classes + 1; i++) {
 884             lower_class_limits[1][i] = 1;
 885             variance_combinations[1][i] = 0;
 886             // in the original implementation, 9999999 is used but
 887             // since Javascript has `Infinity`, we use that.
 888             for (j = 2; j < data.length + 1; j++) {
 889                 variance_combinations[j][i] = Infinity;
 890             }
 891         }
 892
 893         for (var l = 2; l < data.length + 1; l++) {
 894
 895             // `SZ` originally. this is the sum of the values seen thus
 896             // far when calculating variance.
 897             var sum = 0,
 898                 // `ZSQ` originally. the sum of squares of values seen
 899                 // thus far
 900                 sum_squares = 0,
 901                 // `WT` originally. This is the number of
 902                 w = 0,
 903                 // `IV` originally
 904                 i4 = 0;
 905
 906             // in several instances, you could say `Math.pow(x, 2)`
 907             // instead of `x * x`, but this is slower in some browsers
 908             // introduces an unnecessary concept.
 909             for (var m = 1; m < l + 1; m++) {
 910
 911                 // `III` originally
 912                 var lower_class_limit = l - m + 1,
 913                     val = data[lower_class_limit - 1];
 914
 915                 // here we're estimating variance for each potential classing
 916                 // of the data, for each potential number of classes. `w`
 917                 // is the number of data points considered so far.
 918                 w++;
 919
 920                 // increase the current sum and sum-of-squares
 921                 sum += val;
 922                 sum_squares += val * val;
 923
 924                 // the variance at this point in the sequence is the difference
 925                 // between the sum of squares and the total x 2, over the number
 926                 // of samples.
 927                 variance = sum_squares - (sum * sum) / w;
 928
 929                 i4 = lower_class_limit - 1;
 930
 931                 if (i4 !== 0) {
 932                     for (j = 2; j < n_classes + 1; j++) {
 933                         // if adding this element to an existing class
 934                         // will increase its variance beyond the limit, break
 935                         // the class at this point, setting the `lower_class_limit`
 936                         // at this point.
 937                         if (variance_combinations[l][j] >=
 938                             (variance + variance_combinations[i4][j - 1])) {
 939                             lower_class_limits[l][j] = lower_class_limit;
 940                             variance_combinations[l][j] = variance +
 941                                 variance_combinations[i4][j - 1];
 942                         }
 943                     }
 944                 }
 945             }
 946
 947             lower_class_limits[l][1] = 1;
 948             variance_combinations[l][1] = variance;
 949         }
 950
 951         // return the two matrices. for just providing breaks, only
 952         // `lower_class_limits` is needed, but variances can be useful to
 953         // evaluate goodness of fit.
 954         return {
 955             lower_class_limits: lower_class_limits,
 956             variance_combinations: variance_combinations
 957         };
 958     }
 959
 960     // ## Pull Breaks Values for Jenks
 961     //
 962     // the second part of the jenks recipe: take the calculated matrices
 963     // and derive an array of n breaks.
 964     function jenksBreaks(data, lower_class_limits, n_classes) {
 965
 966         var k = data.length - 1,
 967             kclass = [],
 968             countNum = n_classes;
 969
 970         // the calculation of classes will never include the upper and
 971         // lower bounds, so we need to explicitly set them
 972         kclass[n_classes] = data[data.length - 1];
 973         kclass[0] = data[0];
 974
 975         // the lower_class_limits matrix is used as indices into itself
 976         // here: the `k` variable is reused in each iteration.
 977         while (countNum > 1) {
 978             kclass[countNum - 1] = data[lower_class_limits[k][countNum] - 2];
 979             k = lower_class_limits[k][countNum] - 1;
 980             countNum--;
 981         }
 982
 983         return kclass;
 984     }
 985
 986     // # [Jenks natural breaks optimization](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization)
 987     //
 988     // Implementations: [1](http://danieljlewis.org/files/2010/06/Jenks.pdf) (python),
 989     // [2](https://github.com/vvoovv/djeo-jenks/blob/master/main.js) (buggy),
 990     // [3](https://github.com/simogeo/geostats/blob/master/lib/geostats.js#L407) (works)
 991     //
 992     // Depends on `jenksBreaks()` and `jenksMatrices()`
 993     function jenks(data, n_classes) {
 994
 995         if (n_classes > data.length) return null;
 996
 997         // sort data in numerical order, since this is expected
 998         // by the matrices function
 999         data = data.slice().sort(function (a, b) { return a - b; });
1000
1001         // get our basic matrices
1002         var matrices = jenksMatrices(data, n_classes),
1003             // we only need lower class limits here
1004             lower_class_limits = matrices.lower_class_limits;
1005
1006         // extract n_classes out of the computed matrices
1007         return jenksBreaks(data, lower_class_limits, n_classes);
1008
1009     }
1010
1011     // # [Skewness](http://en.wikipedia.org/wiki/Skewness)
1012     //
1013     // A measure of the extent to which a probability distribution of a
1014     // real-valued random variable "leans" to one side of the mean.
1015     // The skewness value can be positive or negative, or even undefined.
1016     //
1017     // Implementation is based on the adjusted Fisher-Pearson standardized
1018     // moment coefficient, which is the version found in Excel and several
1019     // statistical packages including Minitab, SAS and SPSS.
1020     //
1021     // Depends on `sum_nth_power_deviations()` and `sample_standard_deviation`
1022     function sample_skewness(x) {
1023         // The skewness of less than three arguments is null
1024         if (x.length < 3) return null;
1025
1026         var n = x.length,
1027             cubed_s = Math.pow(sample_standard_deviation(x), 3),
1028             sum_cubed_deviations = sum_nth_power_deviations(x, 3);
1029
1030         return n * sum_cubed_deviations / ((n - 1) * (n - 2) * cubed_s);
1031     }
1032
1033     // # Standard Normal Table
1034     // A standard normal table, also called the unit normal table or Z table,
1035     // is a mathematical table for the values of Φ (phi), which are the values of
1036     // the cumulative distribution function of the normal distribution.
1037     // It is used to find the probability that a statistic is observed below,
1038     // above, or between values on the standard normal distribution, and by
1039     // extension, any normal distribution.
1040     //
1041     // The probabilities are taken from http://en.wikipedia.org/wiki/Standard_normal_table
1042     // The table used is the cumulative, and not cumulative from 0 to mean
1043     // (even though the latter has 5 digits precision, instead of 4).
1044     var standard_normal_table = [
1045         /*  z      0.00    0.01    0.02    0.03    0.04    0.05    0.06    0.07    0.08    0.09 */
1046         /* 0.0 */
1047         0.5000, 0.5040, 0.5080, 0.5120, 0.5160, 0.5199, 0.5239, 0.5279, 0.5319, 0.5359,
1048         /* 0.1 */
1049         0.5398, 0.5438, 0.5478, 0.5517, 0.5557, 0.5596, 0.5636, 0.5675, 0.5714, 0.5753,
1050         /* 0.2 */
1051         0.5793, 0.5832, 0.5871, 0.5910, 0.5948, 0.5987, 0.6026, 0.6064, 0.6103, 0.6141,
1052         /* 0.3 */
1053         0.6179, 0.6217, 0.6255, 0.6293, 0.6331, 0.6368, 0.6406, 0.6443, 0.6480, 0.6517,
1054         /* 0.4 */
1055         0.6554, 0.6591, 0.6628, 0.6664, 0.6700, 0.6736, 0.6772, 0.6808, 0.6844, 0.6879,
1056         /* 0.5 */
1057         0.6915, 0.6950, 0.6985, 0.7019, 0.7054, 0.7088, 0.7123, 0.7157, 0.7190, 0.7224,
1058         /* 0.6 */
1059         0.7257, 0.7291, 0.7324, 0.7357, 0.7389, 0.7422, 0.7454, 0.7486, 0.7517, 0.7549,
1060         /* 0.7 */
1061         0.7580, 0.7611, 0.7642, 0.7673, 0.7704, 0.7734, 0.7764, 0.7794, 0.7823, 0.7852,
1062         /* 0.8 */
1063         0.7881, 0.7910, 0.7939, 0.7967, 0.7995, 0.8023, 0.8051, 0.8078, 0.8106, 0.8133,
1064         /* 0.9 */
1065         0.8159, 0.8186, 0.8212, 0.8238, 0.8264, 0.8289, 0.8315, 0.8340, 0.8365, 0.8389,
1066         /* 1.0 */
1067         0.8413, 0.8438, 0.8461, 0.8485, 0.8508, 0.8531, 0.8554, 0.8577, 0.8599, 0.8621,
1068         /* 1.1 */
1069         0.8643, 0.8665, 0.8686, 0.8708, 0.8729, 0.8749, 0.8770, 0.8790, 0.8810, 0.8830,
1070         /* 1.2 */
1071         0.8849, 0.8869, 0.8888, 0.8907, 0.8925, 0.8944, 0.8962, 0.8980, 0.8997, 0.9015,
1072         /* 1.3 */
1073         0.9032, 0.9049, 0.9066, 0.9082, 0.9099, 0.9115, 0.9131, 0.9147, 0.9162, 0.9177,
1074         /* 1.4 */
1075         0.9192, 0.9207, 0.9222, 0.9236, 0.9251, 0.9265, 0.9279, 0.9292, 0.9306, 0.9319,
1076         /* 1.5 */
1077         0.9332, 0.9345, 0.9357, 0.9370, 0.9382, 0.9394, 0.9406, 0.9418, 0.9429, 0.9441,
1078         /* 1.6 */
1079         0.9452, 0.9463, 0.9474, 0.9484, 0.9495, 0.9505, 0.9515, 0.9525, 0.9535, 0.9545,
1080         /* 1.7 */
1081         0.9554, 0.9564, 0.9573, 0.9582, 0.9591, 0.9599, 0.9608, 0.9616, 0.9625, 0.9633,
1082         /* 1.8 */
1083         0.9641, 0.9649, 0.9656, 0.9664, 0.9671, 0.9678, 0.9686, 0.9693, 0.9699, 0.9706,
1084         /* 1.9 */
1085         0.9713, 0.9719, 0.9726, 0.9732, 0.9738, 0.9744, 0.9750, 0.9756, 0.9761, 0.9767,
1086         /* 2.0 */
1087         0.9772, 0.9778, 0.9783, 0.9788, 0.9793, 0.9798, 0.9803, 0.9808, 0.9812, 0.9817,
1088         /* 2.1 */
1089         0.9821, 0.9826, 0.9830, 0.9834, 0.9838, 0.9842, 0.9846, 0.9850, 0.9854, 0.9857,
1090         /* 2.2 */
1091         0.9861, 0.9864, 0.9868, 0.9871, 0.9875, 0.9878, 0.9881, 0.9884, 0.9887, 0.9890,
1092         /* 2.3 */
1093         0.9893, 0.9896, 0.9898, 0.9901, 0.9904, 0.9906, 0.9909, 0.9911, 0.9913, 0.9916,
1094         /* 2.4 */
1095         0.9918, 0.9920, 0.9922, 0.9925, 0.9927, 0.9929, 0.9931, 0.9932, 0.9934, 0.9936,
1096         /* 2.5 */
1097         0.9938, 0.9940, 0.9941, 0.9943, 0.9945, 0.9946, 0.9948, 0.9949, 0.9951, 0.9952,
1098         /* 2.6 */
1099         0.9953, 0.9955, 0.9956, 0.9957, 0.9959, 0.9960, 0.9961, 0.9962, 0.9963, 0.9964,
1100         /* 2.7 */
1101         0.9965, 0.9966, 0.9967, 0.9968, 0.9969, 0.9970, 0.9971, 0.9972, 0.9973, 0.9974,
1102         /* 2.8 */
1103         0.9974, 0.9975, 0.9976, 0.9977, 0.9977, 0.9978, 0.9979, 0.9979, 0.9980, 0.9981,
1104         /* 2.9 */
1105         0.9981, 0.9982, 0.9982, 0.9983, 0.9984, 0.9984, 0.9985, 0.9985, 0.9986, 0.9986,
1106         /* 3.0 */
1107         0.9987, 0.9987, 0.9987, 0.9988, 0.9988, 0.9989, 0.9989, 0.9989, 0.9990, 0.9990
1108     ];
1109
1110     // # [Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)
1111     //
1112     // Since probability tables cannot be
1113     // printed for every normal distribution, as there are an infinite variety
1114     // of normal distributions, it is common practice to convert a normal to a
1115     // standard normal and then use the standard normal table to find probabilities
1116     function cumulative_std_normal_probability(z) {
1117
1118         // Calculate the position of this value.
1119         var absZ = Math.abs(z),
1120             // Each row begins with a different
1121             // significant digit: 0.5, 0.6, 0.7, and so on. So the row is simply
1122             // this value's significant digit: 0.567 will be in row 0, so row=0,
1123             // 0.643 will be in row 1, so row=10.
1124             row = Math.floor(absZ * 10),
1125             column = 10 * (Math.floor(absZ * 100) / 10 - Math.floor(absZ * 100 / 10)),
1126             index = Math.min((row * 10) + column, standard_normal_table.length - 1);
1127
1128         // The index we calculate must be in the table as a positive value,
1129         // but we still pay attention to whether the input is positive
1130         // or negative, and flip the output value as a last step.
1131         if (z >= 0) {
1132             return standard_normal_table[index];
1133         } else {
1134             // due to floating-point arithmetic, values in the table with
1135             // 4 significant figures can nevertheless end up as repeating
1136             // fractions when they're computed here.
1137             return +(1 - standard_normal_table[index]).toFixed(4);
1138         }
1139     }
1140
1141     // # [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score)
1142     //
1143     // The standard score is the number of standard deviations an observation
1144     // or datum is above or below the mean. Thus, a positive standard score
1145     // represents a datum above the mean, while a negative standard score
1146     // represents a datum below the mean. It is a dimensionless quantity
1147     // obtained by subtracting the population mean from an individual raw
1148     // score and then dividing the difference by the population standard
1149     // deviation.
1150     //
1151     // The z-score is only defined if one knows the population parameters;
1152     // if one only has a sample set, then the analogous computation with
1153     // sample mean and sample standard deviation yields the
1154     // Student's t-statistic.
1155     function z_score(x, mean, standard_deviation) {
1156         return (x - mean) / standard_deviation;
1157     }
1158
1159     // We use `ε`, epsilon, as a stopping criterion when we want to iterate
1160     // until we're "close enough".
1161     var epsilon = 0.0001;
1162
1163     // # [Factorial](https://en.wikipedia.org/wiki/Factorial)
1164     //
1165     // A factorial, usually written n!, is the product of all positive
1166     // integers less than or equal to n. Often factorial is implemented
1167     // recursively, but this iterative approach is significantly faster
1168     // and simpler.
1169     function factorial(n) {
1170
1171         // factorial is mathematically undefined for negative numbers
1172         if (n < 0 ) { return null; }
1173
1174         // typically you'll expand the factorial function going down, like
1175         // 5! = 5 * 4 * 3 * 2 * 1. This is going in the opposite direction,
1176         // counting from 2 up to the number in question, and since anything
1177         // multiplied by 1 is itself, the loop only needs to start at 2.
1178         var accumulator = 1;
1179         for (var i = 2; i <= n; i++) {
1180             // for each number up to and including the number `n`, multiply
1181             // the accumulator my that number.
1182             accumulator *= i;
1183         }
1184         return accumulator;
1185     }
1186
1187     // # Bernoulli Distribution
1188     //
1189     // The [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution)
1190     // is the probability discrete
1191     // distribution of a random variable which takes value 1 with success
1192     // probability `p` and value 0 with failure
1193     // probability `q` = 1 - `p`. It can be used, for example, to represent the
1194     // toss of a coin, where "1" is defined to mean "heads" and "0" is defined
1195     // to mean "tails" (or vice versa). It is
1196     // a special case of a Binomial Distribution
1197     // where `n` = 1.
1198     function bernoulli_distribution(p) {
1199         // Check that `p` is a valid probability (0 ≤ p ≤ 1)
1200         if (p < 0 || p > 1 ) { return null; }
1201
1202         return binomial_distribution(1, p);
1203     }
1204
1205     // # Binomial Distribution
1206     //
1207     // The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability
1208     // distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
1209     // success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or
1210     // Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
1211     function binomial_distribution(trials, probability) {
1212         // Check that `p` is a valid probability (0 ≤ p ≤ 1),
1213         // that `n` is an integer, strictly positive.
1214         if (probability < 0 || probability > 1 ||
1215             trials <= 0 || trials % 1 !== 0) {
1216             return null;
1217         }
1218
1219         // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
1220         function probability_mass(x, trials, probability) {
1221             return factorial(trials) /
1222                 (factorial(x) * factorial(trials - x)) *
1223                 (Math.pow(probability, x) * Math.pow(1 - probability, trials - x));
1224         }
1225
1226         // We initialize `x`, the random variable, and `accumulator`, an accumulator
1227         // for the cumulative distribution function to 0. `distribution_functions`
1228         // is the object we'll return with the `probability_of_x` and the
1229         // `cumulative_probability_of_x`, as well as the calculated mean &
1230         // variance. We iterate until the `cumulative_probability_of_x` is
1231         // within `epsilon` of 1.0.
1232         var x = 0,
1233             cumulative_probability = 0,
1234             cells = {};
1235
1236         // This algorithm iterates through each potential outcome,
1237         // until the `cumulative_probability` is very close to 1, at
1238         // which point we've defined the vast majority of outcomes
1239         do {
1240             cells[x] = probability_mass(x, trials, probability);
1241             cumulative_probability += cells[x];
1242             x++;
1243         // when the cumulative_probability is nearly 1, we've calculated
1244         // the useful range of this distribution
1245         } while (cumulative_probability < 1 - epsilon);
1246
1247         return cells;
1248     }
1249
1250     // # Poisson Distribution
1251     //
1252     // The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution)
1253     // is a discrete probability distribution that expresses the probability
1254     // of a given number of events occurring in a fixed interval of time
1255     // and/or space if these events occur with a known average rate and
1256     // independently of the time since the last event.
1257     //
1258     // The Poisson Distribution is characterized by the strictly positive
1259     // mean arrival or occurrence rate, `λ`.
1260     function poisson_distribution(lambda) {
1261         // Check that lambda is strictly positive
1262         if (lambda <= 0) { return null; }
1263
1264         // our current place in the distribution
1265         var x = 0,
1266             // and we keep track of the current cumulative probability, in
1267             // order to know when to stop calculating chances.
1268             cumulative_probability = 0,
1269             // the calculated cells to be returned
1270             cells = {};
1271
1272         // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
1273         function probability_mass(x, lambda) {
1274             return (Math.pow(Math.E, -lambda) * Math.pow(lambda, x)) /
1275                 factorial(x);
1276         }
1277
1278         // This algorithm iterates through each potential outcome,
1279         // until the `cumulative_probability` is very close to 1, at
1280         // which point we've defined the vast majority of outcomes
1281         do {
1282             cells[x] = probability_mass(x, lambda);
1283             cumulative_probability += cells[x];
1284             x++;
1285         // when the cumulative_probability is nearly 1, we've calculated
1286         // the useful range of this distribution
1287         } while (cumulative_probability < 1 - epsilon);
1288
1289         return cells;
1290     }
1291
1292     // # Percentage Points of the χ2 (Chi-Squared) Distribution
1293     // The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common
1294     // chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two
1295     // criteria of classification of qualitative data, and in confidence interval estimation for a population standard
1296     // deviation of a normal distribution from a sample standard deviation.
1297     //
1298     // Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in
1299     // Engineering and Management Science", Wiley (1980).
1300     var chi_squared_distribution_table = {
1301         1: { 0.995:  0.00, 0.99:  0.00, 0.975:  0.00, 0.95:  0.00, 0.9:  0.02, 0.5:  0.45, 0.1:  2.71, 0.05:  3.84, 0.025:  5.02, 0.01:  6.63, 0.005:  7.88 },
1302         2: { 0.995:  0.01, 0.99:  0.02, 0.975:  0.05, 0.95:  0.10, 0.9:  0.21, 0.5:  1.39, 0.1:  4.61, 0.05:  5.99, 0.025:  7.38, 0.01:  9.21, 0.005: 10.60 },
1303         3: { 0.995:  0.07, 0.99:  0.11, 0.975:  0.22, 0.95:  0.35, 0.9:  0.58, 0.5:  2.37, 0.1:  6.25, 0.05:  7.81, 0.025:  9.35, 0.01: 11.34, 0.005: 12.84 },
1304         4: { 0.995:  0.21, 0.99:  0.30, 0.975:  0.48, 0.95:  0.71, 0.9:  1.06, 0.5:  3.36, 0.1:  7.78, 0.05:  9.49, 0.025: 11.14, 0.01: 13.28, 0.005: 14.86 },
1305         5: { 0.995:  0.41, 0.99:  0.55, 0.975:  0.83, 0.95:  1.15, 0.9:  1.61, 0.5:  4.35, 0.1:  9.24, 0.05: 11.07, 0.025: 12.83, 0.01: 15.09, 0.005: 16.75 },
1306         6: { 0.995:  0.68, 0.99:  0.87, 0.975:  1.24, 0.95:  1.64, 0.9:  2.20, 0.5:  5.35, 0.1: 10.65, 0.05: 12.59, 0.025: 14.45, 0.01: 16.81, 0.005: 18.55 },
1307         7: { 0.995:  0.99, 0.99:  1.25, 0.975:  1.69, 0.95:  2.17, 0.9:  2.83, 0.5:  6.35, 0.1: 12.02, 0.05: 14.07, 0.025: 16.01, 0.01: 18.48, 0.005: 20.28 },
1308         8: { 0.995:  1.34, 0.99:  1.65, 0.975:  2.18, 0.95:  2.73, 0.9:  3.49, 0.5:  7.34, 0.1: 13.36, 0.05: 15.51, 0.025: 17.53, 0.01: 20.09, 0.005: 21.96 },
1309         9: { 0.995:  1.73, 0.99:  2.09, 0.975:  2.70, 0.95:  3.33, 0.9:  4.17, 0.5:  8.34, 0.1: 14.68, 0.05: 16.92, 0.025: 19.02, 0.01: 21.67, 0.005: 23.59 },
1310         10: { 0.995:  2.16, 0.99:  2.56, 0.975:  3.25, 0.95:  3.94, 0.9:  4.87, 0.5:  9.34, 0.1: 15.99, 0.05: 18.31, 0.025: 20.48, 0.01: 23.21, 0.005: 25.19 },
1311         11: { 0.995:  2.60, 0.99:  3.05, 0.975:  3.82, 0.95:  4.57, 0.9:  5.58, 0.5: 10.34, 0.1: 17.28, 0.05: 19.68, 0.025: 21.92, 0.01: 24.72, 0.005: 26.76 },
1312         12: { 0.995:  3.07, 0.99:  3.57, 0.975:  4.40, 0.95:  5.23, 0.9:  6.30, 0.5: 11.34, 0.1: 18.55, 0.05: 21.03, 0.025: 23.34, 0.01: 26.22, 0.005: 28.30 },
1313         13: { 0.995:  3.57, 0.99:  4.11, 0.975:  5.01, 0.95:  5.89, 0.9:  7.04, 0.5: 12.34, 0.1: 19.81, 0.05: 22.36, 0.025: 24.74, 0.01: 27.69, 0.005: 29.82 },
1314         14: { 0.995:  4.07, 0.99:  4.66, 0.975:  5.63, 0.95:  6.57, 0.9:  7.79, 0.5: 13.34, 0.1: 21.06, 0.05: 23.68, 0.025: 26.12, 0.01: 29.14, 0.005: 31.32 },
1315         15: { 0.995:  4.60, 0.99:  5.23, 0.975:  6.27, 0.95:  7.26, 0.9:  8.55, 0.5: 14.34, 0.1: 22.31, 0.05: 25.00, 0.025: 27.49, 0.01: 30.58, 0.005: 32.80 },
1316         16: { 0.995:  5.14, 0.99:  5.81, 0.975:  6.91, 0.95:  7.96, 0.9:  9.31, 0.5: 15.34, 0.1: 23.54, 0.05: 26.30, 0.025: 28.85, 0.01: 32.00, 0.005: 34.27 },
1317         17: { 0.995:  5.70, 0.99:  6.41, 0.975:  7.56, 0.95:  8.67, 0.9: 10.09, 0.5: 16.34, 0.1: 24.77, 0.05: 27.59, 0.025: 30.19, 0.01: 33.41, 0.005: 35.72 },
1318         18: { 0.995:  6.26, 0.99:  7.01, 0.975:  8.23, 0.95:  9.39, 0.9: 10.87, 0.5: 17.34, 0.1: 25.99, 0.05: 28.87, 0.025: 31.53, 0.01: 34.81, 0.005: 37.16 },
1319         19: { 0.995:  6.84, 0.99:  7.63, 0.975:  8.91, 0.95: 10.12, 0.9: 11.65, 0.5: 18.34, 0.1: 27.20, 0.05: 30.14, 0.025: 32.85, 0.01: 36.19, 0.005: 38.58 },
1320         20: { 0.995:  7.43, 0.99:  8.26, 0.975:  9.59, 0.95: 10.85, 0.9: 12.44, 0.5: 19.34, 0.1: 28.41, 0.05: 31.41, 0.025: 34.17, 0.01: 37.57, 0.005: 40.00 },
1321         21: { 0.995:  8.03, 0.99:  8.90, 0.975: 10.28, 0.95: 11.59, 0.9: 13.24, 0.5: 20.34, 0.1: 29.62, 0.05: 32.67, 0.025: 35.48, 0.01: 38.93, 0.005: 41.40 },
1322         22: { 0.995:  8.64, 0.99:  9.54, 0.975: 10.98, 0.95: 12.34, 0.9: 14.04, 0.5: 21.34, 0.1: 30.81, 0.05: 33.92, 0.025: 36.78, 0.01: 40.29, 0.005: 42.80 },
1323         23: { 0.995:  9.26, 0.99: 10.20, 0.975: 11.69, 0.95: 13.09, 0.9: 14.85, 0.5: 22.34, 0.1: 32.01, 0.05: 35.17, 0.025: 38.08, 0.01: 41.64, 0.005: 44.18 },
1324         24: { 0.995:  9.89, 0.99: 10.86, 0.975: 12.40, 0.95: 13.85, 0.9: 15.66, 0.5: 23.34, 0.1: 33.20, 0.05: 36.42, 0.025: 39.36, 0.01: 42.98, 0.005: 45.56 },
1325         25: { 0.995: 10.52, 0.99: 11.52, 0.975: 13.12, 0.95: 14.61, 0.9: 16.47, 0.5: 24.34, 0.1: 34.28, 0.05: 37.65, 0.025: 40.65, 0.01: 44.31, 0.005: 46.93 },
1326         26: { 0.995: 11.16, 0.99: 12.20, 0.975: 13.84, 0.95: 15.38, 0.9: 17.29, 0.5: 25.34, 0.1: 35.56, 0.05: 38.89, 0.025: 41.92, 0.01: 45.64, 0.005: 48.29 },
1327         27: { 0.995: 11.81, 0.99: 12.88, 0.975: 14.57, 0.95: 16.15, 0.9: 18.11, 0.5: 26.34, 0.1: 36.74, 0.05: 40.11, 0.025: 43.19, 0.01: 46.96, 0.005: 49.65 },
1328         28: { 0.995: 12.46, 0.99: 13.57, 0.975: 15.31, 0.95: 16.93, 0.9: 18.94, 0.5: 27.34, 0.1: 37.92, 0.05: 41.34, 0.025: 44.46, 0.01: 48.28, 0.005: 50.99 },
1329         29: { 0.995: 13.12, 0.99: 14.26, 0.975: 16.05, 0.95: 17.71, 0.9: 19.77, 0.5: 28.34, 0.1: 39.09, 0.05: 42.56, 0.025: 45.72, 0.01: 49.59, 0.005: 52.34 },
1330         30: { 0.995: 13.79, 0.99: 14.95, 0.975: 16.79, 0.95: 18.49, 0.9: 20.60, 0.5: 29.34, 0.1: 40.26, 0.05: 43.77, 0.025: 46.98, 0.01: 50.89, 0.005: 53.67 },
1331         40: { 0.995: 20.71, 0.99: 22.16, 0.975: 24.43, 0.95: 26.51, 0.9: 29.05, 0.5: 39.34, 0.1: 51.81, 0.05: 55.76, 0.025: 59.34, 0.01: 63.69, 0.005: 66.77 },
1332         50: { 0.995: 27.99, 0.99: 29.71, 0.975: 32.36, 0.95: 34.76, 0.9: 37.69, 0.5: 49.33, 0.1: 63.17, 0.05: 67.50, 0.025: 71.42, 0.01: 76.15, 0.005: 79.49 },
1333         60: { 0.995: 35.53, 0.99: 37.48, 0.975: 40.48, 0.95: 43.19, 0.9: 46.46, 0.5: 59.33, 0.1: 74.40, 0.05: 79.08, 0.025: 83.30, 0.01: 88.38, 0.005: 91.95 },
1334         70: { 0.995: 43.28, 0.99: 45.44, 0.975: 48.76, 0.95: 51.74, 0.9: 55.33, 0.5: 69.33, 0.1: 85.53, 0.05: 90.53, 0.025: 95.02, 0.01: 100.42, 0.005: 104.22 },
1335         80: { 0.995: 51.17, 0.99: 53.54, 0.975: 57.15, 0.95: 60.39, 0.9: 64.28, 0.5: 79.33, 0.1: 96.58, 0.05: 101.88, 0.025: 106.63, 0.01: 112.33, 0.005: 116.32 },
1336         90: { 0.995: 59.20, 0.99: 61.75, 0.975: 65.65, 0.95: 69.13, 0.9: 73.29, 0.5: 89.33, 0.1: 107.57, 0.05: 113.14, 0.025: 118.14, 0.01: 124.12, 0.005: 128.30 },
1337         100: { 0.995: 67.33, 0.99: 70.06, 0.975: 74.22, 0.95: 77.93, 0.9: 82.36, 0.5: 99.33, 0.1: 118.50, 0.05: 124.34, 0.025: 129.56, 0.01: 135.81, 0.005: 140.17 }
1338     };
1339
1340     // # χ2 (Chi-Squared) Goodness-of-Fit Test
1341     //
1342     // The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
1343     // uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
1344     // (that is, counts of observations), each squared and divided by the number of observations expected given the
1345     // hypothesized distribution. The resulting χ2 statistic, `chi_squared`, can be compared to the chi-squared distribution
1346     // to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
1347     // takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
1348     // follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty
1349     // cells and `c` is the number of estimated parameters for the distribution.
1350     function chi_squared_goodness_of_fit(data, distribution_type, significance) {
1351         // Estimate from the sample data, a weighted mean.
1352         var input_mean = mean(data),
1353             // Calculated value of the χ2 statistic.
1354             chi_squared = 0,
1355             // Degrees of freedom, calculated as (number of class intervals -
1356             // number of hypothesized distribution parameters estimated - 1)
1357             degrees_of_freedom,
1358             // Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test.
1359             // Lose one degree of freedom for estimating `lambda` from the sample data.
1360             c = 1,
1361             // The hypothesized distribution.
1362             // Generate the hypothesized distribution.
1363             hypothesized_distribution = distribution_type(input_mean),
1364             observed_frequencies = [],
1365             expected_frequencies = [],
1366             k;
1367
1368         // Create an array holding a histogram from the sample data, of
1369         // the form `{ value: numberOfOcurrences }`
1370         for (var i = 0; i < data.length; i++) {
1371             if (observed_frequencies[data[i]] === undefined) {
1372                 observed_frequencies[data[i]] = 0;
1373             }
1374             observed_frequencies[data[i]]++;
1375         }
1376
1377         // The histogram we created might be sparse - there might be gaps
1378         // between values. So we iterate through the histogram, making
1379         // sure that instead of undefined, gaps have 0 values.
1380         for (i = 0; i < observed_frequencies.length; i++) {
1381             if (observed_frequencies[i] === undefined) {
1382                 observed_frequencies[i] = 0;
1383             }
1384         }
1385
1386         // Create an array holding a histogram of expected data given the
1387         // sample size and hypothesized distribution.
1388         for (k in hypothesized_distribution) {
1389             if (k in observed_frequencies) {
1390                 expected_frequencies[k] = hypothesized_distribution[k] * data.length;
1391             }
1392         }
1393
1394         // Working backward through the expected frequencies, collapse classes
1395         // if less than three observations are expected for a class.
1396         // This transformation is applied to the observed frequencies as well.
1397         for (k = expected_frequencies.length - 1; k >= 0; k--) {
1398             if (expected_frequencies[k] < 3) {
1399                 expected_frequencies[k - 1] += expected_frequencies[k];
1400                 expected_frequencies.pop();
1401
1402                 observed_frequencies[k - 1] += observed_frequencies[k];
1403                 observed_frequencies.pop();
1404             }
1405         }
1406
1407         // Iterate through the squared differences between observed & expected
1408         // frequencies, accumulating the `chi_squared` statistic.
1409         for (k = 0; k < observed_frequencies.length; k++) {
1410             chi_squared += Math.pow(
1411                 observed_frequencies[k] - expected_frequencies[k], 2) /
1412                 expected_frequencies[k];
1413         }
1414
1415         // Calculate degrees of freedom for this test and look it up in the
1416         // `chi_squared_distribution_table` in order to
1417         // accept or reject the goodness-of-fit of the hypothesized distribution.
1418         degrees_of_freedom = observed_frequencies.length - c - 1;
1419         return chi_squared_distribution_table[degrees_of_freedom][significance] < chi_squared;
1420     }
1421
1422     // # Mixin
1423     //
1424     // Mixin simple_statistics to a single Array instance if provided
1425     // or the Array native object if not. This is an optional
1426     // feature that lets you treat simple_statistics as a native feature
1427     // of Javascript.
1428     function mixin(array) {
1429         var support = !!(Object.defineProperty && Object.defineProperties);
1430         if (!support) throw new Error('without defineProperty, simple-statistics cannot be mixed in');
1431
1432         // only methods which work on basic arrays in a single step
1433         // are supported
1434         var arrayMethods = ['median', 'standard_deviation', 'sum',
1435             'sample_skewness',
1436             'mean', 'min', 'max', 'quantile', 'geometric_mean',
1437             'harmonic_mean', 'root_mean_square'];
1438
1439         // create a closure with a method name so that a reference
1440         // like `arrayMethods[i]` doesn't follow the loop increment
1441         function wrap(method) {
1442             return function() {
1443                 // cast any arguments into an array, since they're
1444                 // natively objects
1445                 var args = Array.prototype.slice.apply(arguments);
1446                 // make the first argument the array itself
1447                 args.unshift(this);
1448                 // return the result of the ss method
1449                 return ss[method].apply(ss, args);
1450             };
1451         }
1452
1453         // select object to extend
1454         var extending;
1455         if (array) {
1456             // create a shallow copy of the array so that our internal
1457             // operations do not change it by reference
1458             extending = array.slice();
1459         } else {
1460             extending = Array.prototype;
1461         }
1462
1463         // for each array function, define a function that gets
1464         // the array as the first argument.
1465         // We use [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty)
1466         // because it allows these properties to be non-enumerable:
1467         // `for (var in x)` loops will not run into problems with this
1468         // implementation.
1469         for (var i = 0; i < arrayMethods.length; i++) {
1470             Object.defineProperty(extending, arrayMethods[i], {
1471                 value: wrap(arrayMethods[i]),
1472                 configurable: true,
1473                 enumerable: false,
1474                 writable: true
1475             });
1476         }
1477
1478         return extending;
1479     }
1480
1481     ss.linear_regression = linear_regression;
1482     ss.standard_deviation = standard_deviation;
1483     ss.r_squared = r_squared;
1484     ss.median = median;
1485     ss.mean = mean;
1486     ss.mode = mode;
1487     ss.min = min;
1488     ss.max = max;
1489     ss.sum = sum;
1490     ss.quantile = quantile;
1491     ss.quantile_sorted = quantile_sorted;
1492     ss.iqr = iqr;
1493     ss.mad = mad;
1494
1495     ss.chunk = chunk;
1496     ss.shuffle = shuffle;
1497     ss.shuffle_in_place = shuffle_in_place;
1498
1499     ss.sample = sample;
1500
1501     ss.sample_covariance = sample_covariance;
1502     ss.sample_correlation = sample_correlation;
1503     ss.sample_variance = sample_variance;
1504     ss.sample_standard_deviation = sample_standard_deviation;
1505     ss.sample_skewness = sample_skewness;
1506
1507     ss.geometric_mean = geometric_mean;
1508     ss.harmonic_mean = harmonic_mean;
1509     ss.root_mean_square = root_mean_square;
1510     ss.variance = variance;
1511     ss.t_test = t_test;
1512     ss.t_test_two_sample = t_test_two_sample;
1513
1514     // jenks
1515     ss.jenksMatrices = jenksMatrices;
1516     ss.jenksBreaks = jenksBreaks;
1517     ss.jenks = jenks;
1518
1519     ss.bayesian = bayesian;
1520
1521     // Distribution-related methods
1522     ss.epsilon = epsilon; // We make ε available to the test suite.
1523     ss.factorial = factorial;
1524     ss.bernoulli_distribution = bernoulli_distribution;
1525     ss.binomial_distribution = binomial_distribution;
1526     ss.poisson_distribution = poisson_distribution;
1527     ss.chi_squared_goodness_of_fit = chi_squared_goodness_of_fit;
1528
1529     // Normal distribution
1530     ss.z_score = z_score;
1531     ss.cumulative_std_normal_probability = cumulative_std_normal_probability;
1532     ss.standard_normal_table = standard_normal_table;
1533
1534     // Alias this into its common name
1535     ss.average = mean;
1536     ss.interquartile_range = iqr;
1537     ss.mixin = mixin;
1538     ss.median_absolute_deviation = mad;
1539     ss.rms = root_mean_square;
1540
1541 })(this);