unstack, sort: cleanup and improvement
[minix.git] / usr.bin / ministat / ministat.c
blob252d3f99fed99e98e0972a9c6cd14026f5521876
1 /*
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
9 */
11 #include <sys/cdefs.h>
12 #if 0
13 __FBSDID("$FreeBSD$");
14 #endif
16 #include <stdio.h>
17 #include <math.h>
18 #include <err.h>
19 #include <string.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <sys/ioctl.h>
23 #include <sys/queue.h>
24 #include <sys/ttycom.h>
/* Highest degrees-of-freedom row held in student[]; row 0 is extra. */
#define NSTUDENT 100
/* Number of confidence levels, i.e. columns of student[]. */
#define NCONF 6

/* Confidence level, in percent, that belongs to each column of student[]. */
const double studentpct[] = { 80, 90, 95, 98, 99, 99.5 };

/*
 * Student's t critical values.  Rows 1..NSTUDENT are indexed by degrees of
 * freedom; row 0 is the "inf" row that Relative() falls back to when the
 * degrees of freedom exceed NSTUDENT.  Columns follow studentpct[].
 */
double student[NSTUDENT + 1][NCONF] = {
/* inf */	{ 1.282, 1.645, 1.960, 2.326, 2.576, 3.090 },
/* 1. */	{ 3.078, 6.314, 12.706, 31.821, 63.657, 318.313 },
/* 2. */	{ 1.886, 2.920, 4.303, 6.965, 9.925, 22.327 },
/* 3. */	{ 1.638, 2.353, 3.182, 4.541, 5.841, 10.215 },
/* 4. */	{ 1.533, 2.132, 2.776, 3.747, 4.604, 7.173 },
/* 5. */	{ 1.476, 2.015, 2.571, 3.365, 4.032, 5.893 },
/* 6. */	{ 1.440, 1.943, 2.447, 3.143, 3.707, 5.208 },
/* 7. */	{ 1.415, 1.895, 2.365, 2.998, 3.499, 4.782 },
/* 8. */	{ 1.397, 1.860, 2.306, 2.896, 3.355, 4.499 },
/* 9. */	{ 1.383, 1.833, 2.262, 2.821, 3.250, 4.296 },
/* 10. */	{ 1.372, 1.812, 2.228, 2.764, 3.169, 4.143 },
/* 11. */	{ 1.363, 1.796, 2.201, 2.718, 3.106, 4.024 },
/* 12. */	{ 1.356, 1.782, 2.179, 2.681, 3.055, 3.929 },
/* 13. */	{ 1.350, 1.771, 2.160, 2.650, 3.012, 3.852 },
/* 14. */	{ 1.345, 1.761, 2.145, 2.624, 2.977, 3.787 },
/* 15. */	{ 1.341, 1.753, 2.131, 2.602, 2.947, 3.733 },
/* 16. */	{ 1.337, 1.746, 2.120, 2.583, 2.921, 3.686 },
/* 17. */	{ 1.333, 1.740, 2.110, 2.567, 2.898, 3.646 },
/* 18. */	{ 1.330, 1.734, 2.101, 2.552, 2.878, 3.610 },
/* 19. */	{ 1.328, 1.729, 2.093, 2.539, 2.861, 3.579 },
/* 20. */	{ 1.325, 1.725, 2.086, 2.528, 2.845, 3.552 },
/* 21. */	{ 1.323, 1.721, 2.080, 2.518, 2.831, 3.527 },
/* 22. */	{ 1.321, 1.717, 2.074, 2.508, 2.819, 3.505 },
/* 23. */	{ 1.319, 1.714, 2.069, 2.500, 2.807, 3.485 },
/* 24. */	{ 1.318, 1.711, 2.064, 2.492, 2.797, 3.467 },
/* 25. */	{ 1.316, 1.708, 2.060, 2.485, 2.787, 3.450 },
/* 26. */	{ 1.315, 1.706, 2.056, 2.479, 2.779, 3.435 },
/* 27. */	{ 1.314, 1.703, 2.052, 2.473, 2.771, 3.421 },
/* 28. */	{ 1.313, 1.701, 2.048, 2.467, 2.763, 3.408 },
/* 29. */	{ 1.311, 1.699, 2.045, 2.462, 2.756, 3.396 },
/* 30. */	{ 1.310, 1.697, 2.042, 2.457, 2.750, 3.385 },
/* 31. */	{ 1.309, 1.696, 2.040, 2.453, 2.744, 3.375 },
/* 32. */	{ 1.309, 1.694, 2.037, 2.449, 2.738, 3.365 },
/* 33. */	{ 1.308, 1.692, 2.035, 2.445, 2.733, 3.356 },
/* 34. */	{ 1.307, 1.691, 2.032, 2.441, 2.728, 3.348 },
/* 35. */	{ 1.306, 1.690, 2.030, 2.438, 2.724, 3.340 },
/* 36. */	{ 1.306, 1.688, 2.028, 2.434, 2.719, 3.333 },
/* 37. */	{ 1.305, 1.687, 2.026, 2.431, 2.715, 3.326 },
/* 38. */	{ 1.304, 1.686, 2.024, 2.429, 2.712, 3.319 },
/* 39. */	{ 1.304, 1.685, 2.023, 2.426, 2.708, 3.313 },
/* 40. */	{ 1.303, 1.684, 2.021, 2.423, 2.704, 3.307 },
/* 41. */	{ 1.303, 1.683, 2.020, 2.421, 2.701, 3.301 },
/* 42. */	{ 1.302, 1.682, 2.018, 2.418, 2.698, 3.296 },
/* 43. */	{ 1.302, 1.681, 2.017, 2.416, 2.695, 3.291 },
/* 44. */	{ 1.301, 1.680, 2.015, 2.414, 2.692, 3.286 },
/* 45. */	{ 1.301, 1.679, 2.014, 2.412, 2.690, 3.281 },
/* 46. */	{ 1.300, 1.679, 2.013, 2.410, 2.687, 3.277 },
/* 47. */	{ 1.300, 1.678, 2.012, 2.408, 2.685, 3.273 },
/* 48. */	{ 1.299, 1.677, 2.011, 2.407, 2.682, 3.269 },
/* 49. */	{ 1.299, 1.677, 2.010, 2.405, 2.680, 3.265 },
/* 50. */	{ 1.299, 1.676, 2.009, 2.403, 2.678, 3.261 },
/* 51. */	{ 1.298, 1.675, 2.008, 2.402, 2.676, 3.258 },
/* 52. */	{ 1.298, 1.675, 2.007, 2.400, 2.674, 3.255 },
/* 53. */	{ 1.298, 1.674, 2.006, 2.399, 2.672, 3.251 },
/* 54. */	{ 1.297, 1.674, 2.005, 2.397, 2.670, 3.248 },
/* 55. */	{ 1.297, 1.673, 2.004, 2.396, 2.668, 3.245 },
/* 56. */	{ 1.297, 1.673, 2.003, 2.395, 2.667, 3.242 },
/* 57. */	{ 1.297, 1.672, 2.002, 2.394, 2.665, 3.239 },
/* 58. */	{ 1.296, 1.672, 2.002, 2.392, 2.663, 3.237 },
/* 59. */	{ 1.296, 1.671, 2.001, 2.391, 2.662, 3.234 },
/* 60. */	{ 1.296, 1.671, 2.000, 2.390, 2.660, 3.232 },
/* 61. */	{ 1.296, 1.670, 2.000, 2.389, 2.659, 3.229 },
/* 62. */	{ 1.295, 1.670, 1.999, 2.388, 2.657, 3.227 },
/* 63. */	{ 1.295, 1.669, 1.998, 2.387, 2.656, 3.225 },
/* 64. */	{ 1.295, 1.669, 1.998, 2.386, 2.655, 3.223 },
/* 65. */	{ 1.295, 1.669, 1.997, 2.385, 2.654, 3.220 },
/* 66. */	{ 1.295, 1.668, 1.997, 2.384, 2.652, 3.218 },
/* 67. */	{ 1.294, 1.668, 1.996, 2.383, 2.651, 3.216 },
/* 68. */	{ 1.294, 1.668, 1.995, 2.382, 2.650, 3.214 },
/* 69. */	{ 1.294, 1.667, 1.995, 2.382, 2.649, 3.213 },
/* 70. */	{ 1.294, 1.667, 1.994, 2.381, 2.648, 3.211 },
/* 71. */	{ 1.294, 1.667, 1.994, 2.380, 2.647, 3.209 },
/* 72. */	{ 1.293, 1.666, 1.993, 2.379, 2.646, 3.207 },
/* 73. */	{ 1.293, 1.666, 1.993, 2.379, 2.645, 3.206 },
/* 74. */	{ 1.293, 1.666, 1.993, 2.378, 2.644, 3.204 },
/* 75. */	{ 1.293, 1.665, 1.992, 2.377, 2.643, 3.202 },
/* 76. */	{ 1.293, 1.665, 1.992, 2.376, 2.642, 3.201 },
/* 77. */	{ 1.293, 1.665, 1.991, 2.376, 2.641, 3.199 },
/* 78. */	{ 1.292, 1.665, 1.991, 2.375, 2.640, 3.198 },
/* 79. */	{ 1.292, 1.664, 1.990, 2.374, 2.640, 3.197 },
/* 80. */	{ 1.292, 1.664, 1.990, 2.374, 2.639, 3.195 },
/* 81. */	{ 1.292, 1.664, 1.990, 2.373, 2.638, 3.194 },
/* 82. */	{ 1.292, 1.664, 1.989, 2.373, 2.637, 3.193 },
/* 83. */	{ 1.292, 1.663, 1.989, 2.372, 2.636, 3.191 },
/* 84. */	{ 1.292, 1.663, 1.989, 2.372, 2.636, 3.190 },
/* 85. */	{ 1.292, 1.663, 1.988, 2.371, 2.635, 3.189 },
/* 86. */	{ 1.291, 1.663, 1.988, 2.370, 2.634, 3.188 },
/* 87. */	{ 1.291, 1.663, 1.988, 2.370, 2.634, 3.187 },
/* 88. */	{ 1.291, 1.662, 1.987, 2.369, 2.633, 3.185 },
/* 89. */	{ 1.291, 1.662, 1.987, 2.369, 2.632, 3.184 },
/* 90. */	{ 1.291, 1.662, 1.987, 2.368, 2.632, 3.183 },
/* 91. */	{ 1.291, 1.662, 1.986, 2.368, 2.631, 3.182 },
/* 92. */	{ 1.291, 1.662, 1.986, 2.368, 2.630, 3.181 },
/* 93. */	{ 1.291, 1.661, 1.986, 2.367, 2.630, 3.180 },
/* 94. */	{ 1.291, 1.661, 1.986, 2.367, 2.629, 3.179 },
/* 95. */	{ 1.291, 1.661, 1.985, 2.366, 2.629, 3.178 },
/* 96. */	{ 1.290, 1.661, 1.985, 2.366, 2.628, 3.177 },
/* 97. */	{ 1.290, 1.661, 1.985, 2.365, 2.627, 3.176 },
/* 98. */	{ 1.290, 1.661, 1.984, 2.365, 2.627, 3.175 },
/* 99. */	{ 1.290, 1.660, 1.984, 2.365, 2.626, 3.175 },
/* 100. */	{ 1.290, 1.660, 1.984, 2.364, 2.626, 3.174 }
};
/* Size of the symbol table; main() accepts at most MAX_DS - 1 datasets. */
#define MAX_DS 8

/*
 * Character used for each dataset in the legend, plot and vitals lines.
 * Index 0 is a blank, which DumpPlot() prints for empty plot cells.
 */
static char symbol[MAX_DS] = {
	' ', 'x', '+', '*', '%', '#', '@', 'O'
};
/* One set of samples together with its running sums. */
struct dataset {
	char		*name;		/* label shown in the legend */
	double		*points;	/* sample values; ReadSet() sorts them */
	unsigned	 lpoints;	/* capacity of points[], in elements */
	double		 sy;		/* sum of all samples */
	double		 syy;		/* sum of all squared samples */
	unsigned	 n;		/* number of samples stored */
};
144 static struct dataset *
145 NewSet(void)
147 struct dataset *ds;
149 ds = calloc(1, sizeof *ds);
150 ds->lpoints = 100000;
151 ds->points = calloc(sizeof *ds->points, ds->lpoints);
152 return(ds);
155 static void
156 AddPoint(struct dataset *ds, double a)
158 double *dp;
160 if (ds->n >= ds->lpoints) {
161 dp = ds->points;
162 ds->lpoints *= 4;
163 ds->points = calloc(sizeof *ds->points, ds->lpoints);
164 memcpy(ds->points, dp, sizeof *dp * ds->n);
165 free(dp);
167 ds->points[ds->n++] = a;
168 ds->sy += a;
169 ds->syy += a * a;
172 static double
173 Min(struct dataset *ds)
176 return (ds->points[0]);
179 static double
180 Max(struct dataset *ds)
183 return (ds->points[ds->n -1]);
186 static double
187 Avg(struct dataset *ds)
190 return(ds->sy / ds->n);
193 static double
194 Median(struct dataset *ds)
197 return (ds->points[ds->n / 2]);
200 static double
201 Var(struct dataset *ds)
204 return (ds->syy - ds->sy * ds->sy / ds->n) / (ds->n - 1.0);
/* Sample standard deviation: the square root of Var(). */
static double
Stddev(struct dataset *ds)
{
	const double variance = Var(ds);

	return (sqrt(variance));
}
/* Print the column header for the lines produced by Vitals(). */
static void
VitalsHead(void)
{
	static const char header[] = " N Min Max Median Avg Stddev\n";

	fputs(header, stdout);
}
221 static void
222 Vitals(struct dataset *ds, int flag)
225 printf("%c %3d %17.12g %17.12g %17.12g %17.12g %17.12g", symbol[flag],
226 ds->n, Min(ds), Max(ds), Median(ds), Avg(ds), Stddev(ds));
227 printf("\n");
230 static void
231 Relative(struct dataset *ds, struct dataset *rs, int confidx)
233 double spool, s, d, e, t;
234 int i;
236 i = ds->n + rs->n - 2;
237 if (i > NSTUDENT)
238 t = student[0][confidx];
239 else
240 t = student[i][confidx];
241 spool = (ds->n - 1) * Var(ds) + (rs->n - 1) * Var(rs);
242 spool /= ds->n + rs->n - 2;
243 spool = sqrt(spool);
244 s = spool * sqrt(1.0 / ds->n + 1.0 / rs->n);
245 d = Avg(ds) - Avg(rs);
246 e = t * s;
248 if (fabs(d) > e) {
250 printf("Difference at %.1f%% confidence\n", studentpct[confidx]);
251 printf(" %g +/- %g\n", d, e);
252 printf(" %g%% +/- %g%%\n", d * 100 / Avg(rs), e * 100 / Avg(rs));
253 printf(" (Student's t, pooled s = %g)\n", spool);
254 } else {
255 printf("No difference proven at %.1f%% confidence\n",
256 studentpct[confidx]);
/* State of the ASCII plot shared by SetupPlot/AdjPlot/PlotSet/DumpPlot. */
struct plot {
	double	  min;		/* smallest value passed to AdjPlot() */
	double	  max;		/* largest value passed to AdjPlot() */
	double	  span;		/* max - min */
	int	  width;	/* number of plot columns */

	double	  x0;		/* value at the left edge: min - dx / 2 */
	double	  dx;		/* value range covered by one column */
	int	  height;	/* number of rows held in data */
	char	 *data;		/* width * height cells of dataset values */
	char	**bar;		/* per-bar rows of stddev/median/avg marks */
	int	  separate_bars; /* non-zero: one bar row per dataset */
	int	  num_datasets;	/* number of entries in bar */
};

/* The single plot instance used by this program. */
static struct plot plot;
276 static void
277 SetupPlot(int width, int separate, int num_datasets)
279 struct plot *pl;
281 pl = &plot;
282 pl->width = width;
283 pl->height = 0;
284 pl->data = NULL;
285 pl->bar = NULL;
286 pl->separate_bars = separate;
287 pl->num_datasets = num_datasets;
288 pl->min = 999e99;
289 pl->max = -999e99;
292 static void
293 AdjPlot(double a)
295 struct plot *pl;
297 pl = &plot;
298 if (a < pl->min)
299 pl->min = a;
300 if (a > pl->max)
301 pl->max = a;
302 pl->span = pl->max - pl->min;
303 pl->dx = pl->span / (pl->width - 1.0);
304 pl->x0 = pl->min - .5 * pl->dx;
/*
 * Grow the plot range to cover dataset ds: its extremes and the interval
 * one standard deviation either side of its average.
 */
static void
DimPlot(struct dataset *ds)
{
	const double mean = Avg(ds);
	const double sd = Stddev(ds);

	AdjPlot(Min(ds));
	AdjPlot(Max(ds));
	AdjPlot(mean - sd);
	AdjPlot(mean + sd);
}
316 static void
317 PlotSet(struct dataset *ds, int val)
319 struct plot *pl;
320 int i, j, m, x;
321 unsigned n;
322 int bar;
324 pl = &plot;
325 if (pl->span == 0)
326 return;
328 if (pl->separate_bars)
329 bar = val-1;
330 else
331 bar = 0;
333 if (pl->bar == NULL) {
334 pl->bar = malloc(sizeof(char *) * pl->num_datasets);
335 memset(pl->bar, 0, sizeof(char*) * pl->num_datasets);
337 if (pl->bar[bar] == NULL) {
338 pl->bar[bar] = malloc(pl->width);
339 memset(pl->bar[bar], 0, pl->width);
342 m = 1;
343 i = -1;
344 j = 0;
345 for (n = 0; n < ds->n; n++) {
346 x = (ds->points[n] - pl->x0) / pl->dx;
347 if (x == i) {
348 j++;
349 if (j > m)
350 m = j;
351 } else {
352 j = 1;
353 i = x;
356 m += 1;
357 if (m > pl->height) {
358 pl->data = realloc(pl->data, pl->width * m);
359 memset(pl->data + pl->height * pl->width, 0,
360 (m - pl->height) * pl->width);
362 pl->height = m;
363 i = -1;
364 for (n = 0; n < ds->n; n++) {
365 x = (ds->points[n] - pl->x0) / pl->dx;
366 if (x == i) {
367 j++;
368 } else {
369 j = 1;
370 i = x;
372 pl->data[j * pl->width + x] |= val;
374 if (!isnan(Stddev(ds))) {
375 x = ((Avg(ds) - Stddev(ds)) - pl->x0) / pl->dx;
376 m = ((Avg(ds) + Stddev(ds)) - pl->x0) / pl->dx;
377 pl->bar[bar][m] = '|';
378 pl->bar[bar][x] = '|';
379 for (i = x + 1; i < m; i++)
380 if (pl->bar[bar][i] == 0)
381 pl->bar[bar][i] = '_';
383 x = (Median(ds) - pl->x0) / pl->dx;
384 pl->bar[bar][x] = 'M';
385 x = (Avg(ds) - pl->x0) / pl->dx;
386 pl->bar[bar][x] = 'A';
389 static void
390 DumpPlot(void)
392 struct plot *pl;
393 int i, j, k;
395 pl = &plot;
396 if (pl->span == 0) {
397 printf("[no plot, span is zero width]\n");
398 return;
401 putchar('+');
402 for (i = 0; i < pl->width; i++)
403 putchar('-');
404 putchar('+');
405 putchar('\n');
406 for (i = 1; i < pl->height; i++) {
407 putchar('|');
408 for (j = 0; j < pl->width; j++) {
409 k = pl->data[(pl->height - i) * pl->width + j];
410 if (k >= 0 && k < MAX_DS)
411 putchar(symbol[k]);
412 else
413 printf("[%02x]", k);
415 putchar('|');
416 putchar('\n');
418 for (i = 0; i < pl->num_datasets; i++) {
419 if (pl->bar[i] == NULL)
420 continue;
421 putchar('|');
422 for (j = 0; j < pl->width; j++) {
423 k = pl->bar[i][j];
424 if (k == 0)
425 k = ' ';
426 putchar(k);
428 putchar('|');
429 putchar('\n');
431 putchar('+');
432 for (i = 0; i < pl->width; i++)
433 putchar('-');
434 putchar('+');
435 putchar('\n');
/*
 * qsort(3) comparator ordering doubles ascending.
 * Returns -1, 0 or 1; operands that compare neither less nor greater
 * (equal values, or a NaN on either side) yield 0.
 */
static int
dbl_cmp(const void *a, const void *b)
{
	const double lhs = *(const double *)a;
	const double rhs = *(const double *)b;

	return ((lhs > rhs) - (lhs < rhs));
}
452 static struct dataset *
453 ReadSet(const char *n, int column, const char *delim)
455 FILE *f;
456 char buf[BUFSIZ], *p, *t;
457 struct dataset *s;
458 double d;
459 int line;
460 int i;
462 if (n == NULL) {
463 f = stdin;
464 n = "<stdin>";
465 } else if (!strcmp(n, "-")) {
466 f = stdin;
467 n = "<stdin>";
468 } else {
469 f = fopen(n, "r");
471 if (f == NULL)
472 err(1, "Cannot open %s", n);
473 s = NewSet();
474 s->name = strdup(n);
475 line = 0;
476 while (fgets(buf, sizeof buf, f) != NULL) {
477 line++;
479 i = strlen(buf);
480 if (buf[i-1] == '\n')
481 buf[i-1] = '\0';
482 for (i = 1, t = strtok(buf, delim);
483 t != NULL && *t != '#';
484 i++, t = strtok(NULL, delim)) {
485 if (i == column)
486 break;
488 if (t == NULL || *t == '#')
489 continue;
491 d = strtod(t, &p);
492 if (p != NULL && *p != '\0')
493 err(2, "Invalid data on line %d in %s\n", line, n);
494 if (*buf != '\0')
495 AddPoint(s, d);
497 fclose(f);
498 if (s->n < 3) {
499 fprintf(stderr,
500 "Dataset %s must contain at least 3 data points\n", n);
501 exit (2);
503 qsort(s->points, s->n, sizeof *s->points, dbl_cmp);
504 return (s);
507 static void
508 usage(char const *whine)
510 int i;
512 fprintf(stderr, "%s\n", whine);
513 fprintf(stderr,
514 "Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-ns] [-w width] [file [file ...]]\n");
515 fprintf(stderr, "\tconfidence = {");
516 for (i = 0; i < NCONF; i++) {
517 fprintf(stderr, "%s%g%%",
518 i ? ", " : "",
519 studentpct[i]);
521 fprintf(stderr, "}\n");
522 fprintf(stderr, "\t-C : column number to extract (starts and defaults to 1)\n");
523 fprintf(stderr, "\t-d : delimiter(s) string, default to \" \\t\"\n");
524 fprintf(stderr, "\t-n : print summary statistics only, no graph/test\n");
525 fprintf(stderr, "\t-s : print avg/median/stddev bars on separate lines\n");
526 fprintf(stderr, "\t-w : width of graph/test output (default 74 or terminal width)\n");
527 exit (2);
531 main(int argc, char **argv)
533 struct dataset *ds[7];
534 int nds;
535 double a;
536 const char *delim = " \t";
537 char *p;
538 int c, i, ci;
539 int column = 1;
540 int flag_s = 0;
541 int flag_n = 0;
542 int termwidth = 74;
544 if (isatty(STDOUT_FILENO)) {
545 struct winsize wsz;
547 if ((p = getenv("COLUMNS")) != NULL && *p != '\0')
548 termwidth = atoi(p);
549 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &wsz) != -1 &&
550 wsz.ws_col > 0)
551 termwidth = wsz.ws_col - 2;
554 ci = -1;
555 while ((c = getopt(argc, argv, "C:c:d:snw:")) != -1)
556 switch (c) {
557 case 'C':
558 column = strtol(optarg, &p, 10);
559 if (p != NULL && *p != '\0')
560 usage("Invalid column number.");
561 if (column <= 0)
562 usage("Column number should be positive.");
563 break;
564 case 'c':
565 a = strtod(optarg, &p);
566 if (p != NULL && *p != '\0')
567 usage("Not a floating point number");
568 for (i = 0; i < NCONF; i++)
569 if (a == studentpct[i])
570 ci = i;
571 if (ci == -1)
572 usage("No support for confidence level");
573 break;
574 case 'd':
575 if (*optarg == '\0')
576 usage("Can't use empty delimiter string");
577 delim = optarg;
578 break;
579 case 'n':
580 flag_n = 1;
581 break;
582 case 's':
583 flag_s = 1;
584 break;
585 case 'w':
586 termwidth = strtol(optarg, &p, 10);
587 if (p != NULL && *p != '\0')
588 usage("Invalid width, not a number.");
589 if (termwidth < 0)
590 usage("Unable to move beyond left margin.");
591 break;
592 default:
593 usage("Unknown option");
594 break;
596 if (ci == -1)
597 ci = 2;
598 argc -= optind;
599 argv += optind;
601 if (argc == 0) {
602 ds[0] = ReadSet("-", column, delim);
603 nds = 1;
604 } else {
605 if (argc > (MAX_DS - 1))
606 usage("Too many datasets.");
607 nds = argc;
608 for (i = 0; i < nds; i++)
609 ds[i] = ReadSet(argv[i], column, delim);
612 for (i = 0; i < nds; i++)
613 printf("%c %s\n", symbol[i+1], ds[i]->name);
615 if (!flag_n) {
616 SetupPlot(termwidth, flag_s, nds);
617 for (i = 0; i < nds; i++)
618 DimPlot(ds[i]);
619 for (i = 0; i < nds; i++)
620 PlotSet(ds[i], i + 1);
621 DumpPlot();
623 VitalsHead();
624 Vitals(ds[0], 1);
625 for (i = 1; i < nds; i++) {
626 Vitals(ds[i], i + 1);
627 if (!flag_n)
628 Relative(ds[i], ds[0], ci);
630 exit(0);