unstack, sort: cleanup and improvement
[minix.git] / commands / sprofdiff / sprofdiff.c
blob9232a089ee142141c7fe97f6003aa5ac59b81878
1 #include <assert.h>
2 #include <errno.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
9 #include "tdist.h"
11 /* user-configurable settings */
12 #define DEBUG 0
14 #define PROC_NAME_WIDTH 10
16 #define SYMBOL_NAME_WIDTH 24
18 /* types */
19 #define SYMBOL_HASHTAB_SIZE 1024
21 #define SYMBOL_NAME_SIZE 52
23 struct symbol_count {
24 unsigned long sum;
25 unsigned long long sum2;
26 unsigned long min;
27 unsigned long max;
30 enum symbol_class {
31 sc_total,
32 sc_idle,
33 sc_system,
34 sc_user,
35 sc_process,
36 sc_symbol
39 struct symbol_info {
40 struct symbol_info *next;
41 struct symbol_info *hashtab_next;
42 char binary[PROC_NAME_LEN];
43 char name[SYMBOL_NAME_SIZE];
44 struct symbol_count count[2];
45 long diff;
46 enum symbol_class class;
49 /* global variables */
50 static unsigned n1, n2;
51 static struct symbol_info *symbols;
52 static struct symbol_info *symbol_hashtab[SYMBOL_HASHTAB_SIZE];
54 /* prototypes */
55 static double compute_sig(double avg1, double var1, double avg2, double var2);
56 static void compute_stats(const struct symbol_count *count, unsigned n,
57 double *avg, double *var);
58 static void load_file(const char *path, int count_index);
59 static void *malloc_checked(size_t size);
60 static void print_report(void);
61 static void print_report_line(const struct symbol_info *symbol);
62 static int read_line(FILE *file, const char *path, int line, char *binary,
63 char *name, unsigned long *samples);
64 static enum symbol_class symbol_classify(const char *binary, const char *name);
65 static unsigned string_hash(const char *s, size_t size);
66 static struct symbol_info *symbol_find_or_add(const char *binary,
67 const char *name);
68 static unsigned symbol_hash(const char *binary, const char *name);
69 static int symbol_qsort_compare(const void *p1, const void *p2);
70 static void symbol_tally(const char *binary, const char *name,
71 unsigned long samples, int count_index);
72 static unsigned symbols_count(void);
73 static void usage(const char *argv0);
75 #define MALLOC_CHECKED(type, count) \
76 ((type *) malloc_checked(sizeof(type) * (count)))
78 #if DEBUG
79 #define dprintf(...) do { \
80 fprintf(stderr, "debug(%s:%d): ", __FUNCTION__, __LINE__); \
81 fprintf(stderr, __VA_ARGS__); \
82 } while(0)
83 #else
84 #define dprintf(...)
85 #endif
87 int main(int argc, char **argv) {
88 int i;
90 #ifdef DEBUG
91 /* disable buffering so the output mixes correctly */
92 setvbuf(stdout, NULL, _IONBF, 0);
93 setvbuf(stderr, NULL, _IONBF, 0);
94 #endif
96 if (argc < 3) usage(argv[0]);
98 /* load left-hand files */
99 for (i = 1; i < argc; i++) {
100 if (strcmp(argv[i], "-r") == 0) {
101 i++;
102 break;
104 if (argc == 3 && i == 2) break;
105 load_file(argv[i], 0);
106 n1++;
109 /* load right-hand files */
110 for (; i < argc; i++) {
111 load_file(argv[i], 1);
112 n2++;
115 if (n1 < 1 || n2 < 1) usage(argv[0]);
117 /* report analysis results */
118 print_report();
119 return 0;
122 static double compute_sig(double avg1, double var1, double avg2, double var2) {
123 double df, t, var;
125 /* prevent division by zero with lack of variance */
126 var = var1 / n1 + var2 / n2;
127 if (var <= 0 || n1 <= 1 || n2 <= 1) return -1;
129 /* do we have enough degrees of freedom? */
130 df = var * var / (
131 var1 * var1 / (n1 * n1 * (n1 - 1)) +
132 var2 * var2 / (n2 * n2 * (n2 - 1)));
133 if (df < 1) return -1;
135 /* perform t-test */
136 t = (avg1 - avg2) / sqrt(var);
137 return student_t_p_2tail(t, df);
140 static void compute_stats(const struct symbol_count *count, unsigned n,
141 double *avg, double *var) {
142 double sum;
144 assert(count);
145 assert(avg);
146 assert(var);
148 sum = count->sum;
149 if (n < 1) {
150 *avg = 0;
151 } else {
152 *avg = sum / n;
155 if (n < 2) {
156 *var = 0;
157 } else {
158 *var = (count->sum2 - sum * sum / n) / (n - 1);
162 static void load_file(const char *path, int count_index) {
163 char binary[PROC_NAME_LEN];
164 FILE *file;
165 int line;
166 char name[SYMBOL_NAME_SIZE];
167 unsigned long samples;
169 assert(path);
170 assert(count_index == 0 || count_index == 1);
172 file = fopen(path, "r");
173 if (!file) {
174 fprintf(stderr, "error: cannot open \"%s\": %s\n",
175 path, strerror(errno));
176 exit(1);
179 line = 1;
180 while (read_line(file, path, line++, binary, name, &samples)) {
181 symbol_tally(binary, name, samples, count_index);
184 fclose(file);
187 static void *malloc_checked(size_t size) {
188 void *p;
189 if (!size) return NULL;
190 p = malloc(size);
191 if (!p) {
192 fprintf(stderr, "error: malloc cannot allocate %lu bytes: %s\n",
193 (unsigned long) size, strerror(errno));
194 exit(-1);
196 return p;
199 static void print_report(void) {
200 unsigned i, index, symbol_count;
201 struct symbol_info *symbol, **symbol_list;
203 /* list the symbols in an array for sorting */
204 symbol_count = symbols_count();
205 symbol_list = MALLOC_CHECKED(struct symbol_info *, symbol_count);
206 index = 0;
207 for (symbol = symbols; symbol; symbol = symbol->next) {
208 symbol_list[index++] = symbol;
210 /* sort by difference in average, multiply both sides by
211 * n1 * n2 to avoid division
213 symbol->diff = (long) (symbol->count[1].sum * n1) -
214 (long) (symbol->count[0].sum * n2);
216 assert(index == symbol_count);
218 /* sort symbols */
219 qsort(symbol_list, symbol_count, sizeof(struct symbol_info *),
220 symbol_qsort_compare);
222 printf("%-*s %-*s ------avg------ ----stdev---- diff sig\n",
223 PROC_NAME_WIDTH, "binary", SYMBOL_NAME_WIDTH, "symbol");
224 printf("%-*s left right left right\n",
225 PROC_NAME_WIDTH + SYMBOL_NAME_WIDTH + 1, "");
226 printf("\n");
227 for (i = 0; i < symbol_count; i++) {
228 if (i > 0 && symbol_list[i]->class >= sc_process &&
229 symbol_list[i]->class != symbol_list[i - 1]->class) {
230 printf("\n");
232 print_report_line(symbol_list[i]);
234 printf("\n");
235 printf("significance levels (two-tailed):\n");
236 printf(" * p < 0.05\n");
237 printf(" ** p < 0.01\n");
238 printf(" *** p < 0.001\n");
239 free(symbol_list);
242 static void print_report_line(const struct symbol_info *symbol) {
243 double avg1, avg2, p, var1, var2;
245 /* compute statistics; t is Welch's t, which is a t-test that allows
246 * for unpaired samples with unequal variance; df is the degrees of
247 * freedom as given by the Welch-Satterthwaite equation
249 compute_stats(&symbol->count[0], n1, &avg1, &var1);
250 compute_stats(&symbol->count[1], n2, &avg2, &var2);
251 p = compute_sig(avg1, var1, avg2, var2);
253 /* list applicable values */
254 assert(PROC_NAME_WIDTH <= PROC_NAME_LEN);
255 assert(SYMBOL_NAME_WIDTH <= SYMBOL_NAME_SIZE);
256 printf("%-*.*s %-*.*s",
257 PROC_NAME_WIDTH, PROC_NAME_WIDTH, symbol->binary,
258 SYMBOL_NAME_WIDTH, SYMBOL_NAME_WIDTH, symbol->name);
259 if (symbol->count[0].sum > 0) {
260 printf("%8.0f", avg1);
261 } else {
262 printf(" ");
264 if (symbol->count[1].sum > 0) {
265 printf("%8.0f", avg2);
266 } else {
267 printf(" ");
269 if (symbol->count[0].sum > 0 && n1 >= 2) {
270 printf("%7.0f", sqrt(var1));
271 } else {
272 printf(" ");
274 if (symbol->count[1].sum > 0 && n2 >= 2) {
275 printf("%7.0f", sqrt(var2));
276 } else {
277 printf(" ");
279 printf("%8.0f ", avg2 - avg1);
280 if (p >= 0) {
281 if (p <= 0.05) printf("*");
282 if (p <= 0.01) printf("*");
283 if (p <= 0.001) printf("*");
285 printf("\n");
288 static int read_line(FILE *file, const char *path, int line, char *binary,
289 char *name, unsigned long *samples) {
290 int c, index;
292 assert(file);
293 assert(binary);
294 assert(name);
295 assert(samples);
297 c = fgetc(file);
298 if (c == EOF) return 0;
300 /* read binary name, truncating if necessary */
301 index = 0;
302 while (c != '\t' && c != '\n') {
303 if (index < PROC_NAME_LEN) binary[index++] = c;
304 c = fgetc(file);
306 if (index < PROC_NAME_LEN) binary[index] = 0;
308 /* read tab */
309 if (c != '\t') {
310 fprintf(stderr, "error: garbage %d after binary name "
311 "(\"%s\", line %d)\n", c, path, line);
312 exit(1);
314 c = fgetc(file);
316 /* read symbol name, truncating if necessary */
317 index = 0;
318 while (c != '\t' && c != '\n') {
319 if (index < SYMBOL_NAME_SIZE) name[index++] = c;
320 c = fgetc(file);
322 if (index < SYMBOL_NAME_SIZE) name[index] = 0;
324 /* read tab */
325 if (c != '\t') {
326 fprintf(stderr, "error: garbage %d after symbol name "
327 "(\"%s\", line %d)\n", c, path, line);
328 exit(1);
330 c = fgetc(file);
332 /* read number of samples */
333 *samples = 0;
334 while (c >= '0' && c <= '9') {
335 *samples = *samples * 10 + (c - '0');
336 c = fgetc(file);
339 /* read newline */
340 if (c != '\n') {
341 fprintf(stderr, "error: garbage %d after sample count "
342 "(\"%s\", line %d)\n", c, path, line);
343 exit(1);
345 return 1;
348 static unsigned string_hash(const char *s, size_t size) {
349 unsigned result = 0;
351 assert(s);
353 while (*s && size-- > 0) {
354 result = result * 31 + *(s++);
356 return result;
359 static enum symbol_class symbol_classify(const char *binary, const char *name) {
360 if (strncmp(binary, "(total)", PROC_NAME_LEN) == 0) return sc_total;
361 if (strncmp(binary, "(idle)", PROC_NAME_LEN) == 0) return sc_idle;
362 if (strncmp(binary, "(system)", PROC_NAME_LEN) == 0) return sc_system;
363 if (strncmp(binary, "(user)", PROC_NAME_LEN) == 0) return sc_user;
364 if (strncmp(name, "(total)", SYMBOL_NAME_SIZE) == 0) return sc_process;
365 return sc_symbol;
368 static struct symbol_info *symbol_find_or_add(const char *binary,
369 const char *name) {
370 struct symbol_info **ptr, *symbol;
372 assert(binary);
373 assert(name);
375 /* look up symbol in hash table */
376 ptr = &symbol_hashtab[symbol_hash(binary, name) % SYMBOL_HASHTAB_SIZE];
377 while ((symbol = *ptr)) {
378 if (strncmp(symbol->binary, binary, PROC_NAME_LEN) == 0 &&
379 strncmp(symbol->name, name, SYMBOL_NAME_SIZE) == 0) {
380 return symbol;
382 ptr = &symbol->hashtab_next;
385 /* unknown symbol, add it */
386 *ptr = symbol = MALLOC_CHECKED(struct symbol_info, 1);
387 memset(symbol, 0, sizeof(struct symbol_info));
388 strncpy(symbol->binary, binary, PROC_NAME_LEN);
389 strncpy(symbol->name, name, SYMBOL_NAME_SIZE);
390 symbol->count[0].min = ~0UL;
391 symbol->count[1].min = ~0UL;
392 symbol->class = symbol_classify(binary, name);
394 /* also add to linked list */
395 symbol->next = symbols;
396 symbols = symbol;
397 return symbol;
400 static unsigned symbol_hash(const char *binary, const char *name) {
401 return string_hash(binary, PROC_NAME_LEN) +
402 string_hash(name, SYMBOL_NAME_SIZE);
405 static int symbol_qsort_compare(const void *p1, const void *p2) {
406 int r;
407 const struct symbol_info *s1, *s2;
409 assert(p1);
410 assert(p2);
411 s1 = *(const struct symbol_info **) p1;
412 s2 = *(const struct symbol_info **) p2;
413 assert(s1);
414 assert(s2);
416 /* totals come first */
417 if (s1->class < s2->class) return -1;
418 if (s1->class > s2->class) return 1;
420 /* sort by difference in average */
421 if (s1->diff < s2->diff) return -1;
422 if (s1->diff > s2->diff) return 1;
424 /* otherwise, by name */
425 r = strncmp(s1->binary, s2->binary, PROC_NAME_LEN);
426 if (r) return r;
428 return strncmp(s1->name, s2->name, SYMBOL_NAME_SIZE);
431 static void symbol_tally(const char *binary, const char *name,
432 unsigned long samples, int count_index) {
433 struct symbol_count *count;
434 struct symbol_info *symbol;
436 /* look up or add symbol */
437 symbol = symbol_find_or_add(binary, name);
439 /* update count */
440 count = &symbol->count[count_index];
441 count->sum += samples;
442 count->sum2 += (unsigned long long) samples * samples;
443 if (count->min > samples) count->min = samples;
444 if (count->max < samples) count->max = samples;
447 static unsigned symbols_count(void) {
448 int count = 0;
449 const struct symbol_info *symbol;
451 for (symbol = symbols; symbol; symbol = symbol->next) {
452 count++;
454 return count;
457 static void usage(const char *argv0) {
458 printf("usage:\n");
459 printf(" %s leftfile rightfile\n", argv0);
460 printf(" %s leftfile... -r rightfile...\n", argv0);
461 printf("\n");
462 printf("sprofdiff compares the sprofile information from multiple\n");
463 printf("output files of sprofalyze -d.\n");
464 exit(1);