8 #include <minix/type.h>
12 /* user-configurable settings */
15 #define PROC_NAME_WIDTH 10
17 #define SYMBOL_NAME_WIDTH 24
20 #define SYMBOL_HASHTAB_SIZE 1024
22 #define SYMBOL_NAME_SIZE 52
26 unsigned long long sum2
;
41 struct symbol_info
*next
;
42 struct symbol_info
*hashtab_next
;
43 char binary
[PROC_NAME_LEN
];
44 char name
[SYMBOL_NAME_SIZE
];
45 struct symbol_count count
[2];
47 enum symbol_class
class;
50 /* global variables */
/* n1 and n2 are the sample counts handed to compute_stats() for
 * count[0] (left) and count[1] (right) respectively; main() calls
 * usage() when either of them is below 1
 */
51 static unsigned n1
, n2
;
/* head of the list of all known symbols, chained through ->next */
52 static struct symbol_info
*symbols
;
/* hash buckets, indexed by symbol_hash() % SYMBOL_HASHTAB_SIZE; the
 * entries of one bucket are chained through ->hashtab_next
 */
53 static struct symbol_info
*symbol_hashtab
[SYMBOL_HASHTAB_SIZE
];
/* forward declarations of the file-local (static) functions defined
 * further down in this file
 *
 * NOTE(review): the symbol_find_or_add declaration below stops after its
 * first parameter in this view; callers pass (binary, name) - confirm the
 * second parameter against the full source.
 */
56 static double compute_sig(double avg1
, double var1
, double avg2
, double var2
);
57 static void compute_stats(const struct symbol_count
*count
, unsigned n
,
58 double *avg
, double *var
);
59 static void load_file(const char *path
, int count_index
);
60 static void *malloc_checked(size_t size
);
61 static void print_report(void);
62 static void print_report_line(const struct symbol_info
*symbol
);
63 static int read_line(FILE *file
, const char *path
, int line
, char *binary
,
64 char *name
, unsigned long *samples
);
65 static enum symbol_class
symbol_classify(const char *binary
, const char *name
);
66 static unsigned string_hash(const char *s
, size_t size
);
67 static struct symbol_info
*symbol_find_or_add(const char *binary
,
69 static unsigned symbol_hash(const char *binary
, const char *name
);
70 static int symbol_qsort_compare(const void *p1
, const void *p2
);
71 static void symbol_tally(const char *binary
, const char *name
,
72 unsigned long samples
, int count_index
);
73 static unsigned symbols_count(void);
74 static void usage(const char *argv0
);
/* allocate room for count objects of the given type through
 * malloc_checked() and cast the result to a pointer to that type
 */
76 #define MALLOC_CHECKED(type, count) \
77 ((type *) malloc_checked(sizeof(type) * (count)))
/* debug output helper: writes a "debug(function:line): " prefix followed
 * by the printf-style message to stderr (the end of the macro is not
 * visible in this view)
 */
80 #define dprintf(...) do { \
81 fprintf(stderr, "debug(%s:%d): ", __FUNCTION__, __LINE__); \
82 fprintf(stderr, __VA_ARGS__); \
/*
 * Program entry point: loads the left-hand and right-hand input files
 * named on the command line.
 *
 * Files are loaded with count index 0 (left) until a "-r" argument is
 * seen, the remaining arguments are loaded with count index 1 (right).
 * usage() is called when fewer than two arguments are given or when
 * either n1 or n2 is below 1 after loading.
 *
 * NOTE(review): the statements executed on a "-r" match, the updates of
 * n1/n2 and the final reporting call are not visible in this view.
 */
88 int main(int argc
, char **argv
) {
92 /* disable buffering so the output mixes correctly */
93 setvbuf(stdout
, NULL
, _IONBF
, 0);
94 setvbuf(stderr
, NULL
, _IONBF
, 0);
97 if (argc
< 3) usage(argv
[0]);
99 /* load left-hand files */
100 for (i
= 1; i
< argc
; i
++) {
101 if (strcmp(argv
[i
], "-r") == 0) {
/* with exactly two arguments and no "-r", stop before the second one so
 * that the right-hand loop below picks it up
 */
105 if (argc
== 3 && i
== 2) break;
106 load_file(argv
[i
], 0);
110 /* load right-hand files */
111 for (; i
< argc
; i
++) {
112 load_file(argv
[i
], 1);
116 if (n1
< 1 || n2
< 1) usage(argv
[0]);
118 /* report analysis results */
123 static double compute_sig(double avg1
, double var1
, double avg2
, double var2
) {
126 /* prevent division by zero with lack of variance */
127 var
= var1
/ n1
+ var2
/ n2
;
128 if (var
<= 0 || n1
<= 1 || n2
<= 1) return -1;
130 /* do we have enough degrees of freedom? */
132 var1
* var1
/ (n1
* n1
* (n1
- 1)) +
133 var2
* var2
/ (n2
* n2
* (n2
- 1)));
134 if (df
< 1) return -1;
137 t
= (avg1
- avg2
) / sqrt(var
);
138 return student_t_p_2tail(t
, df
);
/*
 * Derive statistics for one side of a symbol from its accumulated
 * counters and store them through avg and var; n is the number of
 * samples the counters were accumulated over.
 *
 * The visible assignment computes the sample variance (n - 1 in the
 * denominator) from the sum of squares count->sum2 and the local sum.
 *
 * NOTE(review): the definition of the local sum (presumably count->sum
 * as a double), the computation of *avg and any guard for n < 2 are not
 * visible in this view - confirm against the full source.
 */
141 static void compute_stats(const struct symbol_count
*count
, unsigned n
,
142 double *avg
, double *var
) {
159 *var
= (count
->sum2
- sum
* sum
/ n
) / (n
- 1);
/*
 * Read the file at path and tally every record it contains into the
 * counters selected by count_index, which must be 0 or 1 (asserted).
 *
 * Records are fetched with read_line() until it returns 0; each one is
 * passed on to symbol_tally(). An error naming the path and
 * strerror(errno) is written to stderr for a file that cannot be opened.
 *
 * NOTE(review): what follows the error message and whether the file is
 * closed again is not visible in this view.
 */
163 static void load_file(const char *path
, int count_index
) {
/* fixed-size buffers filled by read_line(); it only NUL-terminates them
 * when the name is shorter than the buffer
 */
164 char binary
[PROC_NAME_LEN
];
167 char name
[SYMBOL_NAME_SIZE
];
168 unsigned long samples
;
171 assert(count_index
== 0 || count_index
== 1);
173 file
= fopen(path
, "r");
175 fprintf(stderr
, "error: cannot open \"%s\": %s\n",
176 path
, strerror(errno
));
/* line is post-incremented so each call gets its own line number for
 * error messages
 */
181 while (read_line(file
, path
, line
++, binary
, name
, &samples
)) {
182 symbol_tally(binary
, name
, samples
, count_index
);
/*
 * Allocation wrapper used by MALLOC_CHECKED. A request for zero bytes
 * yields NULL; an error message with the requested size and
 * strerror(errno) is written to stderr when the allocation fails.
 *
 * NOTE(review): the allocation call itself and what happens after the
 * error message are not visible in this view.
 */
188 static void *malloc_checked(size_t size
) {
190 if (!size
) return NULL
;
/* size is cast to unsigned long to match the %lu conversion */
193 fprintf(stderr
, "error: malloc cannot allocate %lu bytes: %s\n",
194 (unsigned long) size
, strerror(errno
));
/*
 * Print the full comparison report to stdout.
 *
 * All symbols on the global list are collected into an array allocated
 * with MALLOC_CHECKED, each symbol's diff is set to
 * count[1].sum * n1 - count[0].sum * n2 (the difference of the averages
 * with both sides multiplied by n1 * n2), and the array is sorted with
 * symbol_qsort_compare(). The two table header lines are printed next,
 * then print_report_line() is called for every sorted symbol, and
 * finally the legend of the significance markers is printed.
 *
 * NOTE(review): the initialisation of index, the action taken when the
 * class changes between consecutive symbols (class >= sc_process) and
 * the release of symbol_list are not visible in this view.
 */
200 static void print_report(void) {
201 unsigned i
, index
, symbol_count
;
202 struct symbol_info
*symbol
, **symbol_list
;
204 /* list the symbols in an array for sorting */
205 symbol_count
= symbols_count();
206 symbol_list
= MALLOC_CHECKED(struct symbol_info
*, symbol_count
);
208 for (symbol
= symbols
; symbol
; symbol
= symbol
->next
) {
209 symbol_list
[index
++] = symbol
;
211 /* sort by difference in average, multiply both sides by
212 * n1 * n2 to avoid division
214 symbol
->diff
= (long) (symbol
->count
[1].sum
* n1
) -
215 (long) (symbol
->count
[0].sum
* n2
);
217 assert(index
== symbol_count
);
220 qsort(symbol_list
, symbol_count
, sizeof(struct symbol_info
*),
221 symbol_qsort_compare
);
223 printf("%-*s %-*s ------avg------ ----stdev---- diff sig\n",
224 PROC_NAME_WIDTH
, "binary", SYMBOL_NAME_WIDTH
, "symbol");
225 printf("%-*s left right left right\n",
226 PROC_NAME_WIDTH
+ SYMBOL_NAME_WIDTH
+ 1, "");
228 for (i
= 0; i
< symbol_count
; i
++) {
229 if (i
> 0 && symbol_list
[i
]->class >= sc_process
&&
230 symbol_list
[i
]->class != symbol_list
[i
- 1]->class) {
233 print_report_line(symbol_list
[i
]);
236 printf("significance levels (two-tailed):\n");
237 printf(" * p < 0.05\n");
238 printf(" ** p < 0.01\n");
239 printf(" *** p < 0.001\n");
/*
 * Print one report line for symbol: binary and symbol name, the left and
 * right averages, the left and right standard deviations, the difference
 * of the averages (right minus left) and the significance markers.
 *
 * An average is printed only for a side whose sum is above zero; a
 * standard deviation additionally requires at least two samples on that
 * side. What is printed in place of an omitted value is not visible in
 * this view.
 */
243 static void print_report_line(const struct symbol_info
*symbol
) {
244 double avg1
, avg2
, p
, var1
, var2
;
246 /* compute statistics; t is Welch's t, which is a t-test that allows
247 * for unpaired samples with unequal variance; df is the degrees of
248 * freedom as given by the Welch-Satterthwaite equation
250 compute_stats(&symbol
->count
[0], n1
, &avg1
, &var1
);
251 compute_stats(&symbol
->count
[1], n2
, &avg2
, &var2
);
252 p
= compute_sig(avg1
, var1
, avg2
, var2
);
254 /* list applicable values */
/* the name fields are not NUL-terminated when completely filled, so the
 * %.*s precision bounds how much of them is read; the asserts make sure
 * that precision never exceeds the field size
 */
255 assert(PROC_NAME_WIDTH
<= PROC_NAME_LEN
);
256 assert(SYMBOL_NAME_WIDTH
<= SYMBOL_NAME_SIZE
);
257 printf("%-*.*s %-*.*s",
258 PROC_NAME_WIDTH
, PROC_NAME_WIDTH
, symbol
->binary
,
259 SYMBOL_NAME_WIDTH
, SYMBOL_NAME_WIDTH
, symbol
->name
);
260 if (symbol
->count
[0].sum
> 0) {
261 printf("%8.0f", avg1
);
265 if (symbol
->count
[1].sum
> 0) {
266 printf("%8.0f", avg2
);
270 if (symbol
->count
[0].sum
> 0 && n1
>= 2) {
271 printf("%7.0f", sqrt(var1
));
275 if (symbol
->count
[1].sum
> 0 && n2
>= 2) {
276 printf("%7.0f", sqrt(var2
));
280 printf("%8.0f ", avg2
- avg1
);
/* one star per threshold reached; the comparisons are inclusive (<=)
 * while the legend printed by print_report() uses "<"
 *
 * NOTE(review): compute_sig() returns -1 when no p-value is available,
 * which would satisfy all three tests; the guard that excludes negative
 * p is not visible in this view - confirm it exists.
 */
282 if (p
<= 0.05) printf("*");
283 if (p
<= 0.01) printf("*");
284 if (p
<= 0.001) printf("*");
/*
 * Read one record from file: a binary name, a symbol name and a decimal
 * sample count. Returns 0 when the end of the file is reached.
 *
 * binary must have room for PROC_NAME_LEN bytes and name for
 * SYMBOL_NAME_SIZE bytes; longer names are truncated and a name that
 * fills its buffer completely is NOT NUL-terminated. path and line are
 * only used in error messages, which report the unexpected character
 * code found after a field.
 *
 * NOTE(review): the statements that read characters, the conditions
 * around the error messages, the initialisation of index and *samples
 * and the non-zero return are not visible in this view.
 */
289 static int read_line(FILE *file
, const char *path
, int line
, char *binary
,
290 char *name
, unsigned long *samples
) {
299 if (c
== EOF
) return 0;
301 /* read binary name, truncating if necessary */
/* NOTE(review): the loop condition does not test for EOF; unless the
 * loop body handles it, a file that ends in the middle of this field
 * would never leave the loop - confirm
 */
303 while (c
!= '\t' && c
!= '\n') {
304 if (index
< PROC_NAME_LEN
) binary
[index
++] = c
;
/* terminate only when there is room left in the buffer */
307 if (index
< PROC_NAME_LEN
) binary
[index
] = 0;
311 fprintf(stderr
, "error: garbage %d after binary name "
312 "(\"%s\", line %d)\n", c
, path
, line
);
317 /* read symbol name, truncating if necessary */
/* NOTE(review): same missing EOF test as for the binary name above */
319 while (c
!= '\t' && c
!= '\n') {
320 if (index
< SYMBOL_NAME_SIZE
) name
[index
++] = c
;
/* terminate only when there is room left in the buffer */
323 if (index
< SYMBOL_NAME_SIZE
) name
[index
] = 0;
327 fprintf(stderr
, "error: garbage %d after symbol name "
328 "(\"%s\", line %d)\n", c
, path
, line
);
333 /* read number of samples */
/* accumulate decimal digits; there is no overflow check on *samples */
335 while (c
>= '0' && c
<= '9') {
336 *samples
= *samples
* 10 + (c
- '0');
342 fprintf(stderr
, "error: garbage %d after sample count "
343 "(\"%s\", line %d)\n", c
, path
, line
);
/*
 * Hash the string s, looking at no more than size bytes and stopping
 * early at a NUL terminator.
 *
 * s may point to a fixed-width field that is not NUL-terminated when it
 * is completely filled, so exactly size bytes must be readable but not
 * one more.
 *
 * Returns the multiplicative (factor 31) hash of the bytes examined; the
 * hash of an empty string or of zero bytes is 0.
 */
static unsigned string_hash(const char *s, size_t size) {
	unsigned result = 0;

	/* check the remaining length BEFORE dereferencing s: testing *s
	 * first would read s[size] for a completely filled, unterminated
	 * field, which is one byte past the end of the buffer
	 */
	for (; size > 0 && *s; size--, s++) {
		result = result * 31 + *s;
	}
	return result;
}
/*
 * Determine the class of the entry identified by binary and name.
 *
 * The binary names "(total)", "(idle)", "(system)" and "(user)" map to
 * sc_total, sc_idle, sc_system and sc_user; any other binary whose
 * symbol name is "(total)" is classified as sc_process. Comparisons are
 * bounded by the field sizes because the names need not be
 * NUL-terminated when they fill their buffers.
 *
 * NOTE(review): the return value for all remaining entries is not
 * visible in this view.
 */
360 static enum symbol_class
symbol_classify(const char *binary
, const char *name
) {
361 if (strncmp(binary
, "(total)", PROC_NAME_LEN
) == 0) return sc_total
;
362 if (strncmp(binary
, "(idle)", PROC_NAME_LEN
) == 0) return sc_idle
;
363 if (strncmp(binary
, "(system)", PROC_NAME_LEN
) == 0) return sc_system
;
364 if (strncmp(binary
, "(user)", PROC_NAME_LEN
) == 0) return sc_user
;
365 if (strncmp(name
, "(total)", SYMBOL_NAME_SIZE
) == 0) return sc_process
;
/*
 * Look up the symbol identified by binary and name in the hash table
 * and create it when it is not there yet.
 *
 * A new symbol is zero-filled, receives copies of both names, has the
 * min counters of both sides set to the largest unsigned long value, is
 * classified with symbol_classify() and is linked into both its hash
 * bucket and the global symbols list.
 *
 * NOTE(review): the second parameter (used below as name), the action
 * taken when a matching entry is found, the update of the list head and
 * the return statement are not visible in this view.
 */
369 static struct symbol_info
*symbol_find_or_add(const char *binary
,
371 struct symbol_info
**ptr
, *symbol
;
376 /* look up symbol in hash table */
/* ptr always addresses the link that would have to point at the symbol,
 * so that a new entry can be appended by a single store through it
 */
377 ptr
= &symbol_hashtab
[symbol_hash(binary
, name
) % SYMBOL_HASHTAB_SIZE
];
378 while ((symbol
= *ptr
)) {
379 if (strncmp(symbol
->binary
, binary
, PROC_NAME_LEN
) == 0 &&
380 strncmp(symbol
->name
, name
, SYMBOL_NAME_SIZE
) == 0) {
383 ptr
= &symbol
->hashtab_next
;
386 /* unknown symbol, add it */
387 *ptr
= symbol
= MALLOC_CHECKED(struct symbol_info
, 1);
388 memset(symbol
, 0, sizeof(struct symbol_info
));
/* strncpy leaves a name that fills the whole field without a NUL
 * terminator; the fields are compared with strncmp using the same
 * bounds
 */
389 strncpy(symbol
->binary
, binary
, PROC_NAME_LEN
);
390 strncpy(symbol
->name
, name
, SYMBOL_NAME_SIZE
);
/* start min at the maximum so that the first tallied sample replaces it */
391 symbol
->count
[0].min
= ~0UL;
392 symbol
->count
[1].min
= ~0UL;
393 symbol
->class = symbol_classify(binary
, name
);
395 /* also add to linked list */
396 symbol
->next
= symbols
;
401 static unsigned symbol_hash(const char *binary
, const char *name
) {
402 return string_hash(binary
, PROC_NAME_LEN
) +
403 string_hash(name
, SYMBOL_NAME_SIZE
);
/*
 * qsort() comparison callback for an array of struct symbol_info
 * pointers; p1 and p2 point at two elements of that array.
 *
 * Entries are ordered by class first, then by ascending diff, and
 * entries that are equal in both are ordered by binary name and then by
 * symbol name.
 *
 * NOTE(review): the declaration of r and the test that returns it when
 * the binary names differ are not visible in this view.
 */
406 static int symbol_qsort_compare(const void *p1
, const void *p2
) {
408 const struct symbol_info
*s1
, *s2
;
/* the array elements are pointers, so dereference once to get at the
 * symbols themselves
 */
412 s1
= *(const struct symbol_info
**) p1
;
413 s2
= *(const struct symbol_info
**) p2
;
417 /* totals come first */
418 if (s1
->class < s2
->class) return -1;
419 if (s1
->class > s2
->class) return 1;
421 /* sort by difference in average */
422 if (s1
->diff
< s2
->diff
) return -1;
423 if (s1
->diff
> s2
->diff
) return 1;
425 /* otherwise, by name */
426 r
= strncmp(s1
->binary
, s2
->binary
, PROC_NAME_LEN
);
429 return strncmp(s1
->name
, s2
->name
, SYMBOL_NAME_SIZE
);
/*
 * Account one record: add samples to the counters of the symbol
 * identified by binary and name, on the side selected by count_index
 * (index into the symbol's count array).
 *
 * The symbol is created through symbol_find_or_add() when it is not
 * known yet. The sum, the sum of squares, the minimum and the maximum of
 * the selected side are updated.
 */
432 static void symbol_tally(const char *binary
, const char *name
,
433 unsigned long samples
, int count_index
) {
434 struct symbol_count
*count
;
435 struct symbol_info
*symbol
;
437 /* look up or add symbol */
438 symbol
= symbol_find_or_add(binary
, name
);
441 count
= &symbol
->count
[count_index
];
442 count
->sum
+= samples
;
/* widen before multiplying so the square is computed in
 * unsigned long long
 */
443 count
->sum2
+= (unsigned long long) samples
* samples
;
444 if (count
->min
> samples
) count
->min
= samples
;
445 if (count
->max
< samples
) count
->max
= samples
;
/*
 * Walk the global symbols list from its head through the ->next links.
 *
 * NOTE(review): the loop body and the return statement are not visible
 * in this view; print_report() uses the result as the number of symbols
 * on the list, so this presumably counts the entries - confirm.
 */
448 static unsigned symbols_count(void) {
450 const struct symbol_info
*symbol
;
452 for (symbol
= symbols
; symbol
; symbol
= symbol
->next
) {
458 static void usage(const char *argv0
) {
460 printf(" %s leftfile rightfile\n", argv0
);
461 printf(" %s leftfile... -r rightfile...\n", argv0
);
463 printf("sprofdiff compares the sprofile information from multiple\n");
464 printf("output files of sprofalyze -d.\n");