11 /* user-configurable settings */
14 #define PROC_NAME_WIDTH 10
16 #define SYMBOL_NAME_WIDTH 24
19 #define SYMBOL_HASHTAB_SIZE 1024
21 #define SYMBOL_NAME_SIZE 52
25 unsigned long long sum2
;
40 struct symbol_info
*next
;
41 struct symbol_info
*hashtab_next
;
42 char binary
[PROC_NAME_LEN
];
43 char name
[SYMBOL_NAME_SIZE
];
44 struct symbol_count count
[2];
46 enum symbol_class
class;
49 /* global variables */
50 static unsigned n1
, n2
;
51 static struct symbol_info
*symbols
;
52 static struct symbol_info
*symbol_hashtab
[SYMBOL_HASHTAB_SIZE
];
55 static double compute_sig(double avg1
, double var1
, double avg2
, double var2
);
56 static void compute_stats(const struct symbol_count
*count
, unsigned n
,
57 double *avg
, double *var
);
58 static void load_file(const char *path
, int count_index
);
59 static void *malloc_checked(size_t size
);
60 static void print_report(void);
61 static void print_report_line(const struct symbol_info
*symbol
);
62 static int read_line(FILE *file
, const char *path
, int line
, char *binary
,
63 char *name
, unsigned long *samples
);
64 static enum symbol_class
symbol_classify(const char *binary
, const char *name
);
65 static unsigned string_hash(const char *s
, size_t size
);
66 static struct symbol_info
*symbol_find_or_add(const char *binary
,
68 static unsigned symbol_hash(const char *binary
, const char *name
);
69 static int symbol_qsort_compare(const void *p1
, const void *p2
);
70 static void symbol_tally(const char *binary
, const char *name
,
71 unsigned long samples
, int count_index
);
72 static unsigned symbols_count(void);
73 static void usage(const char *argv0
);
75 #define MALLOC_CHECKED(type, count) \
76 ((type *) malloc_checked(sizeof(type) * (count)))
79 #define dprintf(...) do { \
80 fprintf(stderr, "debug(%s:%d): ", __FUNCTION__, __LINE__); \
81 fprintf(stderr, __VA_ARGS__); \
87 int main(int argc
, char **argv
) {
91 /* disable buffering so the output mixes correctly */
92 setvbuf(stdout
, NULL
, _IONBF
, 0);
93 setvbuf(stderr
, NULL
, _IONBF
, 0);
96 if (argc
< 3) usage(argv
[0]);
98 /* load left-hand files */
99 for (i
= 1; i
< argc
; i
++) {
100 if (strcmp(argv
[i
], "-r") == 0) {
104 if (argc
== 3 && i
== 2) break;
105 load_file(argv
[i
], 0);
109 /* load right-hand files */
110 for (; i
< argc
; i
++) {
111 load_file(argv
[i
], 1);
115 if (n1
< 1 || n2
< 1) usage(argv
[0]);
117 /* report analysis results */
122 static double compute_sig(double avg1
, double var1
, double avg2
, double var2
) {
125 /* prevent division by zero with lack of variance */
126 var
= var1
/ n1
+ var2
/ n2
;
127 if (var
<= 0 || n1
<= 1 || n2
<= 1) return -1;
129 /* do we have enough degrees of freedom? */
131 var1
* var1
/ (n1
* n1
* (n1
- 1)) +
132 var2
* var2
/ (n2
* n2
* (n2
- 1)));
133 if (df
< 1) return -1;
136 t
= (avg1
- avg2
) / sqrt(var
);
137 return student_t_p_2tail(t
, df
);
140 static void compute_stats(const struct symbol_count
*count
, unsigned n
,
141 double *avg
, double *var
) {
158 *var
= (count
->sum2
- sum
* sum
/ n
) / (n
- 1);
162 static void load_file(const char *path
, int count_index
) {
163 char binary
[PROC_NAME_LEN
];
166 char name
[SYMBOL_NAME_SIZE
];
167 unsigned long samples
;
170 assert(count_index
== 0 || count_index
== 1);
172 file
= fopen(path
, "r");
174 fprintf(stderr
, "error: cannot open \"%s\": %s\n",
175 path
, strerror(errno
));
180 while (read_line(file
, path
, line
++, binary
, name
, &samples
)) {
181 symbol_tally(binary
, name
, samples
, count_index
);
187 static void *malloc_checked(size_t size
) {
189 if (!size
) return NULL
;
192 fprintf(stderr
, "error: malloc cannot allocate %lu bytes: %s\n",
193 (unsigned long) size
, strerror(errno
));
199 static void print_report(void) {
200 unsigned i
, index
, symbol_count
;
201 struct symbol_info
*symbol
, **symbol_list
;
203 /* list the symbols in an array for sorting */
204 symbol_count
= symbols_count();
205 symbol_list
= MALLOC_CHECKED(struct symbol_info
*, symbol_count
);
207 for (symbol
= symbols
; symbol
; symbol
= symbol
->next
) {
208 symbol_list
[index
++] = symbol
;
210 /* sort by difference in average, multiply both sides by
211 * n1 * n2 to avoid division
213 symbol
->diff
= (long) (symbol
->count
[1].sum
* n1
) -
214 (long) (symbol
->count
[0].sum
* n2
);
216 assert(index
== symbol_count
);
219 qsort(symbol_list
, symbol_count
, sizeof(struct symbol_info
*),
220 symbol_qsort_compare
);
222 printf("%-*s %-*s ------avg------ ----stdev---- diff sig\n",
223 PROC_NAME_WIDTH
, "binary", SYMBOL_NAME_WIDTH
, "symbol");
224 printf("%-*s left right left right\n",
225 PROC_NAME_WIDTH
+ SYMBOL_NAME_WIDTH
+ 1, "");
227 for (i
= 0; i
< symbol_count
; i
++) {
228 if (i
> 0 && symbol_list
[i
]->class >= sc_process
&&
229 symbol_list
[i
]->class != symbol_list
[i
- 1]->class) {
232 print_report_line(symbol_list
[i
]);
235 printf("significance levels (two-tailed):\n");
236 printf(" * p < 0.05\n");
237 printf(" ** p < 0.01\n");
238 printf(" *** p < 0.001\n");
242 static void print_report_line(const struct symbol_info
*symbol
) {
243 double avg1
, avg2
, p
, var1
, var2
;
245 /* compute statistics; t is Welch's t, which is a t-test that allows
246 * for unpaired samples with unequal variance; df is the degrees of
247 * freedom as given by the Welch-Satterthwaite equation
249 compute_stats(&symbol
->count
[0], n1
, &avg1
, &var1
);
250 compute_stats(&symbol
->count
[1], n2
, &avg2
, &var2
);
251 p
= compute_sig(avg1
, var1
, avg2
, var2
);
253 /* list applicable values */
254 assert(PROC_NAME_WIDTH
<= PROC_NAME_LEN
);
255 assert(SYMBOL_NAME_WIDTH
<= SYMBOL_NAME_SIZE
);
256 printf("%-*.*s %-*.*s",
257 PROC_NAME_WIDTH
, PROC_NAME_WIDTH
, symbol
->binary
,
258 SYMBOL_NAME_WIDTH
, SYMBOL_NAME_WIDTH
, symbol
->name
);
259 if (symbol
->count
[0].sum
> 0) {
260 printf("%8.0f", avg1
);
264 if (symbol
->count
[1].sum
> 0) {
265 printf("%8.0f", avg2
);
269 if (symbol
->count
[0].sum
> 0 && n1
>= 2) {
270 printf("%7.0f", sqrt(var1
));
274 if (symbol
->count
[1].sum
> 0 && n2
>= 2) {
275 printf("%7.0f", sqrt(var2
));
279 printf("%8.0f ", avg2
- avg1
);
281 if (p
<= 0.05) printf("*");
282 if (p
<= 0.01) printf("*");
283 if (p
<= 0.001) printf("*");
288 static int read_line(FILE *file
, const char *path
, int line
, char *binary
,
289 char *name
, unsigned long *samples
) {
298 if (c
== EOF
) return 0;
300 /* read binary name, truncating if necessary */
302 while (c
!= '\t' && c
!= '\n') {
303 if (index
< PROC_NAME_LEN
) binary
[index
++] = c
;
306 if (index
< PROC_NAME_LEN
) binary
[index
] = 0;
310 fprintf(stderr
, "error: garbage %d after binary name "
311 "(\"%s\", line %d)\n", c
, path
, line
);
316 /* read symbol name, truncating if necessary */
318 while (c
!= '\t' && c
!= '\n') {
319 if (index
< SYMBOL_NAME_SIZE
) name
[index
++] = c
;
322 if (index
< SYMBOL_NAME_SIZE
) name
[index
] = 0;
326 fprintf(stderr
, "error: garbage %d after symbol name "
327 "(\"%s\", line %d)\n", c
, path
, line
);
332 /* read number of samples */
334 while (c
>= '0' && c
<= '9') {
335 *samples
= *samples
* 10 + (c
- '0');
341 fprintf(stderr
, "error: garbage %d after sample count "
342 "(\"%s\", line %d)\n", c
, path
, line
);
348 static unsigned string_hash(const char *s
, size_t size
) {
353 while (*s
&& size
-- > 0) {
354 result
= result
* 31 + *(s
++);
359 static enum symbol_class
symbol_classify(const char *binary
, const char *name
) {
360 if (strncmp(binary
, "(total)", PROC_NAME_LEN
) == 0) return sc_total
;
361 if (strncmp(binary
, "(idle)", PROC_NAME_LEN
) == 0) return sc_idle
;
362 if (strncmp(binary
, "(system)", PROC_NAME_LEN
) == 0) return sc_system
;
363 if (strncmp(binary
, "(user)", PROC_NAME_LEN
) == 0) return sc_user
;
364 if (strncmp(name
, "(total)", SYMBOL_NAME_SIZE
) == 0) return sc_process
;
368 static struct symbol_info
*symbol_find_or_add(const char *binary
,
370 struct symbol_info
**ptr
, *symbol
;
375 /* look up symbol in hash table */
376 ptr
= &symbol_hashtab
[symbol_hash(binary
, name
) % SYMBOL_HASHTAB_SIZE
];
377 while ((symbol
= *ptr
)) {
378 if (strncmp(symbol
->binary
, binary
, PROC_NAME_LEN
) == 0 &&
379 strncmp(symbol
->name
, name
, SYMBOL_NAME_SIZE
) == 0) {
382 ptr
= &symbol
->hashtab_next
;
385 /* unknown symbol, add it */
386 *ptr
= symbol
= MALLOC_CHECKED(struct symbol_info
, 1);
387 memset(symbol
, 0, sizeof(struct symbol_info
));
388 strncpy(symbol
->binary
, binary
, PROC_NAME_LEN
);
389 strncpy(symbol
->name
, name
, SYMBOL_NAME_SIZE
);
390 symbol
->count
[0].min
= ~0UL;
391 symbol
->count
[1].min
= ~0UL;
392 symbol
->class = symbol_classify(binary
, name
);
394 /* also add to linked list */
395 symbol
->next
= symbols
;
400 static unsigned symbol_hash(const char *binary
, const char *name
) {
401 return string_hash(binary
, PROC_NAME_LEN
) +
402 string_hash(name
, SYMBOL_NAME_SIZE
);
405 static int symbol_qsort_compare(const void *p1
, const void *p2
) {
407 const struct symbol_info
*s1
, *s2
;
411 s1
= *(const struct symbol_info
**) p1
;
412 s2
= *(const struct symbol_info
**) p2
;
416 /* totals come first */
417 if (s1
->class < s2
->class) return -1;
418 if (s1
->class > s2
->class) return 1;
420 /* sort by difference in average */
421 if (s1
->diff
< s2
->diff
) return -1;
422 if (s1
->diff
> s2
->diff
) return 1;
424 /* otherwise, by name */
425 r
= strncmp(s1
->binary
, s2
->binary
, PROC_NAME_LEN
);
428 return strncmp(s1
->name
, s2
->name
, SYMBOL_NAME_SIZE
);
431 static void symbol_tally(const char *binary
, const char *name
,
432 unsigned long samples
, int count_index
) {
433 struct symbol_count
*count
;
434 struct symbol_info
*symbol
;
436 /* look up or add symbol */
437 symbol
= symbol_find_or_add(binary
, name
);
440 count
= &symbol
->count
[count_index
];
441 count
->sum
+= samples
;
442 count
->sum2
+= (unsigned long long) samples
* samples
;
443 if (count
->min
> samples
) count
->min
= samples
;
444 if (count
->max
< samples
) count
->max
= samples
;
447 static unsigned symbols_count(void) {
449 const struct symbol_info
*symbol
;
451 for (symbol
= symbols
; symbol
; symbol
= symbol
->next
) {
457 static void usage(const char *argv0
) {
459 printf(" %s leftfile rightfile\n", argv0
);
460 printf(" %s leftfile... -r rightfile...\n", argv0
);
462 printf("sprofdiff compares the sprofile information from multiple\n");
463 printf("output files of sprofalyze -d.\n");