/*
 * profile.c - library functions for call profiling
 *
 * For processes that were compiled using ACK with the -Rcem-p option,
 * procentry and procexit will be called on entry and exit of their
 * functions. Procentry/procexit are implemented here as generic library
 * functions.
 *
 * 14 Aug, 2006  Created (Rogier Meurs)
 */
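/*
 * A minimal sketch of what that instrumentation amounts to (the code ACK
 * actually emits may differ, and "foo" is only an illustrative name):
 * every function compiled with -Rcem-p behaves roughly as if it were
 *
 *	int foo(int x)
 *	{
 *		procentry("foo");
 *		... original body of foo ...
 *		procexit("foo");
 *		return result;
 *	}
 *
 * so this library sees one procentry/procexit pair per dynamic call.
 */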
#include <stdio.h>		/* printf */
#include <string.h>		/* strlen, strcmp, strcpy, memset */
#include <minix/profile.h>
#include <minix/sysutil.h>
#include <minix/u64.h>
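/*
 * PRIVATE, PUBLIC, EXTERN and FORWARD are the usual MINIX linkage macros
 * (roughly: static, default linkage, extern and static respectively), and
 * _PROTOTYPE is MINIX's portable prototype wrapper; all come from the
 * standard MINIX headers.
 */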
PRIVATE char cpath[CPROF_CPATH_MAX_LEN];	/* current call path string */
PRIVATE int cpath_len;				/* current call path len */
PRIVATE struct cprof_tbl_s *cprof_slot;		/* slot of current function */
struct stack_s {				/* stack entry */
  int cpath_len;				/* call path len */
  struct cprof_tbl_s *slot;			/* table slot */
  u64_t start_1;				/* count @ begin of procentry */
  u64_t start_2;				/* count @ end of procentry */
  u64_t spent_deeper;				/* spent in called functions */
};
PRIVATE struct stack_s cprof_stk[CPROF_STACK_SIZE];	/* stack */
PRIVATE int cprof_stk_top;				/* top of stack */
EXTERN struct cprof_tbl_s cprof_tbl[];			/* hash table */
PRIVATE int cprof_tbl_size;				/* nr of slots */
PRIVATE struct cprof_tbl_s *idx[CPROF_INDEX_SIZE];	/* index to table */
PRIVATE struct cprof_ctl_s control;		/* for comms with kernel */
PRIVATE int cprof_announce;			/* announce on n-th execution
						 * of procentry */
PRIVATE int cprof_locked;			/* for reentrancy */
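/*
 * How the pieces fit together: cprof_tbl[] holds one slot per distinct call
 * path, idx[] maps the ELF hash of a call path string to a chain of slots,
 * and cprof_stk[] mirrors the current call depth so that procexit can charge
 * cycle counts to the call path that is being left.
 */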
FORWARD _PROTOTYPE(void cprof_init, (void) );
FORWARD _PROTOTYPE(void reset, (void) );
FORWARD _PROTOTYPE(void clear_tbl, (void) );
PUBLIC void procentry (char *name)
{
  static int init = 0;
  unsigned hash = 0, i = 0, x = 0;
  struct cprof_tbl_s *last;
  char c;
  u64_t start;

  /* Procentry is not reentrant. */
  if (cprof_locked) return; else cprof_locked = 1;

  /* Read CPU cycle count into local variable. */
  read_tsc(&start.hi, &start.lo);

  /* Run init code once after system boot. */
  if (init == 0) {
	cprof_init();
	init++;
  }

  if (init > -1 && init++ == cprof_announce) {
	/* Tell kernel about control structure and table locations.
	 *
	 * In userspace processes, the library function profile_register
	 * will be used. This function does a kernel call (sys_profbuf) to
	 * announce to the kernel the location of the control struct and
	 * hash table. The control struct is used by the kernel to write
	 * a flag if resetting of the table is requested. The location of
	 * the table is needed to copy the information to the user process
	 * that requests it.
	 *
	 * Kernelspace processes don't use the library function but have
	 * their own implementation that executes logic similar to sys_profbuf.
	 * The reason for this is that the kernel is non-reentrant, therefore
	 * a kernelspace process is not able to do a kernel call itself since
	 * this would cause a deadlock.
	 */
	profile_register((void *) &control, (void *) &cprof_tbl);
	init = -1;
  }
  /* Only continue if sane. */
  if (control.err) return;

  /* Check if kernel instructed to reset profiling data. */
  if (control.reset) reset();

  if (++cprof_stk_top == CPROF_STACK_SIZE) {
	printf("CPROFILE error: stack overrun\n");
	control.err |= CPROF_STACK_OVERRUN;
	return;
  }
  /* Save initial cycle count on stack. */
  cprof_stk[cprof_stk_top].start_1 = start;

  /* Check available call path len. */
  if (cpath_len + strlen(name) + 1 > CPROF_CPATH_MAX_LEN) {
	printf("CPROFILE error: call path overrun\n");
	control.err |= CPROF_CPATH_OVERRUN;
	return;
  }

  /* Save previous call path length on stack. */
  cprof_stk[cprof_stk_top].cpath_len = cpath_len;

  /* Generate new call path string and length. */
  if (cprof_stk_top > 0)			/* Path is space separated. */
	cpath[cpath_len++] = ' ';
  while ((c = *(name++)) != '\0')		/* Append function name. */
	cpath[cpath_len++] = c;
  cpath[cpath_len] = '\0';			/* Null-termination. */
  /* Calculate hash for call path string (algorithm: ELF). */
  for (i=0; i<cpath_len; i++) {
	hash = (hash << 4) + cpath[i];
	if ((x = hash & 0xF0000000L) != 0) {
		hash ^= (x >> 24);
		hash &= ~x;
	}
  }
  hash %= CPROF_INDEX_SIZE;
  /* Look up the slot for this call path in the hash table. */
  for (cprof_slot = idx[hash]; cprof_slot != 0; cprof_slot = cprof_slot->next)
	if (strcmp(cprof_slot->cpath, cpath) == 0) break;

  if (cprof_slot)
	cprof_slot->calls++;		/* found slot: update call counter */
  else {
	/* Not found: insert path into hash table. */
	if (control.slots_used == cprof_tbl_size) {
		printf("CPROFILE error: table overrun\n");
		control.err |= CPROF_TABLE_OVERRUN;
		return;
	}

	/* Set values for new slot. */
	cprof_slot = &cprof_tbl[control.slots_used++];
	strcpy(cprof_slot->cpath, cpath);
	cprof_slot->calls = 1;

	if (idx[hash] == 0) {
		/* No collision: simple update. */
		idx[hash] = cprof_slot;
	} else {
		/* Collision: update last in chain. */
		for (last = idx[hash]; last->next != 0; last = last->next);
		last->next = cprof_slot;
	}
  }
  /* Save slot on stack. */
  cprof_stk[cprof_stk_top].slot = cprof_slot;

  /* Again save CPU cycle count on stack. */
  read_tsc_64(&cprof_stk[cprof_stk_top].start_2);

  cprof_locked = 0;
}
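/*
 * Illustration with hypothetical functions: if main() calls foo(), which
 * calls bar(), successive procentry calls grow the call path string to
 * "main", "main foo" and "main foo bar", each getting its own slot in
 * cprof_tbl[]. On the matching procexit calls the saved cpath_len truncates
 * the string back to the caller's path.
 */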
PUBLIC void procexit (char *UNUSED(name))
{
  u64_t stop, spent;

  /* Procexit is not reentrant. */
  if (cprof_locked) return; else cprof_locked = 1;

  /* First thing: read CPU cycle count into local variable. */
  read_tsc(&stop.hi, &stop.lo);

  /* Only continue if sane. */
  if (control.err) return;
  /* Update cycle count for this call path. Exclude time spent in procentry/
   * procexit by using measurements taken at end of procentry and begin of
   * procexit (the "small" difference). This way, only the call overhead for
   * the procentry/procexit functions will be attributed to this call path,
   * not the procentry/procexit cycles.
   */

  /* Calculate "small" difference. */
  spent = sub64(stop, cprof_stk[cprof_stk_top].start_2);
  cprof_stk[cprof_stk_top].slot->cycles =
	add64(cprof_stk[cprof_stk_top].slot->cycles,
		sub64(spent, cprof_stk[cprof_stk_top].spent_deeper));

  /* Clear spent_deeper for call level we're leaving. */
  cprof_stk[cprof_stk_top].spent_deeper = cvu64(0);

  /* Adjust call path string and stack. */
  cpath_len = cprof_stk[cprof_stk_top].cpath_len;
  cpath[cpath_len] = '\0';
  /* Update spent_deeper for call level below. Include time spent in
   * procentry/procexit by using measurements taken at begin of procentry
   * and end of procexit (the "big" difference). This way the time spent in
   * procentry/procexit will be included in spent_deeper and therefore, since
   * this value is subtracted from the lower call level, it will not be
   * attributed to any call path. This way, pollution of the statistics
   * because of procentry/procexit is kept to a minimum.
   */

  /* Read CPU cycle count. */
  read_tsc(&stop.hi, &stop.lo);

  /* Calculate "big" difference. */
  spent = sub64(stop, cprof_stk[cprof_stk_top].start_1);
  cprof_stk_top--;			/* decrease stack */
  if (cprof_stk_top >= 0)		/* don't update non-existent level -1 */
	cprof_stk[cprof_stk_top].spent_deeper =
		add64(cprof_stk[cprof_stk_top].spent_deeper, spent);

  cprof_locked = 0;
}
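/*
 * Worked example of the accounting, with made-up cycle counts: procentry("f")
 * records start_1 = 1000 and start_2 = 1040. f calls g, whose procexit adds
 * its "big" difference of 560 cycles to f's spent_deeper. At procexit("f")
 * the first TSC read gives 2040, so the "small" difference is
 * 2040 - 1040 = 1000 and f's call path is charged 1000 - 560 = 440 cycles.
 * The second TSC read gives 2080, so the "big" difference 2080 - 1000 = 1080
 * is added to the caller's spent_deeper; the profiling overhead around f is
 * therefore not attributed to any call path.
 */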
PRIVATE void cprof_init()
{
  int i;

  cpath[0] = '\0';
  cpath_len = 0;
  cprof_stk_top = -1;
  cprof_tbl_size = profile_get_tbl_size();
  cprof_announce = profile_get_announce();
  clear_tbl();

  for (i=0; i<CPROF_STACK_SIZE; i++) {
	cprof_stk[i].cpath_len = 0;
	cprof_stk[i].slot = 0;
	cprof_stk[i].start_1 = cvu64(0);
	cprof_stk[i].start_2 = cvu64(0);
	cprof_stk[i].spent_deeper = cvu64(0);
  }
}
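/*
 * The definition of reset() is not part of this excerpt. A minimal sketch
 * consistent with its uses above (the forward declaration, and the call in
 * procentry when the kernel sets control.reset) would be:
 */
PRIVATE void reset()
{
  clear_tbl();			/* drop all collected call path data */
  control.reset = 0;		/* acknowledge the kernel's reset request */
}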
PRIVATE void clear_tbl()
{
  int i;

  /* Reset profiling table. */
  control.slots_used = 0;
  for (i=0; i<CPROF_INDEX_SIZE; i++) idx[i] = 0;	/* clear index */
  for (i=0; i<cprof_tbl_size; i++) {			/* clear table */
	memset(cprof_tbl[i].cpath, '\0', CPROF_CPATH_MAX_LEN);
	cprof_tbl[i].next = 0;
	cprof_tbl[i].calls = 0;
	cprof_tbl[i].cycles.lo = 0;
	cprof_tbl[i].cycles.hi = 0;
  }
}