4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
30 * All routines in this file are for processing new-style, *versioned*
31 * mon.out format. Together with rdelf.c, lookup.c and profv.h, these
32 * form the complete set of files to profile new-style mon.out files.
40 bool time_in_ticks
= FALSE
;
41 size_t n_pcsamples
, n_accounted_ticks
, n_zeros
, total_funcs
;
42 unsigned char sort_flag
;
45 size_t n_modules
= 1; /* always include the aout object */
47 struct stat aout_stat
, monout_stat
;
51 cmp_by_name(const void *arg1
, const void *arg2
)
53 profrec_t
*a
= (profrec_t
*)arg1
;
54 profrec_t
*b
= (profrec_t
*)arg2
;
56 return (strcmp(a
->demangled_name
, b
->demangled_name
));
60 setup_demangled_names(void)
63 char *nbp
, *nbe
, *namebuf
;
64 size_t cur_len
= 0, namebuf_sz
= BUCKET_SZ
;
67 if ((namebuf
= malloc(namebuf_sz
)) == NULL
) {
68 (void) fprintf(stderr
, "%s: can't allocate %d bytes\n",
74 nbe
= namebuf
+ namebuf_sz
;
76 for (i
= 0; i
< total_funcs
; i
++) {
77 if ((p
= conv_demangle_name(profsym
[i
].name
)) == NULL
)
81 if ((nbp
+ namelen
+ 1) > nbe
) {
82 namebuf_sz
+= BUCKET_SZ
;
83 namebuf
= realloc(namebuf
, namebuf_sz
);
84 if (namebuf
== NULL
) {
85 (void) fprintf(stderr
,
86 "%s: can't alloc %d bytes\n",
91 nbp
= namebuf
+ cur_len
;
92 nbe
= namebuf
+ namebuf_sz
;
95 (void) strcpy(nbp
, p
);
96 profsym
[i
].demangled_name
= nbp
;
99 cur_len
+= namelen
+ 1;
104 cmp_by_time(const void *arg1
, const void *arg2
)
106 profrec_t
*a
= (profrec_t
*)arg1
;
107 profrec_t
*b
= (profrec_t
*)arg2
;
109 if (a
->percent_time
> b
->percent_time
)
111 else if (a
->percent_time
< b
->percent_time
)
118 cmp_by_ncalls(const void *arg1
, const void *arg2
)
120 profrec_t
*a
= (profrec_t
*)arg1
;
121 profrec_t
*b
= (profrec_t
*)arg2
;
123 if (a
->ncalls
> b
->ncalls
)
125 else if (a
->ncalls
< b
->ncalls
)
133 print_profile_data(void)
136 int (*sort_func
)(const void *, const void *);
142 * Sort the compiled data; the sort flags are mutually exclusive.
146 sort_func
= cmp_by_ncalls
;
151 setup_demangled_names();
152 sort_func
= cmp_by_name
;
156 sort_flag
|= BY_ADDRESS
;
157 sort_func
= NULL
; /* already sorted by addr */
160 case BY_TIME
: /* default is to sort by time */
162 sort_func
= cmp_by_time
;
167 qsort(profsym
, total_funcs
, sizeof (profrec_t
), sort_func
);
171 * If we're sorting by name, and if it is a verbose print, we wouldn't
172 * have set up the print_mid fields yet.
174 if ((flags
& F_VERBOSE
) && (sort_flag
== BY_NAME
)) {
175 for (i
= 0; i
< total_funcs
; i
++) {
177 * same as previous or next (if there's one) ?
179 if (i
&& (strcmp(profsym
[i
].demangled_name
,
180 profsym
[i
-1].demangled_name
) == 0)) {
181 profsym
[i
].print_mid
= TRUE
;
182 } else if ((i
< (total_funcs
- 1)) &&
183 (strcmp(profsym
[i
].demangled_name
,
184 profsym
[i
+1].demangled_name
) == 0)) {
185 profsym
[i
].print_mid
= TRUE
;
191 * The actual printing part.
193 if (!(flags
& F_NHEAD
)) {
195 (void) printf(" %s", atitle
);
199 " %Time Tiks Cumtiks #Calls tiks/call Name");
202 " %Time Seconds Cumsecs #Calls msec/call Name");
206 for (i
= 0; i
< total_funcs
; i
++) {
208 * Since the same value may denote different symbols in
209 * different shared objects, it is debatable if it is
210 * meaningful to print addresses at all. Especially so
211 * if we were asked to sort by symbol addresses.
213 * If we've to sort by address, I think it is better to sort
214 * it on a per-module basis and if verbose mode is on too,
215 * print a newline to separate out modules.
217 if ((flags
& F_VERBOSE
) && (sort_flag
== BY_ADDRESS
)) {
218 if (mi
!= profsym
[i
].module
) {
220 mi
= profsym
[i
].module
;
224 if (flags
& F_PADDR
) {
225 if (aformat
[2] == 'x')
226 (void) printf("%16llx ", profsym
[i
].addr
);
228 (void) printf("%16llo ", profsym
[i
].addr
);
231 cumsecs
+= profsym
[i
].seconds
;
232 (void) printf("%6.1f%8.2f%8.2f", profsym
[i
].percent_time
,
233 profsym
[i
].seconds
, cumsecs
);
235 (void) printf("%8d%12.4f ",
236 profsym
[i
].ncalls
, profsym
[i
].msecs_per_call
);
238 if (profsym
[i
].print_mid
)
239 (void) printf("%d:", (profsym
[i
].module
)->id
);
241 (void) printf("%s\n", profsym
[i
].demangled_name
);
245 (void) sprintf(filler
, "%16s", "");
249 if (flags
& F_VERBOSE
) {
251 (void) printf("%s Total Object Modules %7d\n",
253 (void) printf("%s Qualified Symbols %7d\n",
254 filler
, total_funcs
);
255 (void) printf("%s Symbols with zero usage %7d\n",
257 (void) printf("%s Total pc-hits %7d\n",
258 filler
, n_pcsamples
);
259 (void) printf("%s Accounted pc-hits %7d\n",
260 filler
, n_accounted_ticks
);
261 if ((!gflag
) && (n_pcsamples
- n_accounted_ticks
)) {
262 (void) printf("%s Missed pc-hits (try -g) %7d\n\n",
263 filler
, n_pcsamples
- n_accounted_ticks
);
265 (void) printf("%s Missed pc-hits %7d\n\n",
266 filler
, n_pcsamples
- n_accounted_ticks
);
268 (void) printf("%s Module info\n", filler
);
269 for (mi
= &modules
; mi
; mi
= mi
->next
)
270 (void) printf("%s %d: `%s'\n", filler
,
276 name_cmp(const void *arg1
, const void *arg2
)
278 profnames_t
*a
= (profnames_t
*)arg1
;
279 profnames_t
*b
= (profnames_t
*)arg2
;
281 return (strcmp(a
->name
, b
->name
));
290 pn
= calloc(total_funcs
, sizeof (profnames_t
));
292 (void) fprintf(stderr
, "%s: no room for %d bytes\n",
293 cmdname
, total_funcs
* sizeof (profnames_t
));
297 for (i
= 0; i
< total_funcs
; i
++) {
298 pn
[i
].name
= profsym
[i
].demangled_name
;
299 pn
[i
].pfrec
= &profsym
[i
];
302 qsort(pn
, total_funcs
, sizeof (profnames_t
), name_cmp
);
304 for (i
= 0; i
< total_funcs
; i
++) {
306 * same as previous or next (if there's one) ?
308 if (i
&& (strcmp(pn
[i
].name
, pn
[i
-1].name
) == 0))
309 (pn
[i
].pfrec
)->print_mid
= TRUE
;
310 else if ((i
< (total_funcs
- 1)) &&
311 (strcmp(pn
[i
].name
, pn
[i
+1].name
) == 0)) {
312 (pn
[i
].pfrec
)->print_mid
= TRUE
;
320 compute_times(nltype
*nl
, profrec_t
*psym
)
322 static int first_time
= TRUE
;
326 if ((hz
= sysconf(_SC_CLK_TCK
)) == -1)
327 time_in_ticks
= TRUE
;
332 psym
->seconds
= (double)nl
->nticks
;
334 psym
->msecs_per_call
= (double)nl
->nticks
/
337 psym
->msecs_per_call
= (double)0.0;
339 psym
->seconds
= (double)nl
->nticks
/ (double)hz
;
341 psym
->msecs_per_call
=
342 ((double)psym
->seconds
* 1000.0) /
345 psym
->msecs_per_call
= (double)0.0;
350 ((double)nl
->nticks
/ (double)n_pcsamples
) * 100;
355 collect_profsyms(void)
362 for (mi
= &modules
; mi
; mi
= mi
->next
)
363 total_funcs
+= mi
->nfuncs
;
365 profsym
= calloc(total_funcs
, sizeof (profrec_t
));
366 if (profsym
== NULL
) {
367 (void) fprintf(stderr
, "%s: no room for %d bytes\n",
368 cmdname
, total_funcs
* sizeof (profrec_t
));
373 for (mi
= &modules
; mi
; mi
= mi
->next
) {
375 for (i
= 0; i
< mi
->nfuncs
; i
++) {
377 * I think F_ZSYMS doesn't make sense for the new
378 * mon.out format, since we don't have a profiling
379 * *range*, per se. But the man page demands it,
382 if ((nl
[i
].ncalls
== 0) && (nl
[i
].nticks
== 0)) {
384 if (!(flags
& F_ZSYMS
))
389 * Initially, we set demangled_name to be
390 * the same as name. If Cflag is set, we later
391 * change this to be the demangled name ptr.
393 profsym
[ndx
].addr
= nl
[i
].value
;
394 profsym
[ndx
].ncalls
= nl
[i
].ncalls
;
395 profsym
[ndx
].name
= nl
[i
].name
;
396 profsym
[ndx
].demangled_name
= nl
[i
].name
;
397 profsym
[ndx
].module
= mi
;
398 profsym
[ndx
].print_mid
= FALSE
;
399 compute_times(&nl
[i
], &profsym
[ndx
]);
405 * Adjust total_funcs to actual printable funcs
411 assign_pcsamples(mod_info_t
*module
, Address
*pcsmpl
,
414 Address
*pcptr
, *pcse
= pcsmpl
+ n_samples
;
419 /* Locate the first pc-hit for this module */
420 if ((pcptr
= locate(pcsmpl
, n_samples
, module
->load_base
)) == NULL
)
421 return; /* no pc-hits in this module */
423 /* Assign all pc-hits in this module to appropriate functions */
424 while ((pcptr
< pcse
) && (*pcptr
< module
->load_end
)) {
426 /* Update the corresponding function's time */
427 if (nl
= nllookup(module
, *pcptr
, &nxt_func
)) {
429 * Collect all pc-hits in this function. Each
430 * pc-hit counts as 1 tick.
433 while ((pcptr
< pcse
) && (*pcptr
< nxt_func
)) {
438 nl
->nticks
+= nticks
;
439 n_accounted_ticks
+= nticks
;
442 * pc sample could not be assigned to function;
451 pc_cmp(const void *arg1
, const void *arg2
)
453 Address
*pc1
= (Address
*)arg1
;
454 Address
*pc2
= (Address
*)arg2
;
466 process_pcsamples(ProfBuffer
*bufp
)
470 size_t nelem
= bufp
->bufsize
;
472 /* buffer with no pc samples ? */
476 /* Allocate for the pcsample chunk */
477 pc_samples
= (Address
*) calloc(nelem
, sizeof (Address
));
478 if (pc_samples
== NULL
) {
479 (void) fprintf(stderr
, "%s: no room for %d sample pc's\n",
484 (void) memcpy(pc_samples
, (caddr_t
)bufp
+ bufp
->buffer
,
485 nelem
* sizeof (Address
));
487 /* Sort the pc samples */
488 qsort(pc_samples
, nelem
, sizeof (Address
), pc_cmp
);
491 * Assign pcsamples to functions in the currently active
494 for (mi
= &modules
; mi
; mi
= mi
->next
) {
495 if (mi
->active
== FALSE
)
497 assign_pcsamples(mi
, pc_samples
, nelem
);
502 /* Update total number of pcsamples read so far */
503 n_pcsamples
+= nelem
;
507 process_cgraph(ProfCallGraph
*cgp
)
512 ProfFunction
*calleep
;
515 for (callee_off
= cgp
->functions
; callee_off
;
516 callee_off
= calleep
->next_to
) {
518 /* LINTED: pointer cast */
519 calleep
= (ProfFunction
*)((char *)cgp
+ callee_off
);
520 if (calleep
->count
== 0)
524 * If we cannot identify a callee with a module, we
525 * cannot get to its namelist, just skip it.
527 for (mi
= &modules
; mi
; mi
= mi
->next
) {
528 if (mi
->active
== FALSE
)
531 if (calleep
->topc
>= mi
->load_base
&&
532 calleep
->topc
< mi
->load_end
) {
534 * nllookup() returns the next lower entry
535 * point on a miss. So just make sure the
536 * callee's pc is not outside this function
538 if (nl
= nllookup(mi
, calleep
->topc
, 0)) {
539 f_end
= mi
->load_base
+ (nl
->value
-
540 mi
->txt_origin
) + nl
->size
;
541 if (calleep
->topc
< f_end
)
542 nl
->ncalls
+= calleep
->count
;
550 get_shobj_syms(char *pathname
, GElf_Addr ld_base
, GElf_Addr ld_end
)
554 /* Create a new module element */
555 if ((mi
= malloc(sizeof (mod_info_t
))) == NULL
) {
556 (void) fprintf(stderr
, "%s: no room for %d bytes\n",
557 cmdname
, sizeof (mod_info_t
));
561 mi
->path
= malloc(strlen(pathname
) + 1);
562 if (mi
->path
== NULL
) {
563 (void) fprintf(stderr
, "%s: can't allocate %d bytes\n",
564 cmdname
, strlen(pathname
) + 1);
567 (void) strcpy(mi
->path
, pathname
);
570 get_syms(pathname
, mi
);
572 /* and fill in info... */
573 mi
->id
= n_modules
+ 1;
574 mi
->load_base
= ld_base
;
575 mi
->load_end
= ld_end
;
584 * Two modules overlap each other if they don't lie completely *outside*
588 does_overlap(ProfModule
*new, mod_info_t
*old
)
590 /* case 1: new module lies completely *before* the old one */
591 if (new->startaddr
< old
->load_base
&& new->endaddr
<= old
->load_base
)
594 /* case 2: new module lies completely *after* the old one */
595 if (new->startaddr
>= old
->load_end
&& new->endaddr
>= old
->load_end
)
598 /* probably a dlopen: the modules overlap each other */
603 is_same_as_aout(char *modpath
, struct stat
*buf
)
605 if (stat(modpath
, buf
) == -1) {
610 if ((buf
->st_dev
== aout_stat
.st_dev
) &&
611 (buf
->st_ino
== aout_stat
.st_ino
)) {
618 process_modules(ProfModuleList
*modlp
)
621 mod_info_t
*mi
, *last
, *new_module
;
623 bool more_modules
= TRUE
;
624 struct stat so_statbuf
;
626 /* Check version of module type object */
627 if (modlp
->version
> PROF_MODULES_VER
) {
628 (void) fprintf(stderr
,
629 "%s: unsupported version %d for modules\n",
630 cmdname
, modlp
->version
);
636 * Scan the PROF_MODULES_T list and add modules to current list
637 * of modules, if they're not present already
639 /* LINTED: pointer cast */
640 newmodp
= (ProfModule
*)((caddr_t
)modlp
+ modlp
->modules
);
643 * Since the aout could've been renamed after its run, we
644 * should see if current module overlaps aout. If it does, it
645 * is probably the renamed aout. We should also skip any other
646 * non-sharedobj's that we see (or should we report an error ?)
648 so_path
= (caddr_t
)modlp
+ newmodp
->path
;
649 if (does_overlap(newmodp
, &modules
) ||
650 is_same_as_aout(so_path
, &so_statbuf
) ||
651 (!is_shared_obj(so_path
))) {
653 more_modules
= FALSE
;
655 /* LINTED: pointer cast */
656 newmodp
= (ProfModule
*)
657 ((caddr_t
)modlp
+ newmodp
->next
);
662 * Check all modules (leave the first one, 'cos that
663 * is the program executable info). If this module is already
664 * there in the list, skip it.
667 while ((mi
= last
->next
) != NULL
) {
669 * We expect the full pathname for all shared objects
670 * needed by the program executable. In this case, we
671 * simply need to compare the paths to see if they are
674 if (strcmp(mi
->path
, so_path
) == 0)
678 * Check if this new shared object will overlap any
679 * existing module. If yes, deactivate the old one.
681 if (does_overlap(newmodp
, mi
))
687 /* Module already there, skip it */
689 mi
->load_base
= newmodp
->startaddr
;
690 mi
->load_end
= newmodp
->endaddr
;
693 more_modules
= FALSE
;
695 /* LINTED: pointer cast */
696 newmodp
= (ProfModule
*)
697 ((caddr_t
)modlp
+ newmodp
->next
);
702 * Check if mon.out is outdated with respect to the new
703 * module we want to add
705 if (monout_stat
.st_mtime
< so_statbuf
.st_mtime
) {
706 (void) fprintf(stderr
,
707 "%s: newer shared obj %s outdates profile info\n",
712 /* Create this module's nameslist */
713 new_module
= get_shobj_syms(so_path
,
714 newmodp
->startaddr
, newmodp
->endaddr
);
716 /* Add it to the tail of active module list */
717 last
->next
= new_module
;
720 * Move to the next module in the PROF_MODULES_T list
724 more_modules
= FALSE
;
726 /* LINTED: pointer cast */
727 newmodp
= (ProfModule
*)((caddr_t
)modlp
+ newmodp
->next
);
729 } while (more_modules
);
733 process_mon_out(caddr_t memp
, size_t fsz
)
737 bool found_pcsamples
= FALSE
, found_cgraph
= FALSE
;
740 * Save file end pointer and start after header
742 file_end
= memp
+ fsz
;
743 /* LINTED: pointer cast */
744 objp
= (ProfObject
*)(memp
+ ((ProfHeader
*)memp
)->size
);
745 while ((caddr_t
)objp
< file_end
) {
746 switch (objp
->type
) {
747 case PROF_MODULES_T
:
748 process_modules((ProfModuleList
*)objp
);
751 case PROF_CALLGRAPH_T
:
752 process_cgraph((ProfCallGraph
*)objp
);
757 process_pcsamples((ProfBuffer
*)objp
);
758 found_pcsamples
= TRUE
;
762 (void) fprintf(stderr
,
763 "%s: unknown prof object type=%d\n",
764 cmdname
, objp
->type
);
767 /* LINTED: pointer cast */
768 objp
= (ProfObject
*)((caddr_t
)objp
+ objp
->size
);
771 if (!found_cgraph
|| !found_pcsamples
) {
772 (void) fprintf(stderr
,
773 "%s: missing callgraph/pcsamples in `%s'\n",
778 if ((caddr_t
)objp
> file_end
) {
779 (void) fprintf(stderr
, "%s: malformed file `%s'\n",
786 get_aout_syms(char *pathname
, mod_info_t
*mi
)
788 mi
->path
= malloc(strlen(pathname
) + 1);
789 if (mi
->path
== NULL
) {
790 (void) fprintf(stderr
, "%s: can't allocate %d bytes\n",
791 cmdname
, strlen(pathname
) + 1);
795 (void) strcpy(mi
->path
, pathname
);
798 get_syms(pathname
, mi
);
801 mi
->load_base
= mi
->txt_origin
;
802 mi
->load_end
= mi
->data_end
;
810 unsigned int magic_num
;
811 bool invalid_version
;
816 * Check the magic and see if this is versioned or *old-style*
819 if ((fd
= open(mon_fn
, O_RDONLY
)) == -1) {
823 if (read(fd
, (char *)&magic_num
, sizeof (unsigned int)) == -1) {
827 if (magic_num
!= (unsigned int) PROF_MAGIC
) {
835 * Check versioning info. For now, let's say we provide
836 * backward compatibility, so we accept all older versions.
838 (void) lseek(fd
, 0L, SEEK_SET
);
839 if (read(fd
, (char *)&prof_hdr
, sizeof (ProfHeader
)) == -1) {
843 invalid_version
= FALSE
;
844 if (prof_hdr
.h_major_ver
> PROF_MAJOR_VERSION
)
845 invalid_version
= TRUE
;
846 else if (prof_hdr
.h_major_ver
== PROF_MAJOR_VERSION
) {
847 if (prof_hdr
.h_minor_ver
> PROF_MINOR_VERSION
)
848 invalid_version
= FALSE
;
850 if (invalid_version
) {
851 (void) fprintf(stderr
,
852 "%s: mon.out version %d.%d not supported\n",
853 cmdname
, prof_hdr
.h_major_ver
, prof_hdr
.h_minor_ver
);
860 * Map mon.out onto memory.
862 if (stat(mon_fn
, &monout_stat
) == -1) {
866 if ((fmem
= mmap(NULL
, monout_stat
.st_size
,
867 PROT_READ
, MAP_PRIVATE
, fd
, 0)) == MAP_FAILED
) {
875 * Now, read program executable's symbol table. Also save its
876 * stat in aout_stat for use while processing mon.out
878 if (stat(sym_fn
, &aout_stat
) == -1) {
882 get_aout_syms(sym_fn
, &modules
);
885 * Process the mon.out, all shared objects it references
886 * and collect statistics on ticks spent in each function,
887 * number of calls, etc.
889 process_mon_out(fmem
, monout_stat
.st_size
);
892 * Based on the flags and the statistics we've got, create
893 * a list of relevant symbols whose profiling details should
899 * Check for duplicate names in output. We need to print the
900 * module id's if verbose. Also, if we are sorting by name anyway,
901 * we don't need to check for duplicates here. We'll do that later.
903 if ((flags
& F_VERBOSE
) && (sort_flag
!= BY_NAME
))
909 print_profile_data();
912 (void) munmap(fmem
, monout_stat
.st_size
);