4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
35 char *whoami
= "gprof";
36 static pctype lowpc
, highpc
; /* range profiled, in UNIT's */
39 * things which get -E excluded by default.
41 static char *defaultEs
[] = {
49 static char *objname
[] = {
56 #define MAX_OBJTYPES 3
68 max(pctype a
, pctype b
)
76 min(pctype a
, pctype b
)
84 * calculate scaled entry point addresses (to save time in asgnsamples),
85 * and possibly push the scaled entry points over the entry mask,
86 * if it turns out that the entry point is in one bucket and the code
87 * for a routine is in the next bucket.
95 pctype bucket_of_entry
;
96 pctype bucket_of_code
;
99 /* for old-style gmon.out, nameslist is only in modules.nl */
101 for (nlp
= modules
.nl
; nlp
< modules
.npe
; nlp
++) {
102 nlp
->svalue
= nlp
->value
/ sizeof (UNIT
);
104 bucket_of_entry
= (nlp
->svalue
- lowpc
) / scale
;
105 bucket_of_code
= (nlp
->svalue
+ UNITS_TO_CODE
- lowpc
) / scale
;
106 if (bucket_of_entry
< bucket_of_code
) {
107 if (debug
& SAMPLEDEBUG
) {
109 "[alignentries] pushing svalue 0x%llx "
110 "to 0x%llx\n", nlp
->svalue
,
111 nlp
->svalue
+ UNITS_TO_CODE
);
122 * Assign samples to the procedures to which they belong.
124 * There are three cases as to where pcl and pch can be
125 * with respect to the routine entry addresses svalue0 and svalue1
126 * as shown in the following diagram. overlap computes the
127 * distance between the arrows, the fraction of the sample
128 * that is to be credited to the routine which starts at svalue0.
134 * +-----------------------------------------------+
136 * | ->| |<- ->| |<- ->| |<- |
138 * +---------+ +---------+ +---------+
142 * pcl pch pcl pch pcl pch
144 * For the vax we assert that samples will never fall in the first
145 * two bytes of any routine, since that is the entry mask,
146 * thus we call alignentries() to adjust the entry points if
147 * the entry mask falls in one bucket but the code for the routine
148 * doesn't start until the next bucket. In conjunction with the
149 * alignment of routine addresses, this should allow us to have
150 * only one sample for every four bytes of text space and never
151 * have any overlap (the two end cases, above).
161 pctype svalue0
, svalue1
;
163 extern mod_info_t modules
;
164 nltype
*nl
= modules
.nl
;
165 sztype nname
= modules
.nname
;
167 /* read samples and assign to namelist symbols */
168 scale
= highpc
- lowpc
;
171 for (i
= 0, j
= 1; i
< nsamples
; i
++) {
175 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
176 pcl
= lowpc
+ scale
* i
;
177 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
178 pch
= lowpc
+ scale
* (i
+ 1);
181 if (debug
& SAMPLEDEBUG
) {
183 "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
188 for (j
= (j
? j
- 1 : 0); j
< nname
; j
++) {
189 svalue0
= nl
[j
].svalue
;
190 svalue1
= nl
[j
+1].svalue
;
192 * if high end of tick is below entry address,
198 * if low end of tick into next routine,
199 * go for next routine.
203 overlap
= min(pch
, svalue1
) - max(pcl
, svalue0
);
206 if (debug
& SAMPLEDEBUG
) {
207 (void) printf("[asgnsamples] "
208 "(0x%llx->0x%llx-0x%llx) %s gets "
209 "%f ticks %lld overlap\n",
210 nl
[j
].value
/sizeof (UNIT
), svalue0
,
212 overlap
* time
/ scale
, overlap
);
215 nl
[j
].time
+= overlap
* time
/ scale
;
220 if (debug
& SAMPLEDEBUG
) {
221 (void) printf("[asgnsamples] totime %f\n", totime
);
228 dump_callgraph(FILE *fp
, char *filename
, unsigned long tarcs
,
229 unsigned long ncallees
)
231 ProfCallGraph prof_cgraph
;
232 ProfFunction prof_func
;
237 unsigned long caller_id
= 0, callee_id
= 0;
240 * Write the callgraph header
242 prof_cgraph
.type
= PROF_CALLGRAPH_T
;
243 prof_cgraph
.version
= PROF_CALLGRAPH_VER
;
244 prof_cgraph
.functions
= PROFCGRAPH_SZ
;
245 prof_cgraph
.size
= PROFCGRAPH_SZ
+ tarcs
* PROFFUNC_SZ
;
246 if (fwrite(&prof_cgraph
, sizeof (ProfCallGraph
), 1, fp
) != 1) {
252 (void) fseek(fp
, CGRAPH_FILLER
, SEEK_CUR
);
254 /* Current offset inside the callgraph object */
255 cur_offset
= prof_cgraph
.functions
;
257 for (mi
= &modules
; mi
; mi
= mi
->next
) {
258 for (nlp
= mi
->nl
; nlp
< mi
->npe
; nlp
++) {
259 if (nlp
->ncallers
== 0)
262 /* If this is the last callee, set next_to to 0 */
264 if (callee_id
== ncallees
)
265 prof_func
.next_to
= 0;
267 prof_func
.next_to
= cur_offset
+
268 nlp
->ncallers
* PROFFUNC_SZ
;
272 * Dump this callee's raw arc information with all
276 for (arcp
= nlp
->parents
; arcp
;
277 arcp
= arcp
->arc_parentlist
) {
279 * If no more callers for this callee, set
282 if (caller_id
== nlp
->ncallers
)
283 prof_func
.next_from
= 0;
285 prof_func
.next_from
= cur_offset
+
290 arcp
->arc_parentp
->module
->load_base
+
291 (arcp
->arc_parentp
->value
-
292 arcp
->arc_parentp
->module
->txt_origin
);
293 prof_func
.topc
= mi
->load_base
+
294 (nlp
->value
- mi
->txt_origin
);
295 prof_func
.count
= arcp
->arc_count
;
298 if (fwrite(&prof_func
, sizeof (ProfFunction
),
305 (void) fseek(fp
, FUNC_FILLER
, SEEK_CUR
);
307 cur_offset
+= PROFFUNC_SZ
;
315 * To save all pc-hits in all the gmon.out's is infeasible, as this
316 * may become quite huge even with a small number of files to sum.
317 * Instead, we'll dump *fictitious hits* to correct functions
318 * by scanning module namelists. Again, since this is summing
319 * pc-hits, we may have to dump the pcsamples out in chunks if the
320 * number of pc-hits is high.
323 dump_hits(FILE *fp
, char *filename
, nltype
*nlp
)
326 size_t i
, nelem
, ntowrite
;
328 if ((nelem
= nlp
->nticks
) > PROF_BUFFER_SIZE
)
329 nelem
= PROF_BUFFER_SIZE
;
331 if ((p
= (Address
*) calloc(nelem
, sizeof (Address
))) == NULL
) {
332 (void) fprintf(stderr
, "%s: no room for %d pcsamples\n",
338 * Set up *fictitious* hits (to function entry) buffer
340 hitpc
= nlp
->module
->load_base
+ (nlp
->value
- nlp
->module
->txt_origin
);
341 for (i
= 0; i
< nelem
; i
++)
344 for (ntowrite
= nlp
->nticks
; ntowrite
>= nelem
; ntowrite
-= nelem
) {
345 if (fwrite(p
, nelem
* sizeof (Address
), 1, fp
) != 1) {
352 if (fwrite(p
, ntowrite
* sizeof (Address
), 1, fp
) != 1) {
362 dump_pcsamples(FILE *fp
, char *filename
, unsigned long *tarcs
,
363 unsigned long *ncallees
)
365 ProfBuffer prof_buffer
;
370 prof_buffer
.type
= PROF_BUFFER_T
;
371 prof_buffer
.version
= PROF_BUFFER_VER
;
372 prof_buffer
.buffer
= PROFBUF_SZ
;
373 prof_buffer
.bufsize
= n_pcsamples
;
374 prof_buffer
.size
= PROFBUF_SZ
+ n_pcsamples
* sizeof (Address
);
375 if (fwrite(&prof_buffer
, sizeof (ProfBuffer
), 1, fp
) != 1) {
381 (void) fseek(fp
, BUF_FILLER
, SEEK_CUR
);
385 for (mi
= &modules
; mi
; mi
= mi
->next
) {
386 for (nlp
= mi
->nl
; nlp
< mi
->npe
; nlp
++) {
388 dump_hits(fp
, filename
, nlp
);
391 for (arcp
= nlp
->parents
; arcp
;
392 arcp
= arcp
->arc_parentlist
) {
397 (*tarcs
) += nlp
->ncallers
;
405 dump_modules(FILE *fp
, char *filename
, size_t pbuf_sz
)
409 Index off_nxt
, off_path
;
412 ProfModuleList prof_modlist
;
415 /* Allocate for path strings buffer */
416 pbuf_sz
= CEIL(pbuf_sz
, STRUCT_ALIGN
);
417 if ((p
= pbuf
= calloc(pbuf_sz
, sizeof (char))) == NULL
) {
418 (void) fprintf(stderr
, "%s: no room for %d bytes\n",
419 whoami
, pbuf_sz
* sizeof (char));
423 /* Dump out PROF_MODULE_T info for all non-aout modules */
424 prof_modlist
.type
= PROF_MODULES_T
;
425 prof_modlist
.version
= PROF_MODULES_VER
;
426 prof_modlist
.modules
= PROFMODLIST_SZ
;
427 prof_modlist
.size
= PROFMODLIST_SZ
+ (n_modules
- 1) * PROFMOD_SZ
+
429 if (fwrite(&prof_modlist
, sizeof (ProfModuleList
), 1, fp
) != 1) {
435 (void) fseek(fp
, MODLIST_FILLER
, SEEK_CUR
);
438 * Initialize offsets for ProfModule elements.
440 off_nxt
= PROFMODLIST_SZ
+ PROFMOD_SZ
;
441 off_path
= PROFMODLIST_SZ
+ (n_modules
- 1) * PROFMOD_SZ
;
443 for (mi
= modules
.next
; mi
; mi
= mi
->next
) {
445 prof_mod
.next
= off_nxt
;
448 prof_mod
.path
= off_path
;
449 prof_mod
.startaddr
= mi
->load_base
;
450 prof_mod
.endaddr
= mi
->load_end
;
452 if (fwrite(&prof_mod
, sizeof (ProfModule
), 1, fp
) != 1) {
459 (void) fseek(fp
, MOD_FILLER
, SEEK_CUR
);
461 (void) strcpy(p
, mi
->name
);
462 namelen
= strlen(mi
->name
);
465 /* Note that offset to every path str need not be aligned */
466 off_nxt
+= PROFMOD_SZ
;
467 off_path
+= namelen
+ 1;
470 /* Write out the module path strings */
472 if (fwrite(pbuf
, pbuf_sz
, 1, fp
) != 1) {
482 * If we have inactive modules, their current load addresses may overlap with
483 * active ones, and so we have to assign fictitious, non-overlapping addresses
484 * to all modules before we dump them.
487 fixup_maps(size_t *pathsz
)
489 unsigned int n_inactive
= 0;
490 Address lbase
= 0, lend
;
493 /* Pick the lowest load address among modules */
495 for (mi
= &modules
; mi
; mi
= mi
->next
) {
497 if (mi
->active
== FALSE
)
500 if (mi
== &modules
|| mi
->load_base
< lbase
)
501 lbase
= mi
->load_base
;
504 * Return total path size of non-aout modules only
507 *pathsz
= (*pathsz
) + strlen(mi
->name
) + 1;
511 * All module info is in fine shape already if there are no
518 * Assign fictitious load addresses to all (non-aout) modules so
519 * that sum info can be dumped out.
521 for (mi
= modules
.next
; mi
; mi
= mi
->next
) {
522 lend
= lbase
+ (mi
->data_end
- mi
->txt_origin
);
523 if ((lbase
< modules
.load_base
&& lend
< modules
.load_base
) ||
524 (lbase
> modules
.load_end
&& lend
> modules
.load_end
)) {
526 mi
->load_base
= lbase
;
529 /* just to give an appearance of reality */
530 lbase
= CEIL(lend
+ PGSZ
, PGSZ
);
533 * can't use this lbase & lend pair, as it
534 * overlaps with aout's addresses
536 mi
->load_base
= CEIL(modules
.load_end
+ PGSZ
, PGSZ
);
537 mi
->load_end
= mi
->load_base
+ (lend
- lbase
);
539 lbase
= CEIL(mi
->load_end
+ PGSZ
, PGSZ
);
545 dump_gprofhdr(FILE *fp
, char *filename
)
549 prof_hdr
.h_magic
= PROF_MAGIC
;
550 prof_hdr
.h_major_ver
= PROF_MAJOR_VERSION
;
551 prof_hdr
.h_minor_ver
= PROF_MINOR_VERSION
;
552 prof_hdr
.size
= PROFHDR_SZ
;
553 if (fwrite(&prof_hdr
, sizeof (prof_hdr
), 1, fp
) != 1) {
560 (void) fseek(fp
, HDR_FILLER
, SEEK_CUR
);
564 dumpsum_ostyle(char *sumfile
)
569 struct rawarc32 arc32
;
572 if ((sfile
= fopen(sumfile
, "w")) == NULL
) {
577 * dump the header; use the last header read in
580 if (fwrite(&h
, sizeof (h
), 1, sfile
) != 1) {
586 hdr
.lowpc
= (pctype32
)h
.lowpc
;
587 hdr
.highpc
= (pctype32
)h
.highpc
;
588 hdr
.ncnt
= (pctype32
)h
.ncnt
;
589 if (fwrite(&hdr
, sizeof (hdr
), 1, sfile
) != 1) {
597 if (fwrite(samples
, sizeof (unsigned_UNIT
), nsamples
, sfile
) !=
603 * dump the normalized raw arc information. For old-style dumping,
604 * the only namelist is in modules.nl
606 for (nlp
= modules
.nl
; nlp
< modules
.npe
; nlp
++) {
607 for (arcp
= nlp
->children
; arcp
;
608 arcp
= arcp
->arc_childlist
) {
610 arc
.raw_frompc
= arcp
->arc_parentp
->value
;
611 arc
.raw_selfpc
= arcp
->arc_childp
->value
;
612 arc
.raw_count
= arcp
->arc_count
;
613 if (fwrite(&arc
, sizeof (arc
), 1, sfile
) != 1) {
619 (pctype32
)arcp
->arc_parentp
->value
;
621 (pctype32
)arcp
->arc_childp
->value
;
622 arc32
.raw_count
= (actype32
)arcp
->arc_count
;
623 if (fwrite(&arc32
, sizeof (arc32
), 1, sfile
) !=
630 if (debug
& SAMPLEDEBUG
) {
632 "[dumpsum_ostyle] frompc 0x%llx selfpc "
633 "0x%llx count %lld\n", arc
.raw_frompc
,
634 arc
.raw_selfpc
, arc
.raw_count
);
639 (void) fclose(sfile
);
643 * dump out the gmon.sum file
646 dumpsum(char *sumfile
)
650 unsigned long total_arcs
; /* total number of arcs in all */
651 unsigned long ncallees
; /* no. of callees with parents */
654 dumpsum_ostyle(sumfile
);
658 if ((sfile
= fopen(sumfile
, "w")) == NULL
) {
664 * Dump the new-style gprof header. Even if one of the original
665 * profiled-files was of an older version, the summed file is of
666 * current version only.
668 dump_gprofhdr(sfile
, sumfile
);
671 * Fix up load-maps and dump out modules info
673 * Fix up module load maps so inactive modules get *some* address
674 * (and btw, could you get the total size of non-aout module path
677 fixup_maps(&pathbuf_sz
);
678 dump_modules(sfile
, sumfile
, pathbuf_sz
);
682 * Dump out the summ'd pcsamples
684 * For dumping call graph information later, we need certain
685 * statistics (like total arcs, number of callers for each node);
686 * collect these also while we are at it.
688 dump_pcsamples(sfile
, sumfile
, &total_arcs
, &ncallees
);
691 * Dump out the summ'd call graph information
693 dump_callgraph(sfile
, sumfile
, total_arcs
, ncallees
);
696 (void) fclose(sfile
);
700 tally(mod_info_t
*caller_mod
, mod_info_t
*callee_mod
, struct rawarc
*rawp
)
706 * if count == 0 this is a null arc and
707 * we don't need to tally it.
709 if (rawp
->raw_count
== 0)
713 * Lookup the caller and callee pcs in namelists of
714 * appropriate modules
716 parentp
= nllookup(caller_mod
, rawp
->raw_frompc
, NULL
);
717 childp
= nllookup(callee_mod
, rawp
->raw_selfpc
, NULL
);
718 if (childp
&& parentp
) {
720 childp
->ncall
+= rawp
->raw_count
;
723 childp
->ncall
+= rawp
->raw_count
;
725 childp
->ncall
-= rawp
->raw_count
;
726 if (childp
->ncall
< 0)
732 if (debug
& TALLYDEBUG
) {
733 (void) printf("[tally] arc from %s to %s traversed "
734 "%lld times\n", parentp
->name
,
735 childp
->name
, rawp
->raw_count
);
738 addarc(parentp
, childp
, rawp
->raw_count
);
743 * Look up a module's base address in a sorted list of pc-hits. Unlike
744 * nllookup(), this deals with misses by mapping them to the next *higher*
745 * pc-hit. This is so that we get into the module's first pc-hit right away,
746 * even if the module's entry-point (load_base) itself is not a hit.
749 locate(Address
*pclist
, size_t nelem
, Address keypc
)
751 size_t low
= 0, middle
, high
= nelem
- 1;
753 if (keypc
<= pclist
[low
])
756 if (keypc
> pclist
[high
])
759 while (low
!= high
) {
760 middle
= (high
+ low
) >> 1;
762 if ((pclist
[middle
] < keypc
) && (pclist
[middle
+ 1] >= keypc
))
763 return (&pclist
[middle
+ 1]);
765 if (pclist
[middle
] >= keypc
)
771 /* must never reach here! */
776 assign_pcsamples(mod_info_t
*module
, Address
*pcsmpl
, size_t n_samples
)
778 Address
*pcptr
, *pcse
= pcsmpl
+ n_samples
;
783 size_t n_hits_in_module
= 0;
786 /* Locate the first pc-hit for this module */
787 if ((pcptr
= locate(pcsmpl
, n_samples
, module
->load_base
)) == NULL
) {
789 if (debug
& PCSMPLDEBUG
) {
790 (void) printf("[assign_pcsamples] no pc-hits in\n");
792 " `%s'\n", module
->name
);
795 return; /* no pc-hits in this module */
798 /* Assign all pc-hits in this module to appropriate functions */
799 while ((pcptr
< pcse
) && (*pcptr
< module
->load_end
)) {
801 /* Update the corresponding function's time */
802 if (fnl
= nllookup(module
, (pctype
) *pcptr
, &nxt_func
)) {
804 * Collect all pc-hits in this function. Each
805 * pc-hit counts as 1 tick.
808 while ((pcptr
< pcse
) && (*pcptr
< nxt_func
)) {
813 if (func_nticks
== 0)
816 fnl
->nticks
+= func_nticks
;
817 fnl
->time
+= func_nticks
;
818 totime
+= func_nticks
;
822 n_hits_in_module
+= func_nticks
;
826 * pc sample could not be assigned to function;
834 if (debug
& PCSMPLDEBUG
) {
836 "[assign_pcsamples] %ld hits in\n", n_hits_in_module
);
837 (void) printf(" `%s'\n", module
->name
);
843 pc_cmp(const void *arg1
, const void *arg2
)
845 Address
*pc1
= (Address
*)arg1
;
846 Address
*pc2
= (Address
*)arg2
;
858 process_pcsamples(ProfBuffer
*bufp
)
863 size_t chunk_size
, nelem_read
, nelem_to_read
;
866 if (debug
& PCSMPLDEBUG
) {
868 "[process_pcsamples] number of pcsamples = %lld\n",
873 /* buffer with no pc samples ? */
874 if (bufp
->bufsize
== 0)
878 * If we're processing pcsamples of a profile sum, we could have
879 * more than PROF_BUFFER_SIZE number of samples. In such a case,
880 * we must read the pcsamples in chunks.
882 if ((chunk_size
= bufp
->bufsize
) > PROF_BUFFER_SIZE
)
883 chunk_size
= PROF_BUFFER_SIZE
;
885 /* Allocate for the pcsample chunk */
886 pc_samples
= (Address
*) calloc(chunk_size
, sizeof (Address
));
887 if (pc_samples
== NULL
) {
888 (void) fprintf(stderr
, "%s: no room for %d sample pc's\n",
893 /* Copy the current set of pcsamples */
895 nelem_to_read
= bufp
->bufsize
;
896 p
= (char *)bufp
+ bufp
->buffer
;
898 while (nelem_read
< nelem_to_read
) {
899 (void) memcpy((void *) pc_samples
, p
,
900 chunk_size
* sizeof (Address
));
902 /* Sort the pc samples */
903 qsort(pc_samples
, chunk_size
, sizeof (Address
), pc_cmp
);
906 * Assign pcsamples to functions in the currently active
909 for (mi
= &modules
; mi
; mi
= mi
->next
) {
910 if (mi
->active
== FALSE
)
912 assign_pcsamples(mi
, pc_samples
, chunk_size
);
915 p
+= (chunk_size
* sizeof (Address
));
916 nelem_read
+= chunk_size
;
918 if ((nelem_to_read
- nelem_read
) < chunk_size
)
919 chunk_size
= nelem_to_read
- nelem_read
;
924 /* Update total number of pcsamples read so far */
925 n_pcsamples
+= bufp
->bufsize
;
929 find_module(Address addr
)
933 for (mi
= &modules
; mi
; mi
= mi
->next
) {
934 if (mi
->active
== FALSE
)
937 if (addr
>= mi
->load_base
&& addr
< mi
->load_end
)
945 process_cgraph(ProfCallGraph
*cgp
)
948 mod_info_t
*callee_mi
, *caller_mi
;
949 ProfFunction
*calleep
, *callerp
;
950 Index caller_off
, callee_off
;
953 * Note that *callee_off* increment in the for loop below
954 * uses *calleep* and *calleep* doesn't get set until the for loop
955 * is entered. We don't expect the increment to be executed before
956 * the loop body is executed at least once, so this should be ok.
958 for (callee_off
= cgp
->functions
; callee_off
;
959 callee_off
= calleep
->next_to
) {
961 /* LINTED: pointer cast */
962 calleep
= (ProfFunction
*)((char *)cgp
+ callee_off
);
965 * We could choose either to sort the {caller, callee}
966 * list twice and assign callee/caller to modules or inspect
967 * each callee/caller in the active modules list. Since
968 * the modules list is usually very small, we'll choose the
973 * If we cannot identify a callee with a module, there's
974 * no use worrying about who called it.
976 if ((callee_mi
= find_module(calleep
->topc
)) == NULL
) {
978 if (debug
& CGRAPHDEBUG
) {
980 "[process_cgraph] callee %#llx missed\n",
986 arc
.raw_selfpc
= calleep
->topc
;
988 for (caller_off
= callee_off
; caller_off
;
989 caller_off
= callerp
->next_from
) {
991 /* LINTED: pointer cast */
992 callerp
= (ProfFunction
*)((char *)cgp
+ caller_off
);
993 if ((caller_mi
= find_module(callerp
->frompc
)) ==
996 if (debug
& CGRAPHDEBUG
) {
998 "[process_cgraph] caller %#llx "
999 "missed\n", callerp
->frompc
);
1005 arc
.raw_frompc
= callerp
->frompc
;
1006 arc
.raw_count
= callerp
->count
;
1009 if (debug
& CGRAPHDEBUG
) {
1011 "[process_cgraph] arc <%#llx, %#llx, "
1012 "%lld>\n", arc
.raw_frompc
, arc
.raw_selfpc
,
1016 tally(caller_mi
, callee_mi
, &arc
);
1026 * Two modules overlap each other if they don't lie completely *outside*
1030 does_overlap(ProfModule
*new, mod_info_t
*old
)
1032 /* case 1: new module lies completely *before* the old one */
1033 if (new->startaddr
< old
->load_base
&& new->endaddr
<= old
->load_base
)
1036 /* case 2: new module lies completely *after* the old one */
1037 if (new->startaddr
>= old
->load_end
&& new->endaddr
>= old
->load_end
)
1040 /* probably a dlopen: the modules overlap each other */
1045 is_same_as_aout(char *modpath
, struct stat
*buf
)
1047 if (stat(modpath
, buf
) == -1) {
1048 (void) fprintf(stderr
, "%s: can't get info on `%s'\n",
1053 if ((buf
->st_dev
== aout_info
.dev
) && (buf
->st_ino
== aout_info
.ino
))
1060 process_modules(ProfModuleList
*modlp
)
1062 ProfModule
*newmodp
;
1063 mod_info_t
*mi
, *last
, *new_module
;
1065 bool more_modules
= TRUE
;
1066 struct stat so_statbuf
;
1069 if (debug
& MODULEDEBUG
) {
1070 (void) printf("[process_modules] module obj version %u\n",
1075 /* Check version of module type object */
1076 if (modlp
->version
> PROF_MODULES_VER
) {
1077 (void) fprintf(stderr
, "%s: version %d for module type objects"
1078 "is not supported\n", whoami
, modlp
->version
);
1084 * Scan the PROF_MODULES_T list and add modules to current list
1085 * of modules, if they're not present already
1087 /* LINTED: pointer cast */
1088 newmodp
= (ProfModule
*)((char *)modlp
+ modlp
->modules
);
1091 * Since the prog could've been renamed after its run, we
1092 * should see if this overlaps a.out. If it does, it is
1093 * probably the renamed aout. We should also skip any other
1094 * non-sharedobj's that we see (or should we report an error ?)
1096 so_path
= (caddr_t
)modlp
+ newmodp
->path
;
1097 if (does_overlap(newmodp
, &modules
) ||
1098 is_same_as_aout(so_path
, &so_statbuf
) ||
1099 (!is_shared_obj(so_path
))) {
1102 more_modules
= FALSE
;
1104 /* LINTED: pointer cast */
1105 newmodp
= (ProfModule
*)
1106 ((caddr_t
)modlp
+ newmodp
->next
);
1108 if (debug
& MODULEDEBUG
) {
1110 "[process_modules] `%s'\n", so_path
);
1111 (void) printf(" skipped\n");
1117 if (debug
& MODULEDEBUG
)
1118 (void) printf("[process_modules] `%s'...\n", so_path
);
1122 * Check all modules (leave the first one, 'cos that
1123 * is the program executable info). If this module is already
1124 * there in the list, update the load addresses and proceed.
1127 while ((mi
= last
->next
) != NULL
) {
1129 * We expect the full pathname for all shared objects
1130 * needed by the program executable. In this case, we
1131 * simply need to compare the paths to see if they are
1134 if (strcmp(mi
->name
, so_path
) == 0)
1138 * Check if this new shared object will overlap
1139 * any existing module. If yes, remove the old one
1140 * from the linked list (but don't free it, 'cos
1141 * there may be symbols referring to this module
1144 if (does_overlap(newmodp
, mi
)) {
1146 if (debug
& MODULEDEBUG
) {
1148 "[process_modules] `%s'\n",
1163 /* Module already there, skip it */
1165 mi
->load_base
= newmodp
->startaddr
;
1166 mi
->load_end
= newmodp
->endaddr
;
1169 more_modules
= FALSE
;
1171 /* LINTED: pointer cast */
1172 newmodp
= (ProfModule
*)
1173 ((caddr_t
)modlp
+ newmodp
->next
);
1176 if (debug
& MODULEDEBUG
) {
1177 (void) printf("[process_modules] base=%#llx, "
1178 "end=%#llx\n", mi
->load_base
, mi
->load_end
);
1185 * Check if gmon.out is outdated with respect to the new
1186 * module we want to add
1188 if (gmonout_info
.mtime
< so_statbuf
.st_mtime
) {
1189 (void) fprintf(stderr
,
1190 "%s: shared obj outdates prof info\n", whoami
);
1191 (void) fprintf(stderr
, "\t(newer %s)\n", so_path
);
1195 /* Create a new module element */
1196 new_module
= malloc(sizeof (mod_info_t
));
1197 if (new_module
== NULL
) {
1198 (void) fprintf(stderr
, "%s: no room for %d bytes\n",
1199 whoami
, sizeof (mod_info_t
));
1203 /* and fill in info... */
1204 new_module
->id
= n_modules
+ 1;
1205 new_module
->load_base
= newmodp
->startaddr
;
1206 new_module
->load_end
= newmodp
->endaddr
;
1207 new_module
->name
= malloc(strlen(so_path
) + 1);
1208 if (new_module
->name
== NULL
) {
1209 (void) fprintf(stderr
, "%s: no room for %d bytes\n",
1210 whoami
, strlen(so_path
) + 1);
1213 (void) strcpy(new_module
->name
, so_path
);
1215 if (debug
& MODULEDEBUG
) {
1217 "[process_modules] base=%#llx, end=%#llx\n",
1218 new_module
->load_base
, new_module
->load_end
);
1222 /* Create this module's nameslist */
1223 process_namelist(new_module
);
1225 /* Add it to the tail of active module list */
1226 last
->next
= new_module
;
1230 if (debug
& MODULEDEBUG
) {
1232 "[process_modules] total shared objects = %ld\n",
1237 * Move to the next module in the PROF_MODULES_T list
1241 more_modules
= FALSE
;
1243 /* LINTED: pointer cast */
1244 newmodp
= (ProfModule
*)((caddr_t
)modlp
+ newmodp
->next
);
1246 } while (more_modules
);
1250 reset_active_modules(void)
1254 /* Except the executable, no other module should remain active */
1255 for (mi
= modules
.next
; mi
; mi
= mi
->next
)
1260 getpfiledata(caddr_t memp
, size_t fsz
)
1264 bool found_pcsamples
= FALSE
, found_cgraph
= FALSE
;
1267 * Before processing a new gmon.out, all modules except the
1268 * program executable must be made inactive, so that symbols
1269 * are searched only in the program executable, if we don't
1270 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1271 * because we need the active module data after we're done with
1272 * the last gmon.out, if we're doing summing.
1274 reset_active_modules();
1276 file_end
= memp
+ fsz
;
1277 /* LINTED: pointer cast */
1278 objp
= (ProfObject
*)(memp
+ ((ProfHeader
*)memp
)->size
);
1279 while ((caddr_t
)objp
< file_end
) {
1282 unsigned int type
= 0;
1284 if (debug
& MONOUTDEBUG
) {
1285 if (objp
->type
<= MAX_OBJTYPES
)
1289 "\n[getpfiledata] object %s [%#lx]\n",
1290 objname
[type
], objp
->type
);
1294 switch (objp
->type
) {
1295 case PROF_MODULES_T
:
1296 process_modules((ProfModuleList
*) objp
);
1299 case PROF_CALLGRAPH_T
:
1300 process_cgraph((ProfCallGraph
*) objp
);
1301 found_cgraph
= TRUE
;
1304 case PROF_BUFFER_T
:
1305 process_pcsamples((ProfBuffer
*) objp
);
1306 found_pcsamples
= TRUE
;
1310 (void) fprintf(stderr
,
1311 "%s: unknown prof object type=%d\n",
1312 whoami
, objp
->type
);
1315 /* LINTED: pointer cast */
1316 objp
= (ProfObject
*)((caddr_t
)objp
+ objp
->size
);
1319 if (!found_cgraph
|| !found_pcsamples
) {
1320 (void) fprintf(stderr
,
1321 "%s: missing callgraph/pcsamples object\n", whoami
);
1325 if ((caddr_t
)objp
> file_end
) {
1326 (void) fprintf(stderr
, "%s: malformed profile file.\n", whoami
);
1335 readarcs(FILE *pfile
)
1338 * the rest of the file consists of
1339 * a bunch of <from,self,count> tuples.
1347 L_cgarc64 rtld_arc64
;
1350 * If rflag is set then this is a profiled
1351 * image generated by rtld. It needs to be
1352 * 'converted' to the standard data format.
1354 if (fread(&rtld_arc64
,
1355 sizeof (L_cgarc64
), 1, pfile
) != 1)
1358 if (rtld_arc64
.cg_from
== PRF_OUTADDR64
)
1359 arc
.raw_frompc
= s_highpc
+ 0x10;
1362 (pctype
)rtld_arc64
.cg_from
;
1363 arc
.raw_selfpc
= (pctype
)rtld_arc64
.cg_to
;
1364 arc
.raw_count
= (actype
)rtld_arc64
.cg_count
;
1369 * If rflag is set then this is a profiled
1370 * image generated by rtld. It needs to be
1371 * 'converted' to the standard data format.
1373 if (fread(&rtld_arc
,
1374 sizeof (L_cgarc
), 1, pfile
) != 1)
1377 if (rtld_arc
.cg_from
== PRF_OUTADDR
)
1378 arc
.raw_frompc
= s_highpc
+ 0x10;
1380 arc
.raw_frompc
= (pctype
)
1381 (uintptr_t)rtld_arc
.cg_from
;
1382 arc
.raw_selfpc
= (pctype
)
1383 (uintptr_t)rtld_arc
.cg_to
;
1384 arc
.raw_count
= (actype
)rtld_arc
.cg_count
;
1388 if (fread(&arc
, sizeof (struct rawarc
), 1,
1394 * If these aren't big %pc's, we need to read
1395 * into the 32-bit raw arc structure, and
1396 * assign the members into the actual arc.
1398 struct rawarc32 arc32
;
1399 if (fread(&arc32
, sizeof (struct rawarc32
),
1402 arc
.raw_frompc
= (pctype
)arc32
.raw_frompc
;
1403 arc
.raw_selfpc
= (pctype
)arc32
.raw_selfpc
;
1404 arc
.raw_count
= (actype
)arc32
.raw_count
;
1409 if (debug
& SAMPLEDEBUG
) {
1410 (void) printf("[getpfile] frompc 0x%llx selfpc "
1411 "0x%llx count %lld\n", arc
.raw_frompc
,
1412 arc
.raw_selfpc
, arc
.raw_count
);
1418 tally(&modules
, &modules
, &arc
);
1425 readsamples(FILE *pfile
)
1428 unsigned_UNIT sample
;
1431 samples
= (unsigned_UNIT
*) calloc(nsamples
,
1432 sizeof (unsigned_UNIT
));
1434 (void) fprintf(stderr
,
1435 "%s: No room for %d sample pc's\n",
1436 whoami
, sampbytes
/ sizeof (unsigned_UNIT
));
1441 for (i
= 0; i
< nsamples
; i
++) {
1442 (void) fread(&sample
, sizeof (unsigned_UNIT
), 1, pfile
);
1445 samples
[i
] += sample
;
1447 if (i
!= nsamples
) {
1448 (void) fprintf(stderr
,
1449 "%s: unexpected EOF after reading %d/%d samples\n",
1450 whoami
, --i
, nsamples
);
1456 handle_versioned(FILE *pfile
, char *filename
, size_t *fsz
)
1459 bool invalid_version
;
1462 ProfHeader prof_hdr
;
1466 * Check versioning info. For now, let's say we provide
1467 * backward compatibility, so we accept all older versions.
1469 if (fread(&prof_hdr
, sizeof (ProfHeader
), 1, pfile
) == 0) {
1474 invalid_version
= FALSE
;
1475 if (prof_hdr
.h_major_ver
> PROF_MAJOR_VERSION
)
1476 invalid_version
= TRUE
;
1477 else if (prof_hdr
.h_major_ver
== PROF_MAJOR_VERSION
) {
1478 if (prof_hdr
.h_minor_ver
> PROF_MINOR_VERSION
)
1479 invalid_version
= FALSE
;
1482 if (invalid_version
) {
1483 (void) fprintf(stderr
, "%s: version %d.%d not supported\n",
1484 whoami
, prof_hdr
.h_major_ver
, prof_hdr
.h_minor_ver
);
1489 * Map gmon.out onto memory.
1491 (void) fclose(pfile
);
1492 if ((fd
= open(filename
, O_RDONLY
)) == -1) {
1497 if ((lret
= lseek(fd
, 0, SEEK_END
)) == -1) {
1503 fmem
= mmap(NULL
, *fsz
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
1504 if (fmem
== MAP_FAILED
) {
1505 (void) fprintf(stderr
, "%s: can't map %s\n", whoami
, filename
);
1510 * Before we close this fd, save this gmon.out's info to later verify
1511 * if the shared objects it references have changed since the time
1512 * they were used to generate this gmon.out
1514 if (fstat(fd
, &buf
) == -1) {
1515 (void) fprintf(stderr
, "%s: can't get info on `%s'\n",
1519 gmonout_info
.dev
= buf
.st_dev
;
1520 gmonout_info
.ino
= buf
.st_ino
;
1521 gmonout_info
.mtime
= buf
.st_mtime
;
1522 gmonout_info
.size
= buf
.st_size
;
1526 return ((void *) fmem
);
/*
 * openpfile: open the profile file named by filename for reading and
 * classify it by the magic number at its start.
 *
 * - If the magic equals PROF_MAGIC, the stream is passed on to
 *   handle_versioned() along with filename and fsz, and that result is
 *   returned.
 * - Otherwise a header is assembled in tmp from one of several on-disk
 *   layouts (L_hdr64 or L_hdr for run-time linker data, struct hdr, or
 *   struct hdr32), rejected when lowpc >= highpc, and checked against h
 *   once s_highpc is nonzero.  s_highpc, lowpc, highpc, sampbytes and
 *   nsamples are then derived from h; sampbytes is clamped to 0 when
 *   h.ncnt does not exceed hdrsize.  The open stream is returned as a
 *   void *.
 *
 * Mixing the two formats across calls is reported on stderr, using the
 * static first_time flag and the external old_style flag.
 */
1530 openpfile(char *filename
, size_t *fsz
)
/*
 * NOTE(review): magic_num is an unsigned long with no initializer on its
 * declaration, yet the fread() further down fills only
 * sizeof (unsigned int) bytes of it.  Where long is wider than int the
 * remaining bytes are not written by that read, so the comparisons with
 * PROF_MAGIC and PRF_MAGIC may see stale bits -- confirm the data model
 * this is built for, or narrow the type to unsigned int.
 */
1534 unsigned long magic_num
;
/* static: keeps its value from one call of openpfile to the next */
1536 static bool first_time
= TRUE
;
1537 extern bool old_style
;
/* open the profile file in read mode; a NULL stream takes the branch below */
1539 if ((pfile
= fopen(filename
, "r")) == NULL
) {
1545 * Read in the magic. Note that we changed the cast "unsigned long"
1546 * to "unsigned int" because that's how h_magic is defined in the
1547 * new format ProfHeader.
1549 if (fread(&magic_num
, sizeof (unsigned int), 1, pfile
) == 0) {
1557 * First check if this is versioned or *old-style* gmon.out
1559 if (magic_num
== (unsigned int)PROF_MAGIC
) {
1560 if ((!first_time
) && (old_style
== TRUE
)) {
1561 (void) fprintf(stderr
, "%s: can't mix old & new format "
1562 "profiled files\n", whoami
);
1567 return (handle_versioned(pfile
, filename
, fsz
));
1570 if ((!first_time
) && (old_style
== FALSE
)) {
1571 (void) fprintf(stderr
, "%s: can't mix old & new format "
1572 "profiled files\n", whoami
);
1581 * Now, we need to determine if this is a run-time linker
1582 * profiled file or if it is a standard gmon.out.
1584 * We do this by checking if magic matches PRF_MAGIC. If it
1585 * does, then this is a run-time linker profiled file, if it
1586 * doesn't, it must be a gmon.out file.
1588 if (magic_num
== (unsigned long)PRF_MAGIC
)
/* header size follows Bflag: struct hdr when set, struct hdr32 otherwise */
1593 hdrsize
= Bflag
? sizeof (struct hdr
) : sizeof (struct hdr32
);
1600 * If the rflag is set then the input file is
1601 * rtld profiled data, we'll read it in and convert
1602 * it to the standard format (ie: make it look like
1605 if (fread(&l_hdr64
, sizeof (L_hdr64
), 1, pfile
) == 0) {
1609 if (l_hdr64
.hd_version
!= PRF_VERSION_64
) {
1610 (void) fprintf(stderr
,
1611 "%s: expected version %d, "
1612 "got version %d when processing 64-bit "
1613 "run-time linker profiled file.\n",
1614 whoami
, PRF_VERSION_64
, l_hdr64
.hd_version
);
1618 tmp
.highpc
= (pctype
)l_hdr64
.hd_hpc
;
1619 tmp
.ncnt
= hdrsize
+ l_hdr64
.hd_psize
;
1624 * If the rflag is set then the input file is
1625 * rtld profiled data, we'll read it in and convert
1626 * it to the standard format (ie: make it look like
1629 if (fread(&l_hdr
, sizeof (L_hdr
), 1, pfile
) == 0) {
1633 if (l_hdr
.hd_version
!= PRF_VERSION
) {
1634 (void) fprintf(stderr
,
1635 "%s: expected version %d, "
1636 "got version %d when processing "
1637 "run-time linker profiled file.\n",
1638 whoami
, PRF_VERSION
, l_hdr
.hd_version
);
/*
 * hd_hpc is converted through uintptr_t before being widened to pctype;
 * ncnt is rebuilt as the header size plus hd_psize.
 */
1642 tmp
.highpc
= (pctype
)(uintptr_t)l_hdr
.hd_hpc
;
1643 tmp
.ncnt
= hdrsize
+ l_hdr
.hd_psize
;
/* this read fills tmp directly with a full struct hdr */
1647 if (fread(&tmp
, sizeof (struct hdr
), 1, pfile
) == 0) {
1653 * If we're not reading big %pc's, we need to read
1654 * the 32-bit header, and assign the members to
1655 * the actual header.
1658 if (fread(&hdr32
, sizeof (hdr32
), 1, pfile
) == 0) {
1662 tmp
.lowpc
= hdr32
.lowpc
;
1663 tmp
.highpc
= hdr32
.highpc
;
1664 tmp
.ncnt
= hdr32
.ncnt
;
1669 * perform sanity check on profiled file we've opened.
1671 if (tmp
.lowpc
>= tmp
.highpc
) {
1673 (void) fprintf(stderr
,
1674 "%s: badly formed profiled data.\n",
1677 (void) fprintf(stderr
,
1678 "%s: badly formed gmon.out file.\n",
1683 if (s_highpc
!= 0 && (tmp
.lowpc
!= h
.lowpc
||
1684 tmp
.highpc
!= h
.highpc
|| tmp
.ncnt
!= h
.ncnt
)) {
1685 (void) fprintf(stderr
,
1686 "%s: incompatible with first gmon file\n",
1692 s_highpc
= h
.highpc
;
1693 lowpc
= h
.lowpc
/ sizeof (UNIT
);
1694 highpc
= h
.highpc
/ sizeof (UNIT
);
1695 sampbytes
= h
.ncnt
> hdrsize
? h
.ncnt
- hdrsize
: 0;
1696 nsamples
= sampbytes
/ sizeof (unsigned_UNIT
);
1699 if (debug
& SAMPLEDEBUG
) {
1700 (void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1701 "0x%llx hdr.ncnt %lld\n",
1702 h
.lowpc
, h
.highpc
, h
.ncnt
);
1704 "[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n",
1707 "[openpfile] lowpc 0x%llx highpc 0x%llx\n",
1709 (void) printf("[openpfile] sampbytes %d nsamples %d\n",
1710 sampbytes
, nsamples
);
1714 return ((void *) pfile
);
1718 * Information from a gmon.out file depends on whether it's versioned
1719 * or non-versioned, *old style* gmon.out. If old-style, it is in two
1720 * parts : an array of sampling hits within pc ranges, and the arcs. If
1721 * versioned, it contains a header, followed by any number of
1722 * modules/callgraph/pcsample_buffer objects.
/*
 * getpfile: load the profile data held in the file named by filename.
 *
 * openpfile() supplies an opaque handle; fsz is passed to it by address
 * and is later used as a byte length.  The handle is consumed in one of
 * two forms visible below: as a FILE * (readsamples, readarcs, fclose)
 * or as a mapped region of fsz bytes (getpfiledata, munmap).
 *
 * NOTE(review): the test choosing between the two forms is not visible
 * in this view -- presumably the old_style flag; confirm.
 */
1725 getpfile(char *filename
)
1730 handle
= openpfile(filename
, &fsz
);
/* stream form: samples first, then arcs, from the same stream; then close */
1733 readsamples((FILE *)handle
);
1734 readarcs((FILE *)handle
);
1735 (void) fclose((FILE *)handle
);
/* mapped form: process fsz bytes starting at handle, then unmap them */
1739 getpfiledata((caddr_t
)handle
, fsz
);
1740 (void) munmap(handle
, fsz
);
1744 main(int argc
, char **argv
)
1747 nltype
**timesortnlp
;
1751 prog_name
= *argv
; /* preserve program name */
1762 while ((c
= getopt(argc
, argv
, "abd:CcDE:e:F:f:ln:sz")) != EOF
)
1778 debug
|= atoi(optarg
);
1779 (void) printf("[main] debug = 0x%x\n", debug
);
1785 addlist(Elist
, optarg
);
1787 addlist(elist
, optarg
);
1791 addlist(elist
, optarg
);
1795 addlist(Flist
, optarg
);
1797 addlist(flist
, optarg
);
1801 addlist(flist
, optarg
);
1809 number_funcs_toprint
= atoi(optarg
);
1823 (void) fprintf(stderr
,
1824 "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1825 "[ -E function-name ]\n\t[ -f function-name ] "
1826 "[ -F function-name ]\n\t[ image-file "
1827 "[ profile-file ... ] ]\n");
1831 if (optind
< argc
) {
1832 a_outname
= argv
[optind
++];
1834 a_outname
= A_OUTNAME
;
1836 if (optind
< argc
) {
1837 gmonname
= argv
[optind
++];
1839 gmonname
= GMONNAME
;
1842 * turn off default functions
1844 for (sp
= &defaultEs
[0]; *sp
; sp
++) {
1846 addlist(Elist
, *sp
);
1848 addlist(elist
, *sp
);
1851 * how many ticks per second?
1852 * if we can't tell, report time in ticks.
1854 hz
= sysconf(_SC_CLK_TCK
);
1857 (void) fprintf(stderr
, "time is in ticks, not seconds\n");
1860 getnfile(a_outname
);
1863 * get information about mon.out file(s).
1868 gmonname
= argv
[optind
++];
1871 } while (optind
<= argc
);
1873 * dump out a gmon.sum file if requested
1880 * assign samples to procedures
1886 * assemble the dynamic profile
1888 timesortnlp
= doarcs();
1891 * print the dynamic profile
1894 if (debug
& ANYDEBUG
) {
1895 /* raw output of all symbols in all their glory */
1897 (void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1898 "#calls, selfcalls, index \n");
1899 for (i
= 0; i
< modules
.nname
; i
++) { /* Print each symbol */
1900 if (timesortnlp
[i
]->name
)
1901 (void) printf(" %s ", timesortnlp
[i
]->name
);
1903 (void) printf(" <cycle> ");
1904 (void) printf(" %lld ", timesortnlp
[i
]->value
);
1905 (void) printf(" %lld ", timesortnlp
[i
]->svalue
);
1906 (void) printf(" %f ", timesortnlp
[i
]->time
);
1907 (void) printf(" %lld ", timesortnlp
[i
]->ncall
);
1908 (void) printf(" %lld ", timesortnlp
[i
]->selfcalls
);
1909 (void) printf(" %d ", timesortnlp
[i
]->index
);
1910 (void) printf(" \n");
1915 printgprof(timesortnlp
);
1917 * print the flat profile