Ignore machine-check MSRs
[freebsd-src/fkvm-freebsd.git] / usr.sbin / pmcstat / pmcstat_log.c
blobd895ab70b51cab364ae0aecadfc0eb21e3559518
1 /*-
2 * Copyright (c) 2005-2007, Joseph Koshy
3 * Copyright (c) 2007 The FreeBSD Foundation
4 * All rights reserved.
6 * Portions of this software were developed by A. Joseph Koshy under
7 * sponsorship from the FreeBSD Foundation and Google, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
32 * Transform a hwpmc(4) log into human readable form, and into
33 * gprof(1) compatible profiles.
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
39 #include <sys/param.h>
40 #include <sys/endian.h>
41 #include <sys/gmon.h>
42 #include <sys/imgact_aout.h>
43 #include <sys/imgact_elf.h>
44 #include <sys/mman.h>
45 #include <sys/pmc.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/stat.h>
49 #include <sys/wait.h>
51 #include <netinet/in.h>
53 #include <assert.h>
54 #include <err.h>
55 #include <errno.h>
56 #include <fcntl.h>
57 #include <gelf.h>
58 #include <libgen.h>
59 #include <limits.h>
60 #include <netdb.h>
61 #include <pmc.h>
62 #include <pmclog.h>
63 #include <sysexits.h>
64 #include <stdint.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <string.h>
68 #include <unistd.h>
70 #include "pmcstat.h"
/*
 * Two-argument minimum and maximum.  These are plain textual macros, so
 * an argument with side effects may be evaluated twice.
 */
#define	min(a, b)		((a) < (b) ? (a) : (b))
#define	max(a, b)		((a) > (b) ? (a) : (b))

/*
 * NOTE(review): presumably the value passed as the '_allocate' argument
 * of pmcstat_process_lookup() -- confirm against the callers.
 */
#define	PMCSTAT_ALLOCATE	1
78 * PUBLIC INTERFACES
80 * pmcstat_initialize_logging() initialize this module, called first
81 * pmcstat_shutdown_logging() orderly shutdown, called last
82 * pmcstat_open_log() open an eventlog for processing
83 * pmcstat_process_log() print/convert an event log
84 * pmcstat_close_log() finish processing an event log
86 * IMPLEMENTATION NOTES
88 * We correlate each 'callchain' or 'sample' entry seen in the event
89 * log back to an executable object in the system. Executable objects
90 * include:
91 * - program executables,
92 * - shared libraries loaded by the runtime loader,
93 * - dlopen()'ed objects loaded by the program,
94 * - the runtime loader itself,
95 * - the kernel and kernel modules.
97 * Each process that we know about is treated as a set of regions that
98 * map to executable objects. Processes are described by
99 * 'pmcstat_process' structures. Executable objects are tracked by
100 * 'pmcstat_image' structures. The kernel and kernel modules are
101 * common to all processes (they reside at the same virtual addresses
102 * for all processes). Individual processes can have their text
103 * segments and shared libraries loaded at process-specific locations.
105 * A given executable object can be in use by multiple processes
106 * (e.g., libc.so) and loaded at a different address in each.
107 * pmcstat_pcmap structures track per-image mappings.
109 * The sample log could have samples from multiple PMCs; we
110 * generate one 'gmon.out' profile per PMC.
112 * IMPLEMENTATION OF GMON OUTPUT
114 * Each executable object gets one 'gmon.out' profile, per PMC in
115 * use. Creation of 'gmon.out' profiles is done lazily. The
116 * 'gmon.out' profiles generated for a given sampling PMC are
117 * aggregates of all the samples for that particular executable
118 * object.
120 * IMPLEMENTATION OF SYSTEM-WIDE CALLGRAPH OUTPUT
122 * Each active pmcid has its own callgraph structure, described by a
123 * 'struct pmcstat_callgraph'. Given a process id and a list of pc
124 * values, we map each pc value to a tuple (image, symbol), where
125 * 'image' denotes an executable object and 'symbol' is the closest
126 * symbol that precedes the pc value. Each pc value in the list is
127 * also given a 'rank' that reflects its depth in the call stack.
/*
 * Opaque handle for an interned string.  Two handles compare equal
 * exactly when they were interned from equal strings, so callers may
 * compare handles instead of calling strcmp().
 */
typedef const void	*pmcstat_interned_string;
133 * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable
134 * names.
137 struct pmcstat_pmcrecord {
138 LIST_ENTRY(pmcstat_pmcrecord) pr_next;
139 pmc_id_t pr_pmcid;
140 pmcstat_interned_string pr_pmcname;
143 static LIST_HEAD(,pmcstat_pmcrecord) pmcstat_pmcs =
144 LIST_HEAD_INITIALIZER(&pmcstat_pmcs);
148 * struct pmcstat_gmonfile tracks a given 'gmon.out' file. These
149 * files are mmap()'ed in as needed.
152 struct pmcstat_gmonfile {
153 LIST_ENTRY(pmcstat_gmonfile) pgf_next; /* list of entries */
154 int pgf_overflow; /* whether a count overflowed */
155 pmc_id_t pgf_pmcid; /* id of the associated pmc */
156 size_t pgf_nbuckets; /* #buckets in this gmon.out */
157 unsigned int pgf_nsamples; /* #samples in this gmon.out */
158 pmcstat_interned_string pgf_name; /* pathname of gmon.out file */
159 size_t pgf_ndatabytes; /* number of bytes mapped */
160 void *pgf_gmondata; /* pointer to mmap'ed data */
161 FILE *pgf_file; /* used when writing gmon arcs */
165 * A 'pmcstat_image' structure describes an executable program on
166 * disk. 'pi_execpath' is a cookie representing the pathname of
167 * the executable. 'pi_start' and 'pi_end' are the least and greatest
168 * virtual addresses for the text segments in the executable.
169 * 'pi_gmonlist' contains a linked list of gmon.out files associated
170 * with this image.
173 enum pmcstat_image_type {
174 PMCSTAT_IMAGE_UNKNOWN = 0, /* never looked at the image */
175 PMCSTAT_IMAGE_INDETERMINABLE, /* can't tell what the image is */
176 PMCSTAT_IMAGE_ELF32, /* ELF 32 bit object */
177 PMCSTAT_IMAGE_ELF64, /* ELF 64 bit object */
178 PMCSTAT_IMAGE_AOUT /* AOUT object */
181 struct pmcstat_image {
182 LIST_ENTRY(pmcstat_image) pi_next; /* hash link */
183 TAILQ_ENTRY(pmcstat_image) pi_lru; /* LRU list */
184 pmcstat_interned_string pi_execpath; /* cookie */
185 pmcstat_interned_string pi_samplename; /* sample path name */
186 pmcstat_interned_string pi_fullpath; /* path to FS object */
188 enum pmcstat_image_type pi_type; /* executable type */
191 * Executables have pi_start and pi_end; these are zero
192 * for shared libraries.
194 uintfptr_t pi_start; /* start address (inclusive) */
195 uintfptr_t pi_end; /* end address (exclusive) */
196 uintfptr_t pi_entry; /* entry address */
197 uintfptr_t pi_vaddr; /* virtual address where loaded */
198 int pi_isdynamic; /* whether a dynamic object */
199 int pi_iskernelmodule;
200 pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */
202 /* All symbols associated with this object. */
203 struct pmcstat_symbol *pi_symbols;
204 size_t pi_symcount;
207 * An image can be associated with one or more gmon.out files;
208 * one per PMC.
210 LIST_HEAD(,pmcstat_gmonfile) pi_gmlist;
/*
 * Hash table holding every image descriptor.
 */
static LIST_HEAD(,pmcstat_image)	pmcstat_image_hash[PMCSTAT_NHASH];
219 * A 'pmcstat_pcmap' structure maps a virtual address range to an
220 * underlying 'pmcstat_image' descriptor.
222 struct pmcstat_pcmap {
223 TAILQ_ENTRY(pmcstat_pcmap) ppm_next;
224 uintfptr_t ppm_lowpc;
225 uintfptr_t ppm_highpc;
226 struct pmcstat_image *ppm_image;
230 * A 'pmcstat_process' structure models processes. Each process is
231 * associated with a set of pmcstat_pcmap structures that map
232 * addresses inside it to executable objects. This set is implemented
233 * as a list, kept sorted in ascending order of mapped addresses.
235 * 'pp_pid' holds the pid of the process. When a process exits, the
236 * 'pp_isactive' field is set to zero, but the process structure is
237 * not immediately reclaimed because there may still be samples in the
238 * log for this process.
241 struct pmcstat_process {
242 LIST_ENTRY(pmcstat_process) pp_next; /* hash-next */
243 pid_t pp_pid; /* associated pid */
244 int pp_isactive; /* whether active */
245 uintfptr_t pp_entryaddr; /* entry address */
246 TAILQ_HEAD(,pmcstat_pcmap) pp_map; /* address range map */
/*
 * Hash table holding every process descriptor.
 */
static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH];

/* The descriptor standing in for the kernel 'process'. */
static struct pmcstat_process *pmcstat_kernproc;
257 * Each function symbol tracked by pmcstat(8).
260 struct pmcstat_symbol {
261 pmcstat_interned_string ps_name;
262 uint64_t ps_start;
263 uint64_t ps_end;
267 * Each call graph node is tracked by a pmcstat_cgnode struct.
270 struct pmcstat_cgnode {
271 struct pmcstat_image *pcg_image;
272 uintfptr_t pcg_func;
273 uint32_t pcg_count;
274 uint32_t pcg_nchildren;
275 LIST_ENTRY(pmcstat_cgnode) pcg_sibling;
276 LIST_HEAD(,pmcstat_cgnode) pcg_children;
279 struct pmcstat_cgnode_hash {
280 struct pmcstat_cgnode *pch_cgnode;
281 uint32_t pch_pmcid;
282 LIST_ENTRY(pmcstat_cgnode_hash) pch_next;
285 static int pmcstat_cgnode_hash_count;
286 static pmcstat_interned_string pmcstat_previous_filename_printed;
289 * The toplevel CG nodes (i.e., with rank == 0) are placed in a hash table.
292 static LIST_HEAD(,pmcstat_cgnode_hash) pmcstat_cgnode_hash[PMCSTAT_NHASH];
/*
 * Miscellaneous counters gathered while processing a log.
 */
static struct pmcstat_stats {
	/* Executables, by kind. */
	int ps_exec_aout;		/* a.out executables seen */
	int ps_exec_elf;		/* ELF executables seen */
	int ps_exec_errors;		/* errors while processing executables */
	int ps_exec_indeterminable;	/* executables of unknown kind */

	/* Samples. */
	int ps_samples_total;		/* samples processed in total */
	int ps_samples_skipped;		/* samples filtered out, any reason */
	int ps_samples_unknown_offset;	/* rank 0 samples not inside a map */
	int ps_samples_indeterminable;	/* samples in indeterminable images */

	/* Callchains. */
	int ps_callchain_dubious_frames;/* dubious frame pointers seen */
} pmcstat_stats;
309 * Prototypes
312 static void pmcstat_gmon_create_file(struct pmcstat_gmonfile *_pgf,
313 struct pmcstat_image *_image);
314 static pmcstat_interned_string pmcstat_gmon_create_name(const char *_sd,
315 struct pmcstat_image *_img, pmc_id_t _pmcid);
316 static void pmcstat_gmon_map_file(struct pmcstat_gmonfile *_pgf);
317 static void pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf);
319 static void pmcstat_image_determine_type(struct pmcstat_image *_image,
320 struct pmcstat_args *_a);
321 static struct pmcstat_gmonfile *pmcstat_image_find_gmonfile(struct
322 pmcstat_image *_i, pmc_id_t _id);
323 static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string
324 _path, int _iskernelmodule);
325 static void pmcstat_image_get_aout_params(struct pmcstat_image *_image,
326 struct pmcstat_args *_a);
327 static void pmcstat_image_get_elf_params(struct pmcstat_image *_image,
328 struct pmcstat_args *_a);
329 static void pmcstat_image_increment_bucket(struct pmcstat_pcmap *_pcm,
330 uintfptr_t _pc, pmc_id_t _pmcid, struct pmcstat_args *_a);
331 static void pmcstat_image_link(struct pmcstat_process *_pp,
332 struct pmcstat_image *_i, uintfptr_t _lpc);
334 static void pmcstat_pmcid_add(pmc_id_t _pmcid,
335 pmcstat_interned_string _name, struct pmcstat_args *_a);
336 static const char *pmcstat_pmcid_to_name(pmc_id_t _pmcid);
338 static void pmcstat_process_aout_exec(struct pmcstat_process *_pp,
339 struct pmcstat_image *_image, uintfptr_t _entryaddr,
340 struct pmcstat_args *_a);
341 static void pmcstat_process_elf_exec(struct pmcstat_process *_pp,
342 struct pmcstat_image *_image, uintfptr_t _entryaddr,
343 struct pmcstat_args *_a);
344 static void pmcstat_process_exec(struct pmcstat_process *_pp,
345 pmcstat_interned_string _path, uintfptr_t _entryaddr,
346 struct pmcstat_args *_ao);
347 static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid,
348 int _allocate);
349 static struct pmcstat_pcmap *pmcstat_process_find_map(
350 struct pmcstat_process *_p, uintfptr_t _pc);
352 static int pmcstat_string_compute_hash(const char *_string);
353 static void pmcstat_string_initialize(void);
354 static pmcstat_interned_string pmcstat_string_intern(const char *_s);
355 static pmcstat_interned_string pmcstat_string_lookup(const char *_s);
356 static int pmcstat_string_lookup_hash(pmcstat_interned_string _is);
357 static void pmcstat_string_shutdown(void);
358 static const char *pmcstat_string_unintern(pmcstat_interned_string _is);
362 * A simple implementation of interned strings. Each interned string
363 * is assigned a unique address, so that subsequent string compares
364 * can be done by a simple pointer comparision instead of using
365 * strcmp(). This speeds up hash table lookups and saves memory if
366 * duplicate strings are the norm.
368 struct pmcstat_string {
369 LIST_ENTRY(pmcstat_string) ps_next; /* hash link */
370 int ps_len;
371 int ps_hash;
372 char *ps_string;
375 static LIST_HEAD(,pmcstat_string) pmcstat_string_hash[PMCSTAT_NHASH];
378 * Compute a 'hash' value for a string.
381 static int
382 pmcstat_string_compute_hash(const char *s)
384 int hash;
386 for (hash = 0; *s; s++)
387 hash ^= *s;
389 return (hash & PMCSTAT_HASH_MASK);
393 * Intern a copy of string 's', and return a pointer to the
394 * interned structure.
397 static pmcstat_interned_string
398 pmcstat_string_intern(const char *s)
400 struct pmcstat_string *ps;
401 const struct pmcstat_string *cps;
402 int hash, len;
404 if ((cps = pmcstat_string_lookup(s)) != NULL)
405 return (cps);
407 hash = pmcstat_string_compute_hash(s);
408 len = strlen(s);
410 if ((ps = malloc(sizeof(*ps))) == NULL)
411 err(EX_OSERR, "ERROR: Could not intern string");
412 ps->ps_len = len;
413 ps->ps_hash = hash;
414 ps->ps_string = strdup(s);
415 LIST_INSERT_HEAD(&pmcstat_string_hash[hash], ps, ps_next);
416 return ((pmcstat_interned_string) ps);
419 static const char *
420 pmcstat_string_unintern(pmcstat_interned_string str)
422 const char *s;
424 s = ((const struct pmcstat_string *) str)->ps_string;
425 return (s);
428 static pmcstat_interned_string
429 pmcstat_string_lookup(const char *s)
431 struct pmcstat_string *ps;
432 int hash, len;
434 hash = pmcstat_string_compute_hash(s);
435 len = strlen(s);
437 LIST_FOREACH(ps, &pmcstat_string_hash[hash], ps_next)
438 if (ps->ps_len == len && ps->ps_hash == hash &&
439 strcmp(ps->ps_string, s) == 0)
440 return (ps);
441 return (NULL);
444 static int
445 pmcstat_string_lookup_hash(pmcstat_interned_string s)
447 const struct pmcstat_string *ps;
449 ps = (const struct pmcstat_string *) s;
450 return (ps->ps_hash);
454 * Initialize the string interning facility.
457 static void
458 pmcstat_string_initialize(void)
460 int i;
462 for (i = 0; i < PMCSTAT_NHASH; i++)
463 LIST_INIT(&pmcstat_string_hash[i]);
467 * Destroy the string table, free'ing up space.
470 static void
471 pmcstat_string_shutdown(void)
473 int i;
474 struct pmcstat_string *ps, *pstmp;
476 for (i = 0; i < PMCSTAT_NHASH; i++)
477 LIST_FOREACH_SAFE(ps, &pmcstat_string_hash[i], ps_next,
478 pstmp) {
479 LIST_REMOVE(ps, ps_next);
480 free(ps->ps_string);
481 free(ps);
486 * Create a gmon.out file and size it.
489 static void
490 pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf,
491 struct pmcstat_image *image)
493 int fd;
494 size_t count;
495 struct gmonhdr gm;
496 const char *pathname;
497 char buffer[DEFAULT_BUFFER_SIZE];
499 pathname = pmcstat_string_unintern(pgf->pgf_name);
500 if ((fd = open(pathname, O_RDWR|O_NOFOLLOW|O_CREAT,
501 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
502 err(EX_OSERR, "ERROR: Cannot open \"%s\"", pathname);
504 gm.lpc = image->pi_start;
505 gm.hpc = image->pi_end;
506 gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) +
507 sizeof(struct gmonhdr);
508 gm.version = GMONVERSION;
509 gm.profrate = 0; /* use ticks */
510 gm.histcounter_type = 0; /* compatibility with moncontrol() */
511 gm.spare[0] = gm.spare[1] = 0;
513 /* Write out the gmon header */
514 if (write(fd, &gm, sizeof(gm)) < 0)
515 goto error;
517 /* Zero fill the samples[] array */
518 (void) memset(buffer, 0, sizeof(buffer));
520 count = pgf->pgf_ndatabytes - sizeof(struct gmonhdr);
521 while (count > sizeof(buffer)) {
522 if (write(fd, &buffer, sizeof(buffer)) < 0)
523 goto error;
524 count -= sizeof(buffer);
527 if (write(fd, &buffer, count) < 0)
528 goto error;
530 (void) close(fd);
532 return;
534 error:
535 err(EX_OSERR, "ERROR: Cannot write \"%s\"", pathname);
539 * Determine the full pathname of a gmon.out file for a given
540 * (image,pmcid) combination. Return the interned string.
543 pmcstat_interned_string
544 pmcstat_gmon_create_name(const char *samplesdir, struct pmcstat_image *image,
545 pmc_id_t pmcid)
547 const char *pmcname;
548 char fullpath[PATH_MAX];
550 pmcname = pmcstat_pmcid_to_name(pmcid);
552 (void) snprintf(fullpath, sizeof(fullpath),
553 "%s/%s/%s", samplesdir, pmcname,
554 pmcstat_string_unintern(image->pi_samplename));
556 return (pmcstat_string_intern(fullpath));
561 * Mmap in a gmon.out file for processing.
564 static void
565 pmcstat_gmon_map_file(struct pmcstat_gmonfile *pgf)
567 int fd;
568 const char *pathname;
570 pathname = pmcstat_string_unintern(pgf->pgf_name);
572 /* the gmon.out file must already exist */
573 if ((fd = open(pathname, O_RDWR | O_NOFOLLOW, 0)) < 0)
574 err(EX_OSERR, "ERROR: cannot open \"%s\"", pathname);
576 pgf->pgf_gmondata = mmap(NULL, pgf->pgf_ndatabytes,
577 PROT_READ|PROT_WRITE, MAP_NOSYNC|MAP_SHARED, fd, 0);
579 if (pgf->pgf_gmondata == MAP_FAILED)
580 err(EX_OSERR, "ERROR: cannot map \"%s\"", pathname);
582 (void) close(fd);
586 * Unmap a gmon.out file after sync'ing its data to disk.
589 static void
590 pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf)
592 (void) msync(pgf->pgf_gmondata, pgf->pgf_ndatabytes,
593 MS_SYNC);
594 (void) munmap(pgf->pgf_gmondata, pgf->pgf_ndatabytes);
595 pgf->pgf_gmondata = NULL;
598 static void
599 pmcstat_gmon_append_arc(struct pmcstat_image *image, pmc_id_t pmcid,
600 uintptr_t rawfrom, uintptr_t rawto, uint32_t count)
602 struct rawarc arc; /* from <sys/gmon.h> */
603 const char *pathname;
604 struct pmcstat_gmonfile *pgf;
606 if ((pgf = pmcstat_image_find_gmonfile(image, pmcid)) == NULL)
607 return;
609 if (pgf->pgf_file == NULL) {
610 pathname = pmcstat_string_unintern(pgf->pgf_name);
611 if ((pgf->pgf_file = fopen(pathname, "a")) == NULL)
612 return;
615 arc.raw_frompc = rawfrom + image->pi_vaddr;
616 arc.raw_selfpc = rawto + image->pi_vaddr;
617 arc.raw_count = count;
619 (void) fwrite(&arc, sizeof(arc), 1, pgf->pgf_file);
623 static struct pmcstat_gmonfile *
624 pmcstat_image_find_gmonfile(struct pmcstat_image *image, pmc_id_t pmcid)
626 struct pmcstat_gmonfile *pgf;
627 LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next)
628 if (pgf->pgf_pmcid == pmcid)
629 return (pgf);
630 return (NULL);
635 * Determine whether a given executable image is an A.OUT object, and
636 * if so, fill in its parameters from the text file.
637 * Sets image->pi_type.
640 static void
641 pmcstat_image_get_aout_params(struct pmcstat_image *image,
642 struct pmcstat_args *a)
644 int fd;
645 ssize_t nbytes;
646 struct exec ex;
647 const char *path;
648 char buffer[PATH_MAX];
650 path = pmcstat_string_unintern(image->pi_execpath);
651 assert(path != NULL);
653 if (image->pi_iskernelmodule)
654 errx(EX_SOFTWARE, "ERROR: a.out kernel modules are "
655 "unsupported \"%s\"", path);
657 (void) snprintf(buffer, sizeof(buffer), "%s%s",
658 a->pa_fsroot, path);
660 if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
661 (nbytes = read(fd, &ex, sizeof(ex))) < 0) {
662 warn("WARNING: Cannot determine type of \"%s\"", path);
663 image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
664 if (fd != -1)
665 (void) close(fd);
666 return;
669 (void) close(fd);
671 if ((unsigned) nbytes != sizeof(ex) ||
672 N_BADMAG(ex))
673 return;
675 image->pi_type = PMCSTAT_IMAGE_AOUT;
677 /* TODO: the rest of a.out processing */
679 return;
683 * Helper function.
686 static int
687 pmcstat_symbol_compare(const void *a, const void *b)
689 const struct pmcstat_symbol *sym1, *sym2;
691 sym1 = (const struct pmcstat_symbol *) a;
692 sym2 = (const struct pmcstat_symbol *) b;
694 if (sym1->ps_end <= sym2->ps_start)
695 return (-1);
696 if (sym1->ps_start >= sym2->ps_end)
697 return (1);
698 return (0);
702 * Map an address to a symbol in an image.
705 static struct pmcstat_symbol *
706 pmcstat_symbol_search(struct pmcstat_image *image, uintfptr_t addr)
708 struct pmcstat_symbol sym;
710 if (image->pi_symbols == NULL)
711 return (NULL);
713 sym.ps_name = NULL;
714 sym.ps_start = addr;
715 sym.ps_end = addr + 1;
717 return (bsearch((void *) &sym, image->pi_symbols,
718 image->pi_symcount, sizeof(struct pmcstat_symbol),
719 pmcstat_symbol_compare));
723 * Add the list of symbols in the given section to the list associated
724 * with the object.
726 static void
727 pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e,
728 Elf_Scn *scn, GElf_Shdr *sh)
730 int firsttime;
731 size_t n, newsyms, nshsyms, nfuncsyms;
732 struct pmcstat_symbol *symptr;
733 char *fnname;
734 GElf_Sym sym;
735 Elf_Data *data;
737 if ((data = elf_getdata(scn, NULL)) == NULL)
738 return;
741 * Determine the number of functions named in this
742 * section.
745 nshsyms = sh->sh_size / sh->sh_entsize;
746 for (n = nfuncsyms = 0; n < nshsyms; n++) {
747 if (gelf_getsym(data, (int) n, &sym) != &sym)
748 return;
749 if (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
750 nfuncsyms++;
753 if (nfuncsyms == 0)
754 return;
757 * Allocate space for the new entries.
759 firsttime = image->pi_symbols == NULL;
760 symptr = realloc(image->pi_symbols,
761 sizeof(*symptr) * (image->pi_symcount + nfuncsyms));
762 if (symptr == image->pi_symbols) /* realloc() failed. */
763 return;
764 image->pi_symbols = symptr;
767 * Append new symbols to the end of the current table.
769 symptr += image->pi_symcount;
771 for (n = newsyms = 0; n < nshsyms; n++) {
772 if (gelf_getsym(data, (int) n, &sym) != &sym)
773 return;
774 if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
775 continue;
777 if (!firsttime && pmcstat_symbol_search(image, sym.st_value))
778 continue; /* We've seen this symbol already. */
780 if ((fnname = elf_strptr(e, sh->sh_link, sym.st_name))
781 == NULL)
782 continue;
784 symptr->ps_name = pmcstat_string_intern(fnname);
785 symptr->ps_start = sym.st_value - image->pi_vaddr;
786 symptr->ps_end = symptr->ps_start + sym.st_size;
787 symptr++;
789 newsyms++;
792 image->pi_symcount += newsyms;
794 assert(newsyms <= nfuncsyms);
797 * Return space to the system if there were duplicates.
799 if (newsyms < nfuncsyms)
800 image->pi_symbols = realloc(image->pi_symbols,
801 sizeof(*symptr) * image->pi_symcount);
804 * Keep the list of symbols sorted.
806 qsort(image->pi_symbols, image->pi_symcount, sizeof(*symptr),
807 pmcstat_symbol_compare);
810 * Deal with function symbols that have a size of 'zero' by
811 * making them extend to the next higher address. These
812 * symbols are usually defined in assembly code.
814 for (symptr = image->pi_symbols;
815 symptr < image->pi_symbols + (image->pi_symcount - 1);
816 symptr++)
817 if (symptr->ps_start == symptr->ps_end)
818 symptr->ps_end = (symptr+1)->ps_start;
822 * Examine an ELF file to determine the size of its text segment.
823 * Sets image->pi_type if anything conclusive can be determined about
824 * this image.
827 static void
828 pmcstat_image_get_elf_params(struct pmcstat_image *image,
829 struct pmcstat_args *a)
831 int fd;
832 size_t i, nph, nsh;
833 const char *path, *elfbase;
834 uintfptr_t minva, maxva;
835 Elf *e;
836 Elf_Scn *scn;
837 GElf_Ehdr eh;
838 GElf_Phdr ph;
839 GElf_Shdr sh;
840 enum pmcstat_image_type image_type;
841 char buffer[PATH_MAX];
843 assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
845 image->pi_start = minva = ~(uintfptr_t) 0;
846 image->pi_end = maxva = (uintfptr_t) 0;
847 image->pi_type = image_type = PMCSTAT_IMAGE_INDETERMINABLE;
848 image->pi_isdynamic = 0;
849 image->pi_dynlinkerpath = NULL;
850 image->pi_vaddr = 0;
852 path = pmcstat_string_unintern(image->pi_execpath);
853 assert(path != NULL);
856 * Look for kernel modules under FSROOT/KERNELPATH/NAME,
857 * and user mode executable objects under FSROOT/PATHNAME.
859 if (image->pi_iskernelmodule)
860 (void) snprintf(buffer, sizeof(buffer), "%s%s/%s",
861 a->pa_fsroot, a->pa_kernel, path);
862 else
863 (void) snprintf(buffer, sizeof(buffer), "%s%s",
864 a->pa_fsroot, path);
866 e = NULL;
867 if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
868 (e = elf_begin(fd, ELF_C_READ, NULL)) == NULL ||
869 (elf_kind(e) != ELF_K_ELF)) {
870 warnx("WARNING: Cannot determine the type of \"%s\".",
871 buffer);
872 goto done;
875 if (gelf_getehdr(e, &eh) != &eh) {
876 warnx("WARNING: Cannot retrieve the ELF Header for "
877 "\"%s\": %s.", buffer, elf_errmsg(-1));
878 goto done;
881 if (eh.e_type != ET_EXEC && eh.e_type != ET_DYN &&
882 !(image->pi_iskernelmodule && eh.e_type == ET_REL)) {
883 warnx("WARNING: \"%s\" is of an unsupported ELF type.",
884 buffer);
885 goto done;
888 image_type = eh.e_ident[EI_CLASS] == ELFCLASS32 ?
889 PMCSTAT_IMAGE_ELF32 : PMCSTAT_IMAGE_ELF64;
892 * Determine the virtual address where an executable would be
893 * loaded. Additionally, for dynamically linked executables,
894 * save the pathname to the runtime linker.
896 if (eh.e_type == ET_EXEC) {
897 if (elf_getphnum(e, &nph) == 0) {
898 warnx("WARNING: Could not determine the number of "
899 "program headers in \"%s\": %s.", buffer,
900 elf_errmsg(-1));
901 goto done;
903 for (i = 0; i < eh.e_phnum; i++) {
904 if (gelf_getphdr(e, i, &ph) != &ph) {
905 warnx("WARNING: Retrieval of PHDR entry #%ju "
906 "in \"%s\" failed: %s.", (uintmax_t) i,
907 buffer, elf_errmsg(-1));
908 goto done;
910 switch (ph.p_type) {
911 case PT_DYNAMIC:
912 image->pi_isdynamic = 1;
913 break;
914 case PT_INTERP:
915 if ((elfbase = elf_rawfile(e, NULL)) == NULL) {
916 warnx("WARNING: Cannot retrieve the "
917 "interpreter for \"%s\": %s.",
918 buffer, elf_errmsg(-1));
919 goto done;
921 image->pi_dynlinkerpath =
922 pmcstat_string_intern(elfbase +
923 ph.p_offset);
924 break;
925 case PT_LOAD:
926 if (ph.p_offset == 0)
927 image->pi_vaddr = ph.p_vaddr;
928 break;
934 * Get the min and max VA associated with this ELF object.
936 if (elf_getshnum(e, &nsh) == 0) {
937 warnx("WARNING: Could not determine the number of sections "
938 "for \"%s\": %s.", buffer, elf_errmsg(-1));
939 goto done;
942 for (i = 0; i < nsh; i++) {
943 if ((scn = elf_getscn(e, i)) == NULL ||
944 gelf_getshdr(scn, &sh) != &sh) {
945 warnx("WARNING: Could not retrieve section header "
946 "#%ju in \"%s\": %s.", (uintmax_t) i, buffer,
947 elf_errmsg(-1));
948 goto done;
950 if (sh.sh_flags & SHF_EXECINSTR) {
951 minva = min(minva, sh.sh_addr);
952 maxva = max(maxva, sh.sh_addr + sh.sh_size);
954 if (sh.sh_type == SHT_SYMTAB || sh.sh_type == SHT_DYNSYM)
955 pmcstat_image_add_symbols(image, e, scn, &sh);
958 image->pi_start = minva;
959 image->pi_end = maxva;
960 image->pi_type = image_type;
961 image->pi_fullpath = pmcstat_string_intern(buffer);
963 done:
964 (void) elf_end(e);
965 if (fd >= 0)
966 (void) close(fd);
967 return;
971 * Given an image descriptor, determine whether it is an ELF, or AOUT.
972 * If no handler claims the image, set its type to 'INDETERMINABLE'.
975 static void
976 pmcstat_image_determine_type(struct pmcstat_image *image,
977 struct pmcstat_args *a)
979 assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
981 /* Try each kind of handler in turn */
982 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
983 pmcstat_image_get_elf_params(image, a);
984 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
985 pmcstat_image_get_aout_params(image, a);
988 * Otherwise, remember that we tried to determine
989 * the object's type and had failed.
991 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
992 image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
996 * Locate an image descriptor given an interned path, adding a fresh
997 * descriptor to the cache if necessary. This function also finds a
998 * suitable name for this image's sample file.
1000 * We defer filling in the file format specific parts of the image
1001 * structure till the time we actually see a sample that would fall
1002 * into this image.
1005 static struct pmcstat_image *
1006 pmcstat_image_from_path(pmcstat_interned_string internedpath,
1007 int iskernelmodule)
1009 int count, hash, nlen;
1010 struct pmcstat_image *pi;
1011 char *sn;
1012 char name[NAME_MAX];
1014 hash = pmcstat_string_lookup_hash(internedpath);
1016 /* First, look for an existing entry. */
1017 LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
1018 if (pi->pi_execpath == internedpath &&
1019 pi->pi_iskernelmodule == iskernelmodule)
1020 return (pi);
1023 * Allocate a new entry and place it at the head of the hash
1024 * and LRU lists.
1026 pi = malloc(sizeof(*pi));
1027 if (pi == NULL)
1028 return (NULL);
1030 pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
1031 pi->pi_execpath = internedpath;
1032 pi->pi_start = ~0;
1033 pi->pi_end = 0;
1034 pi->pi_entry = 0;
1035 pi->pi_vaddr = 0;
1036 pi->pi_isdynamic = 0;
1037 pi->pi_iskernelmodule = iskernelmodule;
1038 pi->pi_dynlinkerpath = NULL;
1039 pi->pi_symbols = NULL;
1040 pi->pi_symcount = 0;
1043 * Look for a suitable name for the sample files associated
1044 * with this image: if `basename(path)`+".gmon" is available,
1045 * we use that, otherwise we try iterating through
1046 * `basename(path)`+ "~" + NNN + ".gmon" till we get a free
1047 * entry.
1049 if ((sn = basename(pmcstat_string_unintern(internedpath))) == NULL)
1050 err(EX_OSERR, "ERROR: Cannot process \"%s\"",
1051 pmcstat_string_unintern(internedpath));
1053 nlen = strlen(sn);
1054 nlen = min(nlen, (int) (sizeof(name) - sizeof(".gmon")));
1056 snprintf(name, sizeof(name), "%.*s.gmon", nlen, sn);
1058 /* try use the unabridged name first */
1059 if (pmcstat_string_lookup(name) == NULL)
1060 pi->pi_samplename = pmcstat_string_intern(name);
1061 else {
1063 * Otherwise use a prefix from the original name and
1064 * upto 3 digits.
1066 nlen = strlen(sn);
1067 nlen = min(nlen, (int) (sizeof(name)-sizeof("~NNN.gmon")));
1068 count = 0;
1069 do {
1070 if (++count > 999)
1071 errx(EX_CANTCREAT, "ERROR: cannot create a "
1072 "gmon file for \"%s\"", name);
1073 snprintf(name, sizeof(name), "%.*s~%3.3d.gmon",
1074 nlen, sn, count);
1075 if (pmcstat_string_lookup(name) == NULL) {
1076 pi->pi_samplename =
1077 pmcstat_string_intern(name);
1078 count = 0;
1080 } while (count > 0);
1084 LIST_INIT(&pi->pi_gmlist);
1086 LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);
1088 return (pi);
1092 * Increment the bucket in the gmon.out file corresponding to 'pmcid'
1093 * and 'pc'.
1096 static void
1097 pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc,
1098 pmc_id_t pmcid, struct pmcstat_args *a)
1100 struct pmcstat_image *image;
1101 struct pmcstat_gmonfile *pgf;
1102 uintfptr_t bucket;
1103 HISTCOUNTER *hc;
1105 assert(pc >= map->ppm_lowpc && pc < map->ppm_highpc);
1107 image = map->ppm_image;
1110 * If this is the first time we are seeing a sample for
1111 * this executable image, try determine its parameters.
1113 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1114 pmcstat_image_determine_type(image, a);
1116 assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
1118 /* Ignore samples in images that we know nothing about. */
1119 if (image->pi_type == PMCSTAT_IMAGE_INDETERMINABLE) {
1120 pmcstat_stats.ps_samples_indeterminable++;
1121 return;
1125 * Find the gmon file corresponding to 'pmcid', creating it if
1126 * needed.
1128 pgf = pmcstat_image_find_gmonfile(image, pmcid);
1129 if (pgf == NULL) {
1130 if ((pgf = calloc(1, sizeof(*pgf))) == NULL)
1131 err(EX_OSERR, "ERROR:");
1133 pgf->pgf_gmondata = NULL; /* mark as unmapped */
1134 pgf->pgf_name = pmcstat_gmon_create_name(a->pa_samplesdir,
1135 image, pmcid);
1136 pgf->pgf_pmcid = pmcid;
1137 assert(image->pi_end > image->pi_start);
1138 pgf->pgf_nbuckets = (image->pi_end - image->pi_start) /
1139 FUNCTION_ALIGNMENT; /* see <machine/profile.h> */
1140 pgf->pgf_ndatabytes = sizeof(struct gmonhdr) +
1141 pgf->pgf_nbuckets * sizeof(HISTCOUNTER);
1142 pgf->pgf_nsamples = 0;
1143 pgf->pgf_file = NULL;
1145 pmcstat_gmon_create_file(pgf, image);
1147 LIST_INSERT_HEAD(&image->pi_gmlist, pgf, pgf_next);
1151 * Map the gmon file in if needed. It may have been mapped
1152 * out under memory pressure.
1154 if (pgf->pgf_gmondata == NULL)
1155 pmcstat_gmon_map_file(pgf);
1157 assert(pgf->pgf_gmondata != NULL);
1163 bucket = (pc - map->ppm_lowpc) / FUNCTION_ALIGNMENT;
1165 assert(bucket < pgf->pgf_nbuckets);
1167 hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata +
1168 sizeof(struct gmonhdr));
1170 /* saturating add */
1171 if (hc[bucket] < 0xFFFFU) /* XXX tie this to sizeof(HISTCOUNTER) */
1172 hc[bucket]++;
1173 else /* mark that an overflow occurred */
1174 pgf->pgf_overflow = 1;
1176 pgf->pgf_nsamples++;
1180 * Record the fact that PC values from 'start' to 'end' come from
1181 * image 'image'.
1184 static void
1185 pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image,
1186 uintfptr_t start)
1188 struct pmcstat_pcmap *pcm, *pcmnew;
1189 uintfptr_t offset;
1191 assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN &&
1192 image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE);
1194 if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
1195 err(EX_OSERR, "ERROR: Cannot create a map entry");
1198 * Adjust the map entry to only cover the text portion
1199 * of the object.
1202 offset = start - image->pi_vaddr;
1203 pcmnew->ppm_lowpc = image->pi_start + offset;
1204 pcmnew->ppm_highpc = image->pi_end + offset;
1205 pcmnew->ppm_image = image;
1207 assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc);
1209 /* Overlapped mmap()'s are assumed to never occur. */
1210 TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next)
1211 if (pcm->ppm_lowpc >= pcmnew->ppm_highpc)
1212 break;
1214 if (pcm == NULL)
1215 TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next);
1216 else
1217 TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next);
1221 * Unmap images in the range [start..end) associated with process
1222 * 'pp'.
1225 static void
1226 pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
1227 uintfptr_t end)
1229 struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew;
1231 assert(pp != NULL);
1232 assert(start < end);
1235 * Cases:
1236 * - we could have the range completely in the middle of an
1237 * existing pcmap; in this case we have to split the pcmap
1238 * structure into two (i.e., generate a 'hole').
1239 * - we could have the range covering multiple pcmaps; these
1240 * will have to be removed.
1241 * - we could have either 'start' or 'end' falling in the
1242 * middle of a pcmap; in this case shorten the entry.
1244 TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) {
1245 assert(pcm->ppm_lowpc < pcm->ppm_highpc);
1246 if (pcm->ppm_highpc <= start)
1247 continue;
1248 if (pcm->ppm_lowpc >= end)
1249 return;
1250 if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) {
1252 * The current pcmap is completely inside the
1253 * unmapped range: remove it entirely.
1255 TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next);
1256 free(pcm);
1257 } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) {
1259 * Split this pcmap into two; curtail the
1260 * current map to end at [start-1], and start
1261 * the new one at [end].
1263 if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
1264 err(EX_OSERR, "ERROR: Cannot split a map "
1265 "entry");
1267 pcmnew->ppm_image = pcm->ppm_image;
1269 pcmnew->ppm_lowpc = end;
1270 pcmnew->ppm_highpc = pcm->ppm_highpc;
1272 pcm->ppm_highpc = start;
1274 TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next);
1276 return;
1277 } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc <= end)
1278 pcm->ppm_highpc = start;
1279 else if (pcm->ppm_lowpc >= start && pcm->ppm_highpc > end)
1280 pcm->ppm_lowpc = end;
1281 else
1282 assert(0);
1287 * Add a {pmcid,name} mapping.
1290 static void
1291 pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps,
1292 struct pmcstat_args *a)
1294 struct pmcstat_pmcrecord *pr;
1295 struct stat st;
1296 char fullpath[PATH_MAX];
1298 /* Replace an existing name for the PMC. */
1299 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1300 if (pr->pr_pmcid == pmcid) {
1301 pr->pr_pmcname = ps;
1302 return;
1306 * Otherwise, allocate a new descriptor and create the
1307 * appropriate directory to hold gmon.out files.
1309 if ((pr = malloc(sizeof(*pr))) == NULL)
1310 err(EX_OSERR, "ERROR: Cannot allocate pmc record");
1312 pr->pr_pmcid = pmcid;
1313 pr->pr_pmcname = ps;
1314 LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1316 (void) snprintf(fullpath, sizeof(fullpath), "%s/%s", a->pa_samplesdir,
1317 pmcstat_string_unintern(ps));
1319 /* If the path name exists, it should be a directory */
1320 if (stat(fullpath, &st) == 0 && S_ISDIR(st.st_mode))
1321 return;
1323 if (mkdir(fullpath, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) < 0)
1324 err(EX_OSERR, "ERROR: Cannot create directory \"%s\"",
1325 fullpath);
1329 * Given a pmcid in use, find its human-readable name.
1332 static const char *
1333 pmcstat_pmcid_to_name(pmc_id_t pmcid)
1335 struct pmcstat_pmcrecord *pr;
1336 char fullpath[PATH_MAX];
1338 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1339 if (pr->pr_pmcid == pmcid)
1340 return (pmcstat_string_unintern(pr->pr_pmcname));
1342 /* create a default name and add this entry */
1343 if ((pr = malloc(sizeof(*pr))) == NULL)
1344 err(EX_OSERR, "ERROR: ");
1345 pr->pr_pmcid = pmcid;
1347 (void) snprintf(fullpath, sizeof(fullpath), "%X", (unsigned int) pmcid);
1348 pr->pr_pmcname = pmcstat_string_intern(fullpath);
1350 LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1352 return (pmcstat_string_unintern(pr->pr_pmcname));
1356 * Associate an AOUT image with a process.
1359 static void
1360 pmcstat_process_aout_exec(struct pmcstat_process *pp,
1361 struct pmcstat_image *image, uintfptr_t entryaddr,
1362 struct pmcstat_args *a)
1364 (void) pp;
1365 (void) image;
1366 (void) entryaddr;
1367 (void) a;
1368 /* TODO Implement a.out handling */
1372 * Associate an ELF image with a process.
/*
 * Summary: maps the executable 'image' into process 'pp' at the image's
 * own pi_vaddr and, when the image is marked dynamic, also maps the
 * runtime loader named by image->pi_dynlinkerpath.  'image' must
 * already be typed ELF32 or ELF64 (asserted below).  'entryaddr' is
 * only used to locate the runtime loader.
 */
1375 static void
1376 pmcstat_process_elf_exec(struct pmcstat_process *pp,
1377 struct pmcstat_image *image, uintfptr_t entryaddr,
1378 struct pmcstat_args *a)
1380 uintmax_t libstart;
1381 struct pmcstat_image *rtldimage;
1383 assert(image->pi_type == PMCSTAT_IMAGE_ELF32 ||
1384 image->pi_type == PMCSTAT_IMAGE_ELF64);
1386 /* Create a map entry for the base executable. */
1387 pmcstat_image_link(pp, image, image->pi_vaddr);
1390 * For dynamically linked executables we need to determine
1391 * where the dynamic linker was mapped to for this process,
1392 * Subsequent executable objects that are mapped in by the
1393 * dynamic linker will be tracked by log events of type
1394 * PMCLOG_TYPE_MAP_IN.
1397 if (image->pi_isdynamic) {
1400 * The runtime loader gets loaded just after the maximum
1401 * possible heap address. Like so:
1403 * [ TEXT DATA BSS HEAP -->*RTLD SHLIBS <--STACK]
1404 * ^ ^
1405 * 0 VM_MAXUSER_ADDRESS
1408 * The exact address where the loader gets mapped in
1409 * will vary according to the size of the executable
1410 * and the limits on the size of the process'es data
1411 * segment at the time of exec(). The entry address
1412 * recorded at process exec time corresponds to the
1413 * 'start' address inside the dynamic linker. From
1414 * this we can figure out the address where the
1415 * runtime loader's file object had been mapped to.
1417 rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath,
/* No image descriptor for the loader: warn, count an exec error, stop. */
1419 if (rtldimage == NULL) {
1420 warnx("WARNING: Cannot find image for \"%s\".",
1421 pmcstat_string_unintern(image->pi_dynlinkerpath));
1422 pmcstat_stats.ps_exec_errors++;
1423 return;
/* Loader image not examined yet: fetch its ELF parameters now. */
1426 if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1427 pmcstat_image_get_elf_params(rtldimage, a);
/* Unlike the lookup failure above, this path does not bump ps_exec_errors. */
1429 if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 &&
1430 rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) {
1431 warnx("WARNING: rtld not an ELF object \"%s\".",
1432 pmcstat_string_unintern(image->pi_dynlinkerpath));
1433 return;
/* The logged entry address minus the loader's pi_entry is where it is linked. */
1436 libstart = entryaddr - rtldimage->pi_entry;
1437 pmcstat_image_link(pp, rtldimage, libstart);
1442 * Find the process descriptor corresponding to a PID. If 'allocate'
1443 * is zero, we return a NULL if a pid descriptor could not be found or
1444 * a process descriptor process. If 'allocate' is non-zero, then we
1445 * will attempt to allocate a fresh process descriptor. Zombie
1446 * process descriptors are only removed if a fresh allocation for the
1447 * same PID is requested.
1450 static struct pmcstat_process *
1451 pmcstat_process_lookup(pid_t pid, int allocate)
1453 uint32_t hash;
1454 struct pmcstat_pcmap *ppm, *ppmtmp;
1455 struct pmcstat_process *pp, *pptmp;
1457 hash = (uint32_t) pid & PMCSTAT_HASH_MASK; /* simplicity wins */
1459 LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[hash], pp_next, pptmp)
1460 if (pp->pp_pid == pid) {
1461 /* Found a descriptor, check and process zombies */
1462 if (allocate && pp->pp_isactive == 0) {
1463 /* remove maps */
1464 TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next,
1465 ppmtmp) {
1466 TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
1467 free(ppm);
1469 /* remove process entry */
1470 LIST_REMOVE(pp, pp_next);
1471 free(pp);
1472 break;
1474 return (pp);
1477 if (!allocate)
1478 return (NULL);
1480 if ((pp = malloc(sizeof(*pp))) == NULL)
1481 err(EX_OSERR, "ERROR: Cannot allocate pid descriptor");
1483 pp->pp_pid = pid;
1484 pp->pp_isactive = 1;
1486 TAILQ_INIT(&pp->pp_map);
1488 LIST_INSERT_HEAD(&pmcstat_process_hash[hash], pp, pp_next);
1489 return (pp);
1493 * Associate an image and a process.
1496 static void
1497 pmcstat_process_exec(struct pmcstat_process *pp,
1498 pmcstat_interned_string path, uintfptr_t entryaddr,
1499 struct pmcstat_args *a)
1501 struct pmcstat_image *image;
1503 if ((image = pmcstat_image_from_path(path, 0)) == NULL) {
1504 pmcstat_stats.ps_exec_errors++;
1505 return;
1508 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1509 pmcstat_image_determine_type(image, a);
1511 assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
1513 switch (image->pi_type) {
1514 case PMCSTAT_IMAGE_ELF32:
1515 case PMCSTAT_IMAGE_ELF64:
1516 pmcstat_stats.ps_exec_elf++;
1517 pmcstat_process_elf_exec(pp, image, entryaddr, a);
1518 break;
1520 case PMCSTAT_IMAGE_AOUT:
1521 pmcstat_stats.ps_exec_aout++;
1522 pmcstat_process_aout_exec(pp, image, entryaddr, a);
1523 break;
1525 case PMCSTAT_IMAGE_INDETERMINABLE:
1526 pmcstat_stats.ps_exec_indeterminable++;
1527 break;
1529 default:
1530 err(EX_SOFTWARE, "ERROR: Unsupported executable type for "
1531 "\"%s\"", pmcstat_string_unintern(path));
1537 * Find the map entry associated with process 'p' at PC value 'pc'.
1540 static struct pmcstat_pcmap *
1541 pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc)
1543 struct pmcstat_pcmap *ppm;
1545 TAILQ_FOREACH(ppm, &p->pp_map, ppm_next) {
1546 if (pc >= ppm->ppm_lowpc && pc < ppm->ppm_highpc)
1547 return (ppm);
1548 if (pc < ppm->ppm_lowpc)
1549 return (NULL);
1552 return (NULL);
1555 static struct pmcstat_cgnode *
1556 pmcstat_cgnode_allocate(struct pmcstat_image *image, uintfptr_t pc)
1558 struct pmcstat_cgnode *cg;
1560 if ((cg = malloc(sizeof(*cg))) == NULL)
1561 err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
1563 cg->pcg_image = image;
1564 cg->pcg_func = pc;
1566 cg->pcg_count = 0;
1567 cg->pcg_nchildren = 0;
1568 LIST_INIT(&cg->pcg_children);
1570 return (cg);
1574 * Free a node and its children.
1576 static void
1577 pmcstat_cgnode_free(struct pmcstat_cgnode *cg)
1579 struct pmcstat_cgnode *cgc, *cgtmp;
1581 LIST_FOREACH_SAFE(cgc, &cg->pcg_children, pcg_sibling, cgtmp)
1582 pmcstat_cgnode_free(cgc);
1583 free(cg);
1587 * Look for a callgraph node associated with pmc `pmcid' in the global
1588 * hash table that corresponds to the given `pc' value in the process
1589 * `pp'.
1591 static struct pmcstat_cgnode *
1592 pmcstat_cgnode_hash_lookup_pc(struct pmcstat_process *pp, uint32_t pmcid,
1593 uintfptr_t pc, int usermode)
1595 struct pmcstat_pcmap *ppm;
1596 struct pmcstat_symbol *sym;
1597 struct pmcstat_image *image;
1598 struct pmcstat_cgnode *cg;
1599 struct pmcstat_cgnode_hash *h;
1600 uintfptr_t loadaddress;
1601 unsigned int i, hash;
1603 ppm = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, pc);
1604 if (ppm == NULL)
1605 return (NULL);
1607 image = ppm->ppm_image;
1609 loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start;
1610 pc -= loadaddress; /* Convert to an offset in the image. */
1613 * Try determine the function at this offset. If we can't
1614 * find a function round leave the `pc' value alone.
1616 if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
1617 pc = sym->ps_start;
1619 for (hash = i = 0; i < sizeof(uintfptr_t); i++)
1620 hash += (pc >> i) & 0xFF;
1622 hash &= PMCSTAT_HASH_MASK;
1624 cg = NULL;
1625 LIST_FOREACH(h, &pmcstat_cgnode_hash[hash], pch_next)
1627 if (h->pch_pmcid != pmcid)
1628 continue;
1630 cg = h->pch_cgnode;
1632 assert(cg != NULL);
1634 if (cg->pcg_image == image && cg->pcg_func == pc)
1635 return (cg);
1639 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
1640 * new callgraph node and a new hash table entry for it.
1642 cg = pmcstat_cgnode_allocate(image, pc);
1643 if ((h = malloc(sizeof(*h))) == NULL)
1644 err(EX_OSERR, "ERROR: Could not allocate callgraph node");
1646 h->pch_pmcid = pmcid;
1647 h->pch_cgnode = cg;
1648 LIST_INSERT_HEAD(&pmcstat_cgnode_hash[hash], h, pch_next);
1650 pmcstat_cgnode_hash_count++;
1652 return (cg);
1656 * Compare two callgraph nodes for sorting.
1658 static int
1659 pmcstat_cgnode_compare(const void *a, const void *b)
1661 const struct pmcstat_cgnode *const *pcg1, *const *pcg2, *cg1, *cg2;
1663 pcg1 = (const struct pmcstat_cgnode *const *) a;
1664 cg1 = *pcg1;
1665 pcg2 = (const struct pmcstat_cgnode *const *) b;
1666 cg2 = *pcg2;
1668 /* Sort in reverse order */
1669 if (cg1->pcg_count < cg2->pcg_count)
1670 return (1);
1671 if (cg1->pcg_count > cg2->pcg_count)
1672 return (-1);
1673 return (0);
1677 * Find (allocating if a needed) a callgraph node in the given
1678 * parent with the same (image, pcoffset) pair.
1681 static struct pmcstat_cgnode *
1682 pmcstat_cgnode_find(struct pmcstat_cgnode *parent, struct pmcstat_image *image,
1683 uintfptr_t pcoffset)
1685 struct pmcstat_cgnode *child;
1687 LIST_FOREACH(child, &parent->pcg_children, pcg_sibling) {
1688 if (child->pcg_image == image &&
1689 child->pcg_func == pcoffset)
1690 return (child);
1694 * Allocate a new structure.
1697 child = pmcstat_cgnode_allocate(image, pcoffset);
1700 * Link it into the parent.
1702 LIST_INSERT_HEAD(&parent->pcg_children, child, pcg_sibling);
1703 parent->pcg_nchildren++;
1705 return (child);
1709 * Print one callgraph node. The output format is:
1711 * indentation %(parent's samples) #nsamples function@object
1713 static void
1714 pmcstat_cgnode_print(struct pmcstat_args *a, struct pmcstat_cgnode *cg,
1715 int depth, uint32_t total)
1717 uint32_t n;
1718 const char *space;
1719 struct pmcstat_symbol *sym;
1720 struct pmcstat_cgnode **sortbuffer, **cgn, *pcg;
1722 space = " ";
1724 if (depth > 0)
1725 (void) fprintf(a->pa_graphfile, "%*s", depth, space);
1727 if (cg->pcg_count == total)
1728 (void) fprintf(a->pa_graphfile, "100.0%% ");
1729 else
1730 (void) fprintf(a->pa_graphfile, "%05.2f%% ",
1731 100.0 * cg->pcg_count / total);
1733 n = fprintf(a->pa_graphfile, " [%u] ", cg->pcg_count);
1735 /* #samples is a 12 character wide field. */
1736 if (n < 12)
1737 (void) fprintf(a->pa_graphfile, "%*s", 12 - n, space);
1739 if (depth > 0)
1740 (void) fprintf(a->pa_graphfile, "%*s", depth, space);
1742 sym = pmcstat_symbol_search(cg->pcg_image, cg->pcg_func);
1743 if (sym)
1744 (void) fprintf(a->pa_graphfile, "%s",
1745 pmcstat_string_unintern(sym->ps_name));
1746 else
1747 (void) fprintf(a->pa_graphfile, "%p",
1748 (void *) (cg->pcg_image->pi_vaddr + cg->pcg_func));
1750 if (pmcstat_previous_filename_printed !=
1751 cg->pcg_image->pi_fullpath) {
1752 pmcstat_previous_filename_printed = cg->pcg_image->pi_fullpath;
1753 (void) fprintf(a->pa_graphfile, " @ %s\n",
1754 pmcstat_string_unintern(
1755 pmcstat_previous_filename_printed));
1756 } else
1757 (void) fprintf(a->pa_graphfile, "\n");
1759 if (cg->pcg_nchildren == 0)
1760 return;
1762 if ((sortbuffer = (struct pmcstat_cgnode **)
1763 malloc(sizeof(struct pmcstat_cgnode *) *
1764 cg->pcg_nchildren)) == NULL)
1765 err(EX_OSERR, "ERROR: Cannot print callgraph");
1766 cgn = sortbuffer;
1768 LIST_FOREACH(pcg, &cg->pcg_children, pcg_sibling)
1769 *cgn++ = pcg;
1771 assert(cgn - sortbuffer == (int) cg->pcg_nchildren);
1773 qsort(sortbuffer, cg->pcg_nchildren, sizeof(struct pmcstat_cgnode *),
1774 pmcstat_cgnode_compare);
1776 for (cgn = sortbuffer, n = 0; n < cg->pcg_nchildren; n++, cgn++)
1777 pmcstat_cgnode_print(a, *cgn, depth+1, cg->pcg_count);
1779 free(sortbuffer);
1783 * Record a callchain.
1786 static void
1787 pmcstat_record_callchain(struct pmcstat_process *pp, uint32_t pmcid,
1788 uint32_t nsamples, uintfptr_t *cc, int usermode, struct pmcstat_args *a)
1790 uintfptr_t pc, loadaddress;
1791 uint32_t n;
1792 struct pmcstat_image *image;
1793 struct pmcstat_pcmap *ppm;
1794 struct pmcstat_symbol *sym;
1795 struct pmcstat_cgnode *parent, *child;
1798 * Find the callgraph node recorded in the global hash table
1799 * for this (pmcid, pc).
1802 pc = cc[0];
1803 parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode);
1804 if (parent == NULL) {
1805 pmcstat_stats.ps_callchain_dubious_frames++;
1806 return;
1809 parent->pcg_count++;
1812 * For each return address in the call chain record, subject
1813 * to the maximum depth desired.
1814 * - Find the image associated with the sample. Stop if there
1815 * there is no valid image at that address.
1816 * - Find the function that overlaps the return address.
1817 * - If found: use the start address of the function.
1818 * If not found (say an object's symbol table is not present or
1819 * is incomplete), round down to th gprof bucket granularity.
1820 * - Convert return virtual address to an offset in the image.
1821 * - Look for a child with the same {offset,image} tuple,
1822 * inserting one if needed.
1823 * - Increment the count of occurrences of the child.
1826 for (n = 1; n < (uint32_t) a->pa_graphdepth && n < nsamples; n++,
1827 parent = child) {
1828 pc = cc[n];
1830 ppm = pmcstat_process_find_map(usermode ? pp :
1831 pmcstat_kernproc, pc);
1832 if (ppm == NULL)
1833 return;
1835 image = ppm->ppm_image;
1836 loadaddress = ppm->ppm_lowpc + image->pi_vaddr -
1837 image->pi_start;
1838 pc -= loadaddress;
1840 if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
1841 pc = sym->ps_start;
1843 child = pmcstat_cgnode_find(parent, image, pc);
1844 child->pcg_count++;
1849 * Printing a callgraph for a PMC.
1851 static void
1852 pmcstat_callgraph_print_for_pmcid(struct pmcstat_args *a,
1853 struct pmcstat_pmcrecord *pmcr)
1855 int n, nentries;
1856 uint32_t nsamples, pmcid;
1857 struct pmcstat_cgnode **sortbuffer, **cgn;
1858 struct pmcstat_cgnode_hash *pch;
1861 * We pull out all callgraph nodes in the top-level hash table
1862 * with a matching PMC id. We then sort these based on the
1863 * frequency of occurrence. Each callgraph node is then
1864 * printed.
1867 nsamples = 0;
1868 pmcid = pmcr->pr_pmcid;
1869 if ((sortbuffer = (struct pmcstat_cgnode **)
1870 malloc(sizeof(struct pmcstat_cgnode *) *
1871 pmcstat_cgnode_hash_count)) == NULL)
1872 err(EX_OSERR, "ERROR: Cannot sort callgraph");
1873 cgn = sortbuffer;
1875 memset(sortbuffer, 0xFF, pmcstat_cgnode_hash_count *
1876 sizeof(struct pmcstat_cgnode **));
1878 for (n = 0; n < PMCSTAT_NHASH; n++)
1879 LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next)
1880 if (pch->pch_pmcid == pmcid) {
1881 nsamples += pch->pch_cgnode->pcg_count;
1882 *cgn++ = pch->pch_cgnode;
1885 nentries = cgn - sortbuffer;
1886 assert(nentries <= pmcstat_cgnode_hash_count);
1888 if (nentries == 0)
1889 return;
1891 qsort(sortbuffer, nentries, sizeof(struct pmcstat_cgnode *),
1892 pmcstat_cgnode_compare);
1894 (void) fprintf(a->pa_graphfile,
1895 "@ %s [%u samples]\n\n",
1896 pmcstat_string_unintern(pmcr->pr_pmcname),
1897 nsamples);
1899 for (cgn = sortbuffer, n = 0; n < nentries; n++, cgn++) {
1900 pmcstat_previous_filename_printed = NULL;
1901 pmcstat_cgnode_print(a, *cgn, 0, nsamples);
1902 (void) fprintf(a->pa_graphfile, "\n");
1905 free(sortbuffer);
1909 * Print out callgraphs.
1912 static void
1913 pmcstat_callgraph_print(struct pmcstat_args *a)
1915 struct pmcstat_pmcrecord *pmcr;
1917 LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next)
1918 pmcstat_callgraph_print_for_pmcid(a, pmcr);
1921 static void
1922 pmcstat_cgnode_do_gmon_arcs(struct pmcstat_cgnode *cg, pmc_id_t pmcid)
1924 struct pmcstat_cgnode *cgc;
1927 * Look for child nodes that belong to the same image.
1930 LIST_FOREACH(cgc, &cg->pcg_children, pcg_sibling) {
1931 if (cgc->pcg_image == cg->pcg_image)
1932 pmcstat_gmon_append_arc(cg->pcg_image, pmcid,
1933 cgc->pcg_func, cg->pcg_func, cgc->pcg_count);
1934 if (cgc->pcg_nchildren > 0)
1935 pmcstat_cgnode_do_gmon_arcs(cgc, pmcid);
1939 static void
1940 pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmc_id_t pmcid)
1942 int n;
1943 struct pmcstat_cgnode_hash *pch;
1945 for (n = 0; n < PMCSTAT_NHASH; n++)
1946 LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next)
1947 if (pch->pch_pmcid == pmcid &&
1948 pch->pch_cgnode->pcg_nchildren > 1)
1949 pmcstat_cgnode_do_gmon_arcs(pch->pch_cgnode,
1950 pmcid);
1954 static void
1955 pmcstat_callgraph_do_gmon_arcs(void)
1957 struct pmcstat_pmcrecord *pmcr;
1959 LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next)
1960 pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmcr->pr_pmcid);
1964 * Convert a hwpmc(4) log to profile information. A system-wide
1965 * callgraph is generated if FLAG_DO_CALLGRAPHS is set. gmon.out
1966 * files usable by gprof(1) are created if FLAG_DO_GPROF is set.
1968 static int
1969 pmcstat_analyze_log(struct pmcstat_args *a)
1971 uint32_t cpu, cpuflags;
1972 uintfptr_t pc;
1973 pid_t pid;
1974 struct pmcstat_image *image;
1975 struct pmcstat_process *pp, *ppnew;
1976 struct pmcstat_pcmap *ppm, *ppmtmp;
1977 struct pmclog_ev ev;
1978 pmcstat_interned_string image_path;
1980 assert(a->pa_flags & FLAG_DO_ANALYSIS);
1982 if (elf_version(EV_CURRENT) == EV_NONE)
1983 err(EX_UNAVAILABLE, "Elf library intialization failed");
1985 while (pmclog_read(a->pa_logparser, &ev) == 0) {
1986 assert(ev.pl_state == PMCLOG_OK);
1988 switch (ev.pl_type) {
1989 case PMCLOG_TYPE_INITIALIZE:
1990 if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
1991 PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0)
1992 warnx("WARNING: Log version 0x%x does not "
1993 "match compiled version 0x%x.",
1994 ev.pl_u.pl_i.pl_version,
1995 PMC_VERSION_MAJOR);
1996 break;
1998 case PMCLOG_TYPE_MAP_IN:
2000 * Introduce an address range mapping for a
2001 * userland process or the kernel (pid == -1).
2003 * We always allocate a process descriptor so
2004 * that subsequent samples seen for this
2005 * address range are mapped to the current
2006 * object being mapped in.
2008 pid = ev.pl_u.pl_mi.pl_pid;
2009 if (pid == -1)
2010 pp = pmcstat_kernproc;
2011 else
2012 pp = pmcstat_process_lookup(pid,
2013 PMCSTAT_ALLOCATE);
2015 assert(pp != NULL);
2017 image_path = pmcstat_string_intern(ev.pl_u.pl_mi.
2018 pl_pathname);
2019 image = pmcstat_image_from_path(image_path, pid == -1);
2020 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
2021 pmcstat_image_determine_type(image, a);
2022 if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE)
2023 pmcstat_image_link(pp, image,
2024 ev.pl_u.pl_mi.pl_start);
2025 break;
2027 case PMCLOG_TYPE_MAP_OUT:
2029 * Remove an address map.
2031 pid = ev.pl_u.pl_mo.pl_pid;
2032 if (pid == -1)
2033 pp = pmcstat_kernproc;
2034 else
2035 pp = pmcstat_process_lookup(pid, 0);
2037 if (pp == NULL) /* unknown process */
2038 break;
2040 pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start,
2041 ev.pl_u.pl_mo.pl_end);
2042 break;
2044 case PMCLOG_TYPE_PCSAMPLE:
2046 * Note: the `PCSAMPLE' log entry is not
2047 * generated by hpwmc(4) after version 2.
2051 * We bring in the gmon file for the image
2052 * currently associated with the PMC & pid
2053 * pair and increment the appropriate entry
2054 * bin inside this.
2056 pmcstat_stats.ps_samples_total++;
2058 pc = ev.pl_u.pl_s.pl_pc;
2059 pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid,
2060 PMCSTAT_ALLOCATE);
2061 if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL &&
2062 (ppm = pmcstat_process_find_map(pmcstat_kernproc,
2063 pc)) == NULL) { /* unknown process,offset pair */
2064 pmcstat_stats.ps_samples_unknown_offset++;
2065 break;
2068 pmcstat_image_increment_bucket(ppm, pc,
2069 ev.pl_u.pl_s.pl_pmcid, a);
2071 break;
2073 case PMCLOG_TYPE_CALLCHAIN:
2074 pmcstat_stats.ps_samples_total++;
2076 cpuflags = ev.pl_u.pl_cc.pl_cpuflags;
2077 cpu = PMC_CALLCHAIN_CPUFLAGS_TO_CPU(cpuflags);
2079 /* Filter on the CPU id. */
2080 if ((a->pa_cpumask & (1 << cpu)) == 0) {
2081 pmcstat_stats.ps_samples_skipped++;
2082 break;
2085 pp = pmcstat_process_lookup(ev.pl_u.pl_cc.pl_pid,
2086 PMCSTAT_ALLOCATE);
2088 pmcstat_record_callchain(pp,
2089 ev.pl_u.pl_cc.pl_pmcid, ev.pl_u.pl_cc.pl_npc,
2090 ev.pl_u.pl_cc.pl_pc,
2091 PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags), a);
2093 if ((a->pa_flags & FLAG_DO_GPROF) == 0)
2094 break;
2096 pc = ev.pl_u.pl_cc.pl_pc[0];
2097 if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL &&
2098 (ppm = pmcstat_process_find_map(pmcstat_kernproc,
2099 pc)) == NULL) { /* unknown offset */
2100 pmcstat_stats.ps_samples_unknown_offset++;
2101 break;
2104 pmcstat_image_increment_bucket(ppm, pc,
2105 ev.pl_u.pl_cc.pl_pmcid, a);
2107 break;
2109 case PMCLOG_TYPE_PMCALLOCATE:
2111 * Record the association pmc id between this
2112 * PMC and its name.
2114 pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid,
2115 pmcstat_string_intern(ev.pl_u.pl_a.pl_evname), a);
2116 break;
2118 case PMCLOG_TYPE_PROCEXEC:
2121 * Change the executable image associated with
2122 * a process.
2124 pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid,
2125 PMCSTAT_ALLOCATE);
2127 /* delete the current process map */
2128 TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
2129 TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
2130 free(ppm);
2133 /* associate this process image */
2134 image_path = pmcstat_string_intern(
2135 ev.pl_u.pl_x.pl_pathname);
2136 assert(image_path != NULL);
2137 pmcstat_process_exec(pp, image_path,
2138 ev.pl_u.pl_x.pl_entryaddr, a);
2139 break;
2141 case PMCLOG_TYPE_PROCEXIT:
2144 * Due to the way the log is generated, the
2145 * last few samples corresponding to a process
2146 * may appear in the log after the process
2147 * exit event is recorded. Thus we keep the
2148 * process' descriptor and associated data
2149 * structures around, but mark the process as
2150 * having exited.
2152 pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0);
2153 if (pp == NULL)
2154 break;
2155 pp->pp_isactive = 0; /* mark as a zombie */
2156 break;
2158 case PMCLOG_TYPE_SYSEXIT:
2159 pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0);
2160 if (pp == NULL)
2161 break;
2162 pp->pp_isactive = 0; /* make a zombie */
2163 break;
2165 case PMCLOG_TYPE_PROCFORK:
2168 * Allocate a process descriptor for the new
2169 * (child) process.
2171 ppnew =
2172 pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid,
2173 PMCSTAT_ALLOCATE);
2176 * If we had been tracking the parent, clone
2177 * its address maps.
2179 pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0);
2180 if (pp == NULL)
2181 break;
2182 TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next)
2183 pmcstat_image_link(ppnew, ppm->ppm_image,
2184 ppm->ppm_lowpc);
2185 break;
2187 default: /* other types of entries are not relevant */
2188 break;
2192 if (ev.pl_state == PMCLOG_EOF)
2193 return (PMCSTAT_FINISHED);
2194 else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
2195 return (PMCSTAT_RUNNING);
2197 err(EX_DATAERR, "ERROR: event parsing failed (record %jd, "
2198 "offset 0x%jx)", (uintmax_t) ev.pl_count + 1, ev.pl_offset);
2202 * Print log entries as text.
2205 static int
2206 pmcstat_print_log(struct pmcstat_args *a)
2208 struct pmclog_ev ev;
2209 uint32_t npc;
2211 while (pmclog_read(a->pa_logparser, &ev) == 0) {
2212 assert(ev.pl_state == PMCLOG_OK);
2213 switch (ev.pl_type) {
2214 case PMCLOG_TYPE_CALLCHAIN:
2215 PMCSTAT_PRINT_ENTRY(a, "callchain",
2216 "%d 0x%x %d %d %c", ev.pl_u.pl_cc.pl_pid,
2217 ev.pl_u.pl_cc.pl_pmcid,
2218 PMC_CALLCHAIN_CPUFLAGS_TO_CPU(ev.pl_u.pl_cc. \
2219 pl_cpuflags), ev.pl_u.pl_cc.pl_npc,
2220 PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\
2221 pl_cpuflags) ? 'u' : 's');
2222 for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++)
2223 PMCSTAT_PRINT_ENTRY(a, "...", "%p",
2224 (void *) ev.pl_u.pl_cc.pl_pc[npc]);
2225 break;
2226 case PMCLOG_TYPE_CLOSELOG:
2227 PMCSTAT_PRINT_ENTRY(a,"closelog",);
2228 break;
2229 case PMCLOG_TYPE_DROPNOTIFY:
2230 PMCSTAT_PRINT_ENTRY(a,"drop",);
2231 break;
2232 case PMCLOG_TYPE_INITIALIZE:
2233 PMCSTAT_PRINT_ENTRY(a,"initlog","0x%x \"%s\"",
2234 ev.pl_u.pl_i.pl_version,
2235 pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch));
2236 if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
2237 PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0)
2238 warnx("WARNING: Log version 0x%x != expected "
2239 "version 0x%x.", ev.pl_u.pl_i.pl_version,
2240 PMC_VERSION);
2241 break;
2242 case PMCLOG_TYPE_MAP_IN:
2243 PMCSTAT_PRINT_ENTRY(a,"map-in","%d %p \"%s\"",
2244 ev.pl_u.pl_mi.pl_pid,
2245 (void *) ev.pl_u.pl_mi.pl_start,
2246 ev.pl_u.pl_mi.pl_pathname);
2247 break;
2248 case PMCLOG_TYPE_MAP_OUT:
2249 PMCSTAT_PRINT_ENTRY(a,"map-out","%d %p %p",
2250 ev.pl_u.pl_mo.pl_pid,
2251 (void *) ev.pl_u.pl_mo.pl_start,
2252 (void *) ev.pl_u.pl_mo.pl_end);
2253 break;
2254 case PMCLOG_TYPE_PCSAMPLE:
2255 PMCSTAT_PRINT_ENTRY(a,"sample","0x%x %d %p %c",
2256 ev.pl_u.pl_s.pl_pmcid,
2257 ev.pl_u.pl_s.pl_pid,
2258 (void *) ev.pl_u.pl_s.pl_pc,
2259 ev.pl_u.pl_s.pl_usermode ? 'u' : 's');
2260 break;
2261 case PMCLOG_TYPE_PMCALLOCATE:
2262 PMCSTAT_PRINT_ENTRY(a,"allocate","0x%x \"%s\" 0x%x",
2263 ev.pl_u.pl_a.pl_pmcid,
2264 ev.pl_u.pl_a.pl_evname,
2265 ev.pl_u.pl_a.pl_flags);
2266 break;
2267 case PMCLOG_TYPE_PMCATTACH:
2268 PMCSTAT_PRINT_ENTRY(a,"attach","0x%x %d \"%s\"",
2269 ev.pl_u.pl_t.pl_pmcid,
2270 ev.pl_u.pl_t.pl_pid,
2271 ev.pl_u.pl_t.pl_pathname);
2272 break;
2273 case PMCLOG_TYPE_PMCDETACH:
2274 PMCSTAT_PRINT_ENTRY(a,"detach","0x%x %d",
2275 ev.pl_u.pl_d.pl_pmcid,
2276 ev.pl_u.pl_d.pl_pid);
2277 break;
2278 case PMCLOG_TYPE_PROCCSW:
2279 PMCSTAT_PRINT_ENTRY(a,"cswval","0x%x %d %jd",
2280 ev.pl_u.pl_c.pl_pmcid,
2281 ev.pl_u.pl_c.pl_pid,
2282 ev.pl_u.pl_c.pl_value);
2283 break;
2284 case PMCLOG_TYPE_PROCEXEC:
2285 PMCSTAT_PRINT_ENTRY(a,"exec","0x%x %d %p \"%s\"",
2286 ev.pl_u.pl_x.pl_pmcid,
2287 ev.pl_u.pl_x.pl_pid,
2288 (void *) ev.pl_u.pl_x.pl_entryaddr,
2289 ev.pl_u.pl_x.pl_pathname);
2290 break;
2291 case PMCLOG_TYPE_PROCEXIT:
2292 PMCSTAT_PRINT_ENTRY(a,"exitval","0x%x %d %jd",
2293 ev.pl_u.pl_e.pl_pmcid,
2294 ev.pl_u.pl_e.pl_pid,
2295 ev.pl_u.pl_e.pl_value);
2296 break;
2297 case PMCLOG_TYPE_PROCFORK:
2298 PMCSTAT_PRINT_ENTRY(a,"fork","%d %d",
2299 ev.pl_u.pl_f.pl_oldpid,
2300 ev.pl_u.pl_f.pl_newpid);
2301 break;
2302 case PMCLOG_TYPE_USERDATA:
2303 PMCSTAT_PRINT_ENTRY(a,"userdata","0x%x",
2304 ev.pl_u.pl_u.pl_userdata);
2305 break;
2306 case PMCLOG_TYPE_SYSEXIT:
2307 PMCSTAT_PRINT_ENTRY(a,"exit","%d",
2308 ev.pl_u.pl_se.pl_pid);
2309 break;
2310 default:
2311 fprintf(a->pa_printfile, "unknown event (type %d).\n",
2312 ev.pl_type);
2316 if (ev.pl_state == PMCLOG_EOF)
2317 return (PMCSTAT_FINISHED);
2318 else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
2319 return (PMCSTAT_RUNNING);
2321 errx(EX_DATAERR, "ERROR: event parsing failed "
2322 "(record %jd, offset 0x%jx).",
2323 (uintmax_t) ev.pl_count + 1, ev.pl_offset);
2324 /*NOTREACHED*/
2328 * Public Interfaces.
2332 * Close a logfile, after first flushing all in-module queued data.
2336 pmcstat_close_log(struct pmcstat_args *a)
2338 if (pmc_flush_logfile() < 0 ||
2339 pmc_configure_logfile(-1) < 0)
2340 err(EX_OSERR, "ERROR: logging failed");
2341 a->pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE);
2342 return (a->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING :
2343 PMCSTAT_FINISHED);
/*
 * Open a log file, for reading or writing.
 *
 * 'path' is interpreted as follows:
 *
 *  - "-" selects descriptor 0 when 'mode' is PMCSTAT_OPEN_FOR_READ and
 *    descriptor 1 otherwise.
 *  - When opening for writing, a 'path' that contains a ':' and does
 *    not begin with '/' or '.' is taken to be a "host:port"
 *    specification (split at the last ':') and a stream socket is
 *    connected to it.
 *  - Anything else is treated as a file name and passed to open(2).
 *
 * The function returns the fd of the successfully opened log.  Every
 * failure path below records an error string and ends in errx(3), so
 * the function does not return to the caller on failure.
 */

int
pmcstat_open_log(const char *path, int mode)
{
	int gai_err, fd, oflags;
	size_t hostlen;
	const char *colon, *why;
	struct addrinfo hints, *ai, *ailist;
	char hostname[MAXHOSTNAMELEN];

	why = NULL;
	fd = -1;

	if (path[0] == '-' && path[1] == '\0') {
		/* Standard input or standard output. */
		if (mode == PMCSTAT_OPEN_FOR_READ)
			fd = 0;
		else
			fd = 1;
	} else if (mode == PMCSTAT_OPEN_FOR_WRITE && path[0] != '/' &&
	    path[0] != '.' && strchr(path, ':') != NULL) {
		/* "host:port": everything before the last ':' is the host. */
		colon = strrchr(path, ':');
		hostlen = colon - path;
		if (colon == path || hostlen >= sizeof(hostname)) {
			why = strerror(EINVAL);
			goto done;
		}

		/* The host part holds no NUL, so a plain copy suffices. */
		assert(hostlen < sizeof(hostname));
		(void) memcpy(hostname, path, hostlen);
		hostname[hostlen] = '\0';

		(void) memset(&hints, 0, sizeof(hints));
		hints.ai_family = AF_UNSPEC;
		hints.ai_socktype = SOCK_STREAM;
		gai_err = getaddrinfo(hostname, colon + 1, &hints, &ailist);
		if (gai_err != 0) {
			why = gai_strerror(gai_err);
			goto done;
		}

		/*
		 * Try each returned address in turn and stop at the first
		 * one that connects.  'why' keeps the most recent failure
		 * so that it can be reported if no address works.
		 */
		fd = -1;
		for (ai = ailist; ai != NULL; ai = ai->ai_next) {
			fd = socket(ai->ai_family, ai->ai_socktype,
			    ai->ai_protocol);
			if (fd < 0) {
				why = strerror(errno);
				continue;
			}
			if (connect(fd, ai->ai_addr, ai->ai_addrlen) >= 0) {
				why = NULL;
				break;
			}
			why = strerror(errno);
			(void) close(fd);
			fd = -1;
		}
		freeaddrinfo(ailist);
	} else {
		/* An ordinary file. */
		if (mode == PMCSTAT_OPEN_FOR_READ)
			oflags = O_RDONLY;
		else
			oflags = O_WRONLY|O_CREAT|O_TRUNC;

		fd = open(path, oflags, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
		if (fd < 0)
			why = strerror(errno);
	}

 done:
	if (why != NULL)
		errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path,
		    (mode == PMCSTAT_OPEN_FOR_READ ? "reading" : "writing"),
		    why);

	return (fd);
}
2434 * Process a log file in offline analysis mode.
2438 pmcstat_process_log(struct pmcstat_args *a)
2442 * If analysis has not been asked for, just print the log to
2443 * the current output file.
2445 if (a->pa_flags & FLAG_DO_PRINT)
2446 return (pmcstat_print_log(a));
2447 else
2448 return (pmcstat_analyze_log(a));
2452 * Initialize module.
2455 void
2456 pmcstat_initialize_logging(struct pmcstat_args *a)
2458 int i;
2460 (void) a;
2462 /* use a convenient format for 'ldd' output */
2463 if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0)
2464 err(EX_OSERR, "ERROR: Cannot setenv");
2466 /* Initialize hash tables */
2467 pmcstat_string_initialize();
2468 for (i = 0; i < PMCSTAT_NHASH; i++) {
2469 LIST_INIT(&pmcstat_image_hash[i]);
2470 LIST_INIT(&pmcstat_process_hash[i]);
2474 * Create a fake 'process' entry for the kernel with pid -1.
2475 * hwpmc(4) will subsequently inform us about where the kernel
2476 * and any loaded kernel modules are mapped.
2478 if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1,
2479 PMCSTAT_ALLOCATE)) == NULL)
2480 err(EX_OSERR, "ERROR: Cannot initialize logging");
2484 * Shutdown module.
2487 void
2488 pmcstat_shutdown_logging(struct pmcstat_args *a)
2490 int i;
2491 FILE *mf;
2492 struct pmcstat_gmonfile *pgf, *pgftmp;
2493 struct pmcstat_image *pi, *pitmp;
2494 struct pmcstat_process *pp, *pptmp;
2495 struct pmcstat_cgnode_hash *pch, *pchtmp;
2497 /* determine where to send the map file */
2498 mf = NULL;
2499 if (a->pa_mapfilename != NULL)
2500 mf = (strcmp(a->pa_mapfilename, "-") == 0) ?
2501 a->pa_printfile : fopen(a->pa_mapfilename, "w");
2503 if (mf == NULL && a->pa_flags & FLAG_DO_GPROF &&
2504 a->pa_verbosity >= 2)
2505 mf = a->pa_printfile;
2507 if (mf)
2508 (void) fprintf(mf, "MAP:\n");
2511 if (a->pa_flags & FLAG_DO_CALLGRAPHS)
2512 pmcstat_callgraph_print(a);
2515 * Sync back all gprof flat profile data.
2517 for (i = 0; i < PMCSTAT_NHASH; i++) {
2518 LIST_FOREACH(pi, &pmcstat_image_hash[i], pi_next) {
2519 if (mf)
2520 (void) fprintf(mf, " \"%s\" => \"%s\"",
2521 pmcstat_string_unintern(pi->pi_execpath),
2522 pmcstat_string_unintern(
2523 pi->pi_samplename));
2525 /* flush gmon.out data to disk */
2526 LIST_FOREACH(pgf, &pi->pi_gmlist, pgf_next) {
2527 pmcstat_gmon_unmap_file(pgf);
2528 if (mf)
2529 (void) fprintf(mf, " %s/%d",
2530 pmcstat_pmcid_to_name(
2531 pgf->pgf_pmcid),
2532 pgf->pgf_nsamples);
2533 if (pgf->pgf_overflow && a->pa_verbosity >= 1)
2534 warnx("WARNING: profile \"%s\" "
2535 "overflowed.",
2536 pmcstat_string_unintern(
2537 pgf->pgf_name));
2540 if (mf)
2541 (void) fprintf(mf, "\n");
2546 * Compute arcs and add these to the gprof files.
2548 if (a->pa_flags & FLAG_DO_GPROF && a->pa_graphdepth > 1)
2549 pmcstat_callgraph_do_gmon_arcs();
2552 * Free memory.
2554 for (i = 0; i < PMCSTAT_NHASH; i++) {
2555 LIST_FOREACH_SAFE(pch, &pmcstat_cgnode_hash[i], pch_next,
2556 pchtmp) {
2557 pmcstat_cgnode_free(pch->pch_cgnode);
2558 free(pch);
2562 for (i = 0; i < PMCSTAT_NHASH; i++) {
2563 LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp)
2565 LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next,
2566 pgftmp) {
2567 if (pgf->pgf_file)
2568 (void) fclose(pgf->pgf_file);
2569 LIST_REMOVE(pgf, pgf_next);
2570 free(pgf);
2572 if (pi->pi_symbols)
2573 free(pi->pi_symbols);
2575 LIST_REMOVE(pi, pi_next);
2576 free(pi);
2579 LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next,
2580 pptmp) {
2581 LIST_REMOVE(pp, pp_next);
2582 free(pp);
2586 pmcstat_string_shutdown();
2589 * Print errors unless -q was specified. Print all statistics
2590 * if verbosity > 1.
2592 #define PRINT(N,V,A) do { \
2593 if (pmcstat_stats.ps_##V || (A)->pa_verbosity >= 2) \
2594 (void) fprintf((A)->pa_printfile, " %-40s %d\n",\
2595 N, pmcstat_stats.ps_##V); \
2596 } while (0)
2598 if (a->pa_verbosity >= 1 && a->pa_flags & FLAG_DO_GPROF) {
2599 (void) fprintf(a->pa_printfile, "CONVERSION STATISTICS:\n");
2600 PRINT("#exec/a.out", exec_aout, a);
2601 PRINT("#exec/elf", exec_elf, a);
2602 PRINT("#exec/unknown", exec_indeterminable, a);
2603 PRINT("#exec handling errors", exec_errors, a);
2604 PRINT("#samples/total", samples_total, a);
2605 PRINT("#samples/unclaimed", samples_unknown_offset, a);
2606 PRINT("#samples/unknown-object", samples_indeterminable, a);
2607 PRINT("#callchain/dubious-frames", callchain_dubious_frames,
2611 if (mf)
2612 (void) fclose(mf);