2 * kvm tracing application
4 * This tool is used for collecting trace buffer data
7 * Based on blktrace 0.99.3
9 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
10 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
11 * Copyright (C) 2008 Eric Liu <eric.e.liu@intel.com>
13 * This work is licensed under the GNU LGPL license, version 2.
19 #include <sys/types.h>
25 #include <sys/ioctl.h>
26 #include <sys/param.h>
27 #include <sys/statfs.h>
40 #include <linux/kvm.h>
/*
 * Version string of the tool. It is only ever printed (usage text and the
 * version banner), so it is kept as read-only data.
 */
static const char kvmtrace_version[] = "0.1";
45 * You may want to increase this even more, if you are logging at a high
46 * rate and see skipped/missed events
/* Initial value of the buf_size global. */
48 #define BUF_SIZE (512 * 1024)
/* Size of the buffer allocated and handed to setvbuf() for a thread's output stream. */
51 #define OFILE_BUF (128 * 1024)
/* f_type that statfs() must report for the debugfs path; the bytes spell "dbg " in ASCII. */
53 #define DEBUGFS_TYPE 0x64626720
/* Larger of two values. Each argument is evaluated twice, so pass expressions without side effects. */
55 #define max(a, b) ((a) > (b) ? (a) : (b))
/* Short-option string passed to getopt_long(); a ':' after a letter means that option takes an argument. */
57 #define S_OPTS "r:o:w:?Vb:n:D:"
58 static struct option l_opts
[] = {
61 .has_arg
= required_argument
,
67 .has_arg
= required_argument
,
73 .has_arg
= required_argument
,
79 .has_arg
= no_argument
,
84 .name
= "buffer-size",
85 .has_arg
= required_argument
,
90 .name
= "num-sub-buffers",
91 .has_arg
= required_argument
,
97 .has_arg
= required_argument
,
106 struct thread_information
{
111 char fn
[MAXPATHLEN
+ 64];
116 int (*get_subbuf
)(struct thread_information
*, unsigned int);
117 int (*read_data
)(struct thread_information
*, void *, unsigned int);
119 unsigned long long data_read
;
121 struct kvm_trace_information
*trace_info
;
126 * mmap controlled output files
128 unsigned long long fs_size
;
129 unsigned long long fs_max_size
;
130 unsigned long fs_off
;
132 unsigned long fs_buf_len
;
136 struct kvm_trace_information
{
138 volatile int trace_started
;
139 unsigned long lost_records
;
140 struct thread_information
*threads
;
141 unsigned long buf_size
;
142 unsigned long buf_nr
;
145 static struct kvm_trace_information trace_information
;
/* Built-in debugfs location; main() assigns it to debugfs_path. */
148 static char default_debugfs_path
[] = "/sys/kernel/debug";
150 /* command line option globals */
/* Debugfs root: the per-CPU trace files and lost_records are opened under <debugfs_path>/kvm/. */
151 static char *debugfs_path
;
/* Base name of the output files ("<output_name>.kvmtrace.<cpu>"); taken from optarg and checked against NULL after parsing. */
152 static char *output_name
;
/* Directory prefix used by fill_ofname() when building an output file name. */
153 static char *output_dir
;
/* Stopwatch value in seconds, parsed with atoi(); values <= 0 are reported as invalid. */
154 static int stop_watch
;
/* Buffer size; copied into the KVM_TRACE_ENABLE request and into trace_information by start_trace(). */
155 static unsigned long buf_size
= BUF_SIZE
;
/* Buffer count ("buffer nr"); copied alongside buf_size by start_trace(). */
156 static unsigned long buf_nr
= BUF_NR
;
/* Result of getpagesize(), set in main(); mmap_subbuf() uses it to page-align the file offset it maps. */
157 static unsigned int page_size
;
/*
 * Iterate cpu over 0 .. ncpus - 1.
 *
 * cpu must be a modifiable integer lvalue; it equals ncpus once the loop has
 * run to completion. The argument is parenthesized so that an expression
 * such as *p is stepped as (*p)++ instead of *p++.
 */
#define for_each_cpu_online(cpu) \
	for ((cpu) = 0; (cpu) < ncpus; (cpu)++)

/*
 * Walk the trace_information.threads array.
 *
 * tip receives a pointer to each of the ncpus entries in turn and i the
 * matching index. Both arguments must be modifiable lvalues and are
 * parenthesized for the same reason as above.
 */
#define for_each_tip(tip, i) \
	for ((i) = 0, (tip) = trace_information.threads; (i) < ncpus; (i)++, (tip)++)
/*
 * Run-state flags. Both objects are volatile-qualified, so every use of the
 * accessor macros below performs a fresh volatile read without needing an
 * explicit cast.
 */
static volatile int done;
static volatile int trace_stopped;

/* Non-zero once the done flag has been set. */
#define is_done() (done)

/* Non-zero once the trace_stopped flag has been set. */
#define is_trace_stopped() (trace_stopped)
170 static void exit_trace(int status
);
172 static void handle_sigint(__attribute__((__unused__
)) int sig
)
174 ioctl(trace_information
.fd
, KVM_TRACE_PAUSE
);
178 static int get_lost_records()
181 char tmp
[MAXPATHLEN
+ 64];
183 snprintf(tmp
, sizeof(tmp
), "%s/kvm/lost_records", debugfs_path
);
184 fd
= open(tmp
, O_RDONLY
);
187 * this may be ok, if the kernel doesn't support dropped counts
192 fprintf(stderr
, "Couldn't open dropped file %s\n", tmp
);
196 if (read(fd
, tmp
, sizeof(tmp
)) < 0) {
206 static void wait_for_data(struct thread_information
*tip
, int timeout
)
208 struct pollfd pfd
= { .fd
= tip
->fd
, .events
= POLLIN
};
211 if (poll(&pfd
, 1, timeout
) < 0) {
215 if (pfd
.revents
& POLLIN
)
220 static int read_data(struct thread_information
*tip
, void *buf
,
226 wait_for_data(tip
, 100);
228 ret
= read(tip
->fd
, buf
, len
);
235 if (errno
!= EAGAIN
) {
237 fprintf(stderr
, "Thread %d failed read of %s\n",
243 } while (!is_done());
250 * For file output, truncate and mmap the file appropriately
252 static int mmap_subbuf(struct thread_information
*tip
, unsigned int maxlen
)
254 int ofd
= fileno(tip
->ofile
);
260 * extend file, if we have to. use chunks of 16 subbuffers.
262 if (tip
->fs_off
+ maxlen
> tip
->fs_buf_len
) {
264 munlock(tip
->fs_buf
, tip
->fs_buf_len
);
265 munmap(tip
->fs_buf
, tip
->fs_buf_len
);
269 tip
->fs_off
= tip
->fs_size
& (page_size
- 1);
270 nr
= max(16, tip
->trace_info
->buf_nr
);
271 size
= tip
->trace_info
->buf_size
;
272 tip
->fs_buf_len
= (nr
* size
) - tip
->fs_off
;
273 tip
->fs_max_size
+= tip
->fs_buf_len
;
275 if (ftruncate(ofd
, tip
->fs_max_size
) < 0) {
280 tip
->fs_buf
= mmap(NULL
, tip
->fs_buf_len
, PROT_WRITE
,
281 MAP_SHARED
, ofd
, tip
->fs_size
- tip
->fs_off
);
282 if (tip
->fs_buf
== MAP_FAILED
) {
286 mlock(tip
->fs_buf
, tip
->fs_buf_len
);
289 ret
= tip
->read_data(tip
, tip
->fs_buf
+ tip
->fs_off
, maxlen
);
291 tip
->data_read
+= ret
;
300 static void tip_ftrunc_final(struct thread_information
*tip
)
303 * truncate to right size and cleanup mmap
306 int ofd
= fileno(tip
->ofile
);
309 munmap(tip
->fs_buf
, tip
->fs_buf_len
);
311 ftruncate(ofd
, tip
->fs_size
);
315 static void *thread_main(void *arg
)
317 struct thread_information
*tip
= arg
;
318 pid_t pid
= getpid();
322 CPU_SET((tip
->cpu
), &cpu_mask
);
324 if (sched_setaffinity(pid
, sizeof(cpu_mask
), &cpu_mask
) == -1) {
325 perror("sched_setaffinity");
329 snprintf(tip
->fn
, sizeof(tip
->fn
), "%s/kvm/trace%d",
330 debugfs_path
, tip
->cpu
);
331 tip
->fd
= open(tip
->fn
, O_RDONLY
);
334 fprintf(stderr
, "Thread %d failed open of %s\n", tip
->cpu
,
339 if (tip
->get_subbuf(tip
, tip
->trace_info
->buf_size
) < 0)
344 * trace is stopped, pull data until we get a short read
346 while (tip
->get_subbuf(tip
, tip
->trace_info
->buf_size
) > 0)
349 tip_ftrunc_final(tip
);
354 static int fill_ofname(struct thread_information
*tip
, char *dst
)
360 len
= sprintf(dst
, "%s/", output_dir
);
362 len
= sprintf(dst
, "./");
364 if (stat(dst
, &sb
) < 0) {
365 if (errno
!= ENOENT
) {
369 if (mkdir(dst
, 0755) < 0) {
371 fprintf(stderr
, "Can't make output dir\n");
376 sprintf(dst
+ len
, "%s.kvmtrace.%d", output_name
, tip
->cpu
);
381 static void fill_ops(struct thread_information
*tip
)
383 tip
->get_subbuf
= mmap_subbuf
;
384 tip
->read_data
= read_data
;
387 static void close_thread(struct thread_information
*tip
)
393 if (tip
->ofile_buffer
)
394 free(tip
->ofile_buffer
);
398 tip
->ofile_buffer
= NULL
;
401 static int tip_open_output(struct thread_information
*tip
)
406 if (fill_ofname(tip
, op
))
409 tip
->ofile
= fopen(op
, "w+");
411 vbuf_size
= OFILE_BUF
;
413 if (tip
->ofile
== NULL
) {
418 tip
->ofile_buffer
= malloc(vbuf_size
);
419 if (setvbuf(tip
->ofile
, tip
->ofile_buffer
, mode
, vbuf_size
)) {
429 static int start_threads(int cpu
)
431 struct thread_information
*tip
;
433 tip
= trace_information
.threads
+ cpu
;
435 tip
->trace_info
= &trace_information
;
438 if (tip_open_output(tip
))
441 if (pthread_create(&tip
->thread
, NULL
, thread_main
, tip
)) {
442 perror("pthread_create");
450 static void stop_threads()
452 struct thread_information
*tip
;
456 for_each_tip(tip
, i
) {
458 (void) pthread_join(tip
->thread
, (void *) &ret
);
463 static int start_trace(void)
466 struct kvm_user_trace_setup kuts
;
468 fd
= trace_information
.fd
= open("/dev/kvm", O_RDWR
);
474 memset(&kuts
, 0, sizeof(kuts
));
475 kuts
.buf_size
= trace_information
.buf_size
= buf_size
;
476 kuts
.buf_nr
= trace_information
.buf_nr
= buf_nr
;
478 if (ioctl(trace_information
.fd
, KVM_TRACE_ENABLE
, &kuts
) < 0) {
479 perror("KVM_TRACE_ENABLE");
483 trace_information
.trace_started
= 1;
488 static void cleanup_trace(void)
490 if (trace_information
.fd
== -1)
493 trace_information
.lost_records
= get_lost_records();
495 if (trace_information
.trace_started
) {
496 trace_information
.trace_started
= 0;
497 if (ioctl(trace_information
.fd
, KVM_TRACE_DISABLE
) < 0)
498 perror("KVM_TRACE_DISABLE");
501 close(trace_information
.fd
);
502 trace_information
.fd
= -1;
505 static void stop_all_traces(void)
507 if (!is_trace_stopped()) {
514 static void exit_trace(int status
)
520 static int start_kvm_trace(void)
523 struct thread_information
*tip
;
525 size
= ncpus
* sizeof(struct thread_information
);
528 fprintf(stderr
, "Out of memory, threads (%d)\n", size
);
531 memset(tip
, 0, size
);
532 trace_information
.threads
= tip
;
537 for_each_cpu_online(i
) {
538 if (start_threads(i
)) {
539 fprintf(stderr
, "Failed to start worker threads\n");
553 static void wait_for_threads(void)
555 struct thread_information
*tip
;
563 tips_running
+= !tip
->exited
;
565 } while (tips_running
);
568 static void show_stats(void)
570 struct thread_information
*tip
;
571 unsigned long long data_read
;
575 for_each_tip(tip
, i
) {
576 printf(" CPU%3d: %8llu KiB data\n",
577 tip
->cpu
, (tip
->data_read
+ 1023) >> 10);
578 data_read
+= tip
->data_read
;
581 printf(" Total: lost %lu, %8llu KiB data\n",
582 trace_information
.lost_records
, (data_read
+ 1023) >> 10);
584 if (trace_information
.lost_records
)
585 fprintf(stderr
, "You have lost records, "
586 "consider using a larger buffer size (-b)\n");
/*
 * Help text printed by show_usage() after the program name and version.
 * It is never modified, so it is const; adjacent string literals are joined
 * by the compiler, so no line-continuation backslashes are needed.
 */
static const char usage_str[] =
	"[ -r debugfs path ] [ -D output dir ] [ -b buffer size ]\n"
	"[ -n number of buffers] [ -o <output file> ] [ -w time ] [ -V ]\n\n"
	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-V Print program version info\n\n";
600 static void show_usage(char *prog
)
602 fprintf(stderr
, "Usage: %s %s %s", prog
, kvmtrace_version
, usage_str
);
606 void parse_args(int argc
, char **argv
)
610 while ((c
= getopt_long(argc
, argv
, S_OPTS
, l_opts
, NULL
)) >= 0) {
613 debugfs_path
= optarg
;
616 output_name
= optarg
;
619 stop_watch
= atoi(optarg
);
620 if (stop_watch
<= 0) {
622 "Invalid stopwatch value (%d secs)\n",
628 printf("%s version %s\n", argv
[0], kvmtrace_version
);
631 buf_size
= strtoul(optarg
, NULL
, 10);
632 if (buf_size
<= 0 || buf_size
> 16*1024) {
634 "Invalid buffer size (%lu)\n",
641 buf_nr
= strtoul(optarg
, NULL
, 10);
644 "Invalid buffer nr (%lu)\n", buf_nr
);
656 if (optind
< argc
|| output_name
== NULL
)
660 int main(int argc
, char *argv
[])
664 parse_args(argc
, argv
);
667 debugfs_path
= default_debugfs_path
;
669 if (statfs(debugfs_path
, &st
) < 0) {
671 fprintf(stderr
, "%s does not appear to be a valid path\n",
674 } else if (st
.f_type
!= (long) DEBUGFS_TYPE
) {
675 fprintf(stderr
, "%s does not appear to be a debug filesystem,"
676 " please mount debugfs.\n",
681 page_size
= getpagesize();
683 ncpus
= sysconf(_SC_NPROCESSORS_ONLN
);
685 fprintf(stderr
, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
689 signal(SIGINT
, handle_sigint
);
690 signal(SIGHUP
, handle_sigint
);
691 signal(SIGTERM
, handle_sigint
);
692 signal(SIGALRM
, handle_sigint
);
693 signal(SIGPIPE
, SIG_IGN
);
695 if (start_kvm_trace() != 0)