kvm: qemu: expose MSI capability to guest
[kvm-userspace.git] / user / kvmtrace.c
blobde3c1897f466082cdd536d43e03442bec7cd2e1b
/*
 * kvm tracing application
 *
 * This tool is used for collecting trace buffer data
 * for kvm trace.
 *
 * Based on blktrace 0.99.3
 *
 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 * Copyright (C) 2008 Eric Liu <eric.e.liu@intel.com>
 *
 * This work is licensed under the GNU LGPL license, version 2.
 */
16 #define _GNU_SOURCE
18 #include <pthread.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <unistd.h>
22 #include <signal.h>
23 #include <fcntl.h>
24 #include <string.h>
25 #include <sys/ioctl.h>
26 #include <sys/param.h>
27 #include <sys/statfs.h>
28 #include <sys/poll.h>
29 #include <sys/mman.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <ctype.h>
33 #include <getopt.h>
34 #include <errno.h>
35 #include <sched.h>
37 #ifndef __user
38 #define __user
39 #endif
40 #include <linux/kvm.h>
/* Version string printed by -V and in the usage text. */
static char kvmtrace_version[] = "0.1";

/*
 * Default size, in bytes, of one trace sub-buffer.
 *
 * You may want to increase this even more, if you are logging at a high
 * rate and see skipped/missed events.
 */
#define BUF_SIZE	(512 * 1024)

/* Default number of trace sub-buffers. */
#define BUF_NR		8

/* Size, in bytes, of the stdio buffer attached to each output file. */
#define OFILE_BUF	(128 * 1024)

/* Value compared against statfs f_type to recognise a debug filesystem. */
#define DEBUGFS_TYPE	0x64626720

/* Note: evaluates the larger argument twice; do not pass side effects. */
#define max(a, b)	((a) > (b) ? (a) : (b))

/* Short option string handed to getopt_long(). */
#define S_OPTS		"r:o:w:?Vb:n:D:"
/*
 * Long options accepted by getopt_long(); each maps onto the short option
 * character of the same meaning in S_OPTS.  Fields are, in order:
 * name, has_arg, flag, val.  The all-zero entry terminates the table.
 */
static struct option l_opts[] = {
	{ "relay",           required_argument, NULL, 'r' },
	{ "output",          required_argument, NULL, 'o' },
	{ "stopwatch",       required_argument, NULL, 'w' },
	{ "version",         no_argument,       NULL, 'V' },
	{ "buffer-size",     required_argument, NULL, 'b' },
	{ "num-sub-buffers", required_argument, NULL, 'n' },
	{ "output-dir",      required_argument, NULL, 'D' },
	{ NULL,              0,                 NULL, 0   }
};
/*
 * Per-CPU worker state: one entry per traced CPU, owned by the thread
 * started for that CPU.
 */
struct thread_information {
	int cpu;			/* CPU index this worker serves */
	pthread_t thread;		/* handle filled in by pthread_create() */

	int fd;				/* per-CPU trace file, -1 when closed */
	char fn[MAXPATHLEN + 64];	/* path of the per-CPU trace file */

	FILE *ofile;			/* output file stream */
	char *ofile_buffer;		/* buffer handed to setvbuf() for ofile */

	/* callbacks installed by fill_ops() */
	int (*get_subbuf)(struct thread_information *, unsigned int);
	int (*read_data)(struct thread_information *, void *, unsigned int);

	unsigned long long data_read;	/* total bytes read by this worker */

	struct kvm_trace_information *trace_info;	/* shared trace state */

	int exited;			/* set to 1 when the worker returns */

	/*
	 * mmap controlled output files
	 */
	unsigned long long fs_size;	/* bytes of trace data stored so far */
	unsigned long long fs_max_size;	/* size the file was extended to */
	unsigned long fs_off;		/* write offset inside the mapping */
	void *fs_buf;			/* current mapping, NULL if none */
	unsigned long fs_buf_len;	/* length of the current mapping */
};
/*
 * State shared by all worker threads for the single kvm trace session.
 */
struct kvm_trace_information {
	int fd;					/* /dev/kvm descriptor, -1 once closed */
	volatile int trace_started;		/* 1 after KVM_TRACE_ENABLE succeeded */
	unsigned long lost_records;		/* value read from kvm/lost_records */
	struct thread_information *threads;	/* array of ncpus worker entries */
	unsigned long buf_size;			/* sub-buffer size passed to the kernel */
	unsigned long buf_nr;			/* sub-buffer count passed to the kernel */
};
145 static struct kvm_trace_information trace_information;
147 static int ncpus;
148 static char default_debugfs_path[] = "/sys/kernel/debug";
150 /* command line option globals */
151 static char *debugfs_path;
152 static char *output_name;
153 static char *output_dir;
154 static int stop_watch;
155 static unsigned long buf_size = BUF_SIZE;
156 static unsigned long buf_nr = BUF_NR;
157 static unsigned int page_size;
159 #define for_each_cpu_online(cpu) \
160 for (cpu = 0; cpu < ncpus; cpu++)
161 #define for_each_tip(tip, i) \
162 for (i = 0, tip = trace_information.threads; i < ncpus; i++, tip++)
164 #define is_done() (*(volatile int *)(&done))
165 static volatile int done;
167 #define is_trace_stopped() (*(volatile int *)(&trace_stopped))
168 static volatile int trace_stopped;
170 static void exit_trace(int status);
172 static void handle_sigint(__attribute__((__unused__)) int sig)
174 ioctl(trace_information.fd, KVM_TRACE_PAUSE);
175 done = 1;
178 static int get_lost_records()
180 int fd;
181 char tmp[MAXPATHLEN + 64];
183 snprintf(tmp, sizeof(tmp), "%s/kvm/lost_records", debugfs_path);
184 fd = open(tmp, O_RDONLY);
185 if (fd < 0) {
187 * this may be ok, if the kernel doesn't support dropped counts
189 if (errno == ENOENT)
190 return 0;
192 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
193 return -1;
196 if (read(fd, tmp, sizeof(tmp)) < 0) {
197 perror(tmp);
198 close(fd);
199 return -1;
201 close(fd);
203 return atoi(tmp);
206 static void wait_for_data(struct thread_information *tip, int timeout)
208 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
210 while (!is_done()) {
211 if (poll(&pfd, 1, timeout) < 0) {
212 perror("poll");
213 break;
215 if (pfd.revents & POLLIN)
216 break;
220 static int read_data(struct thread_information *tip, void *buf,
221 unsigned int len)
223 int ret = 0;
225 do {
226 wait_for_data(tip, 100);
228 ret = read(tip->fd, buf, len);
230 if (!ret)
231 continue;
232 else if (ret > 0)
233 return ret;
234 else {
235 if (errno != EAGAIN) {
236 perror(tip->fn);
237 fprintf(stderr, "Thread %d failed read of %s\n",
238 tip->cpu, tip->fn);
239 break;
241 continue;
243 } while (!is_done());
245 return ret;
250 * For file output, truncate and mmap the file appropriately
252 static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
254 int ofd = fileno(tip->ofile);
255 int ret;
256 unsigned long nr;
257 unsigned long size;
260 * extend file, if we have to. use chunks of 16 subbuffers.
262 if (tip->fs_off + maxlen > tip->fs_buf_len) {
263 if (tip->fs_buf) {
264 munlock(tip->fs_buf, tip->fs_buf_len);
265 munmap(tip->fs_buf, tip->fs_buf_len);
266 tip->fs_buf = NULL;
269 tip->fs_off = tip->fs_size & (page_size - 1);
270 nr = max(16, tip->trace_info->buf_nr);
271 size = tip->trace_info->buf_size;
272 tip->fs_buf_len = (nr * size) - tip->fs_off;
273 tip->fs_max_size += tip->fs_buf_len;
275 if (ftruncate(ofd, tip->fs_max_size) < 0) {
276 perror("ftruncate");
277 return -1;
280 tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
281 MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
282 if (tip->fs_buf == MAP_FAILED) {
283 perror("mmap");
284 return -1;
286 mlock(tip->fs_buf, tip->fs_buf_len);
289 ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
290 if (ret >= 0) {
291 tip->data_read += ret;
292 tip->fs_size += ret;
293 tip->fs_off += ret;
294 return 0;
297 return -1;
300 static void tip_ftrunc_final(struct thread_information *tip)
303 * truncate to right size and cleanup mmap
305 if (tip->ofile) {
306 int ofd = fileno(tip->ofile);
308 if (tip->fs_buf)
309 munmap(tip->fs_buf, tip->fs_buf_len);
311 ftruncate(ofd, tip->fs_size);
315 static void *thread_main(void *arg)
317 struct thread_information *tip = arg;
318 pid_t pid = getpid();
319 cpu_set_t cpu_mask;
321 CPU_ZERO(&cpu_mask);
322 CPU_SET((tip->cpu), &cpu_mask);
324 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
325 perror("sched_setaffinity");
326 exit_trace(1);
329 snprintf(tip->fn, sizeof(tip->fn), "%s/kvm/trace%d",
330 debugfs_path, tip->cpu);
331 tip->fd = open(tip->fn, O_RDONLY);
332 if (tip->fd < 0) {
333 perror(tip->fn);
334 fprintf(stderr, "Thread %d failed open of %s\n", tip->cpu,
335 tip->fn);
336 exit_trace(1);
338 while (!is_done()) {
339 if (tip->get_subbuf(tip, tip->trace_info->buf_size) < 0)
340 break;
344 * trace is stopped, pull data until we get a short read
346 while (tip->get_subbuf(tip, tip->trace_info->buf_size) > 0)
349 tip_ftrunc_final(tip);
350 tip->exited = 1;
351 return NULL;
354 static int fill_ofname(struct thread_information *tip, char *dst)
356 struct stat sb;
357 int len = 0;
359 if (output_dir)
360 len = sprintf(dst, "%s/", output_dir);
361 else
362 len = sprintf(dst, "./");
364 if (stat(dst, &sb) < 0) {
365 if (errno != ENOENT) {
366 perror("stat");
367 return 1;
369 if (mkdir(dst, 0755) < 0) {
370 perror(dst);
371 fprintf(stderr, "Can't make output dir\n");
372 return 1;
376 sprintf(dst + len, "%s.kvmtrace.%d", output_name, tip->cpu);
378 return 0;
381 static void fill_ops(struct thread_information *tip)
383 tip->get_subbuf = mmap_subbuf;
384 tip->read_data = read_data;
387 static void close_thread(struct thread_information *tip)
389 if (tip->fd != -1)
390 close(tip->fd);
391 if (tip->ofile)
392 fclose(tip->ofile);
393 if (tip->ofile_buffer)
394 free(tip->ofile_buffer);
396 tip->fd = -1;
397 tip->ofile = NULL;
398 tip->ofile_buffer = NULL;
401 static int tip_open_output(struct thread_information *tip)
403 int mode, vbuf_size;
404 char op[NAME_MAX];
406 if (fill_ofname(tip, op))
407 return 1;
409 tip->ofile = fopen(op, "w+");
410 mode = _IOFBF;
411 vbuf_size = OFILE_BUF;
413 if (tip->ofile == NULL) {
414 perror(op);
415 return 1;
418 tip->ofile_buffer = malloc(vbuf_size);
419 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
420 perror("setvbuf");
421 close_thread(tip);
422 return 1;
425 fill_ops(tip);
426 return 0;
429 static int start_threads(int cpu)
431 struct thread_information *tip;
433 tip = trace_information.threads + cpu;
434 tip->cpu = cpu;
435 tip->trace_info = &trace_information;
436 tip->fd = -1;
438 if (tip_open_output(tip))
439 return 1;
441 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
442 perror("pthread_create");
443 close_thread(tip);
444 return 1;
447 return 0;
450 static void stop_threads()
452 struct thread_information *tip;
453 unsigned long ret;
454 int i;
456 for_each_tip(tip, i) {
457 if (tip->thread)
458 (void) pthread_join(tip->thread, (void *) &ret);
459 close_thread(tip);
463 static int start_trace(void)
465 int fd;
466 struct kvm_user_trace_setup kuts;
468 fd = trace_information.fd = open("/dev/kvm", O_RDWR);
469 if (fd == -1) {
470 perror("/dev/kvm");
471 return 1;
474 memset(&kuts, 0, sizeof(kuts));
475 kuts.buf_size = trace_information.buf_size = buf_size;
476 kuts.buf_nr = trace_information.buf_nr = buf_nr;
478 if (ioctl(trace_information.fd , KVM_TRACE_ENABLE, &kuts) < 0) {
479 perror("KVM_TRACE_ENABLE");
480 close(fd);
481 return 1;
483 trace_information.trace_started = 1;
485 return 0;
488 static void cleanup_trace(void)
490 if (trace_information.fd == -1)
491 return;
493 trace_information.lost_records = get_lost_records();
495 if (trace_information.trace_started) {
496 trace_information.trace_started = 0;
497 if (ioctl(trace_information.fd, KVM_TRACE_DISABLE) < 0)
498 perror("KVM_TRACE_DISABLE");
501 close(trace_information.fd);
502 trace_information.fd = -1;
505 static void stop_all_traces(void)
507 if (!is_trace_stopped()) {
508 trace_stopped = 1;
509 stop_threads();
510 cleanup_trace();
/*
 * Tear the trace session down and terminate the process with `status`.
 * Does not return.
 */
static void exit_trace(int status)
{
	stop_all_traces();

	exit(status);
}
520 static int start_kvm_trace(void)
522 int i, size;
523 struct thread_information *tip;
525 size = ncpus * sizeof(struct thread_information);
526 tip = malloc(size);
527 if (!tip) {
528 fprintf(stderr, "Out of memory, threads (%d)\n", size);
529 return 1;
531 memset(tip, 0, size);
532 trace_information.threads = tip;
534 if (start_trace())
535 return 1;
537 for_each_cpu_online(i) {
538 if (start_threads(i)) {
539 fprintf(stderr, "Failed to start worker threads\n");
540 break;
544 if (i != ncpus) {
545 stop_threads();
546 cleanup_trace();
547 return 1;
550 return 0;
553 static void wait_for_threads(void)
555 struct thread_information *tip;
556 int i, tips_running;
558 do {
559 tips_running = 0;
560 usleep(100000);
562 for_each_tip(tip, i)
563 tips_running += !tip->exited;
565 } while (tips_running);
568 static void show_stats(void)
570 struct thread_information *tip;
571 unsigned long long data_read;
572 int i;
574 data_read = 0;
575 for_each_tip(tip, i) {
576 printf(" CPU%3d: %8llu KiB data\n",
577 tip->cpu, (tip->data_read + 1023) >> 10);
578 data_read += tip->data_read;
581 printf(" Total: lost %lu, %8llu KiB data\n",
582 trace_information.lost_records, (data_read + 1023) >> 10);
584 if (trace_information.lost_records)
585 fprintf(stderr, "You have lost records, "
586 "consider using a larger buffer size (-b)\n");
/* Option summary printed by show_usage() after the program name and version. */
static char usage_str[] =
	"[ -r debugfs path ] [ -D output dir ] [ -b buffer size ]\n"
	"[ -n number of buffers] [ -o <output file> ] [ -w time ] [ -V ]\n\n"
	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-V Print program version info\n\n";
600 static void show_usage(char *prog)
602 fprintf(stderr, "Usage: %s %s %s", prog, kvmtrace_version, usage_str);
603 exit(EXIT_FAILURE);
606 void parse_args(int argc, char **argv)
608 int c;
610 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
611 switch (c) {
612 case 'r':
613 debugfs_path = optarg;
614 break;
615 case 'o':
616 output_name = optarg;
617 break;
618 case 'w':
619 stop_watch = atoi(optarg);
620 if (stop_watch <= 0) {
621 fprintf(stderr,
622 "Invalid stopwatch value (%d secs)\n",
623 stop_watch);
624 exit(EXIT_FAILURE);
626 break;
627 case 'V':
628 printf("%s version %s\n", argv[0], kvmtrace_version);
629 exit(EXIT_SUCCESS);
630 case 'b':
631 buf_size = strtoul(optarg, NULL, 10);
632 if (buf_size <= 0 || buf_size > 16*1024) {
633 fprintf(stderr,
634 "Invalid buffer size (%lu)\n",
635 buf_size);
636 exit(EXIT_FAILURE);
638 buf_size <<= 10;
639 break;
640 case 'n':
641 buf_nr = strtoul(optarg, NULL, 10);
642 if (buf_nr <= 0) {
643 fprintf(stderr,
644 "Invalid buffer nr (%lu)\n", buf_nr);
645 exit(EXIT_FAILURE);
647 break;
648 case 'D':
649 output_dir = optarg;
650 break;
651 default:
652 show_usage(argv[0]);
656 if (optind < argc || output_name == NULL)
657 show_usage(argv[0]);
660 int main(int argc, char *argv[])
662 struct statfs st;
664 parse_args(argc, argv);
666 if (!debugfs_path)
667 debugfs_path = default_debugfs_path;
669 if (statfs(debugfs_path, &st) < 0) {
670 perror("statfs");
671 fprintf(stderr, "%s does not appear to be a valid path\n",
672 debugfs_path);
673 return 1;
674 } else if (st.f_type != (long) DEBUGFS_TYPE) {
675 fprintf(stderr, "%s does not appear to be a debug filesystem,"
676 " please mount debugfs.\n",
677 debugfs_path);
678 return 1;
681 page_size = getpagesize();
683 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
684 if (ncpus < 0) {
685 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
686 return 1;
689 signal(SIGINT, handle_sigint);
690 signal(SIGHUP, handle_sigint);
691 signal(SIGTERM, handle_sigint);
692 signal(SIGALRM, handle_sigint);
693 signal(SIGPIPE, SIG_IGN);
695 if (start_kvm_trace() != 0)
696 return 1;
698 if (stop_watch)
699 alarm(stop_watch);
701 wait_for_threads();
702 stop_all_traces();
703 show_stats();
705 return 0;