drbd: merge_bvec_fn: properly remap bvm->bi_bdev
[linux/fpc-iii.git] / kernel / trace / trace.c
blob71136720ffa189ea63c4bfea23b6fab884b9650b
1 /*
2 * ring buffer based function tracer
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
43 #include "trace.h"
44 #include "trace_output.h"
47 * On boot up, the ring buffer is set to the minimum size, so that
48 * we do not waste memory on systems that are not using tracing.
50 bool ring_buffer_expanded;
53 * We need to change this state when a selftest is running.
54 * A selftest will lurk into the ring-buffer to count the
55 * entries inserted during the selftest although some concurrent
56 * insertions into the ring-buffer such as trace_printk could occurred
57 * at the same time, giving false positive or negative results.
59 static bool __read_mostly tracing_selftest_running;
62 * If a tracer is running, we do not want to run SELFTEST.
64 bool __read_mostly tracing_selftest_disabled;
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68 { }
71 static struct tracer_flags dummy_tracer_flags = {
72 .val = 0,
73 .opts = dummy_tracer_opt
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
78 return 0;
82 * To prevent the comm cache from being overwritten when no
83 * tracing is active, only save the comm when a trace event
84 * occurred.
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 * Kill all tracing for good (never come back).
90 * It is initialized to 1 but will turn to zero if the initialization
91 * of the tracer is successful. But that is the only place that sets
92 * this back to zero.
94 static int tracing_disabled = 1;
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98 cpumask_var_t __read_mostly tracing_buffer_mask;
101 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104 * is set, then ftrace_dump is called. This will output the contents
105 * of the ftrace buffers to the console. This is very useful for
106 * capturing traces that lead to crashes and outputing it to a
107 * serial console.
109 * It is default off, but you can enable it with either specifying
110 * "ftrace_dump_on_oops" in the kernel command line, or setting
111 * /proc/sys/kernel/ftrace_dump_on_oops
112 * Set 1 if you want to dump buffers of all CPUs
113 * Set 2 if you want to dump the buffer of the CPU that triggered oops
116 enum ftrace_dump_mode ftrace_dump_on_oops;
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
121 static int tracing_set_tracer(const char *buf);
123 #define MAX_TRACER_SIZE 100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
127 static bool allocate_snapshot;
129 static int __init set_cmdline_ftrace(char *str)
131 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132 default_bootup_tracer = bootup_tracer_buf;
133 /* We are using ftrace early, expand it */
134 ring_buffer_expanded = true;
135 return 1;
137 __setup("ftrace=", set_cmdline_ftrace);
139 static int __init set_ftrace_dump_on_oops(char *str)
141 if (*str++ != '=' || !*str) {
142 ftrace_dump_on_oops = DUMP_ALL;
143 return 1;
146 if (!strcmp("orig_cpu", str)) {
147 ftrace_dump_on_oops = DUMP_ORIG;
148 return 1;
151 return 0;
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155 static int __init stop_trace_on_warning(char *str)
157 __disable_trace_on_warning = 1;
158 return 1;
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
162 static int __init boot_alloc_snapshot(char *str)
164 allocate_snapshot = true;
165 /* We also need the main ring buffer expanded */
166 ring_buffer_expanded = true;
167 return 1;
169 __setup("alloc_snapshot", boot_alloc_snapshot);
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
175 static int __init set_trace_boot_options(char *str)
177 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178 trace_boot_options = trace_boot_options_buf;
179 return 0;
181 __setup("trace_options=", set_trace_boot_options);
184 unsigned long long ns2usecs(cycle_t nsec)
186 nsec += 500;
187 do_div(nsec, 1000);
188 return nsec;
192 * The global_trace is the descriptor that holds the tracing
193 * buffers for the live tracing. For each CPU, it contains
194 * a link list of pages that will store trace entries. The
195 * page descriptor of the pages in the memory is used to hold
196 * the link list by linking the lru item in the page descriptor
197 * to each of the pages in the buffer per CPU.
199 * For each active CPU there is a data field that holds the
200 * pages for the buffer for that CPU. Each CPU has the same number
201 * of pages allocated for its buffer.
203 static struct trace_array global_trace;
205 LIST_HEAD(ftrace_trace_arrays);
207 int trace_array_get(struct trace_array *this_tr)
209 struct trace_array *tr;
210 int ret = -ENODEV;
212 mutex_lock(&trace_types_lock);
213 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214 if (tr == this_tr) {
215 tr->ref++;
216 ret = 0;
217 break;
220 mutex_unlock(&trace_types_lock);
222 return ret;
225 static void __trace_array_put(struct trace_array *this_tr)
227 WARN_ON(!this_tr->ref);
228 this_tr->ref--;
231 void trace_array_put(struct trace_array *this_tr)
233 mutex_lock(&trace_types_lock);
234 __trace_array_put(this_tr);
235 mutex_unlock(&trace_types_lock);
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239 struct ring_buffer *buffer,
240 struct ring_buffer_event *event)
242 if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243 !filter_match_preds(file->filter, rec)) {
244 ring_buffer_discard_commit(buffer, event);
245 return 1;
248 return 0;
250 EXPORT_SYMBOL_GPL(filter_check_discard);
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253 struct ring_buffer *buffer,
254 struct ring_buffer_event *event)
256 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257 !filter_match_preds(call->filter, rec)) {
258 ring_buffer_discard_commit(buffer, event);
259 return 1;
262 return 0;
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
268 u64 ts;
270 /* Early boot up does not have a buffer yet */
271 if (!buf->buffer)
272 return trace_clock_local();
274 ts = ring_buffer_time_stamp(buf->buffer, cpu);
275 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
277 return ts;
280 cycle_t ftrace_now(int cpu)
282 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
286 * tracing_is_enabled - Show if global_trace has been disabled
288 * Shows if the global trace has been enabled or not. It uses the
289 * mirror flag "buffer_disabled" to be used in fast paths such as for
290 * the irqsoff tracer. But it may be inaccurate due to races. If you
291 * need to know the accurate state, use tracing_is_on() which is a little
292 * slower, but accurate.
294 int tracing_is_enabled(void)
297 * For quick access (irqsoff uses this in fast path), just
298 * return the mirror variable of the state of the ring buffer.
299 * It's a little racy, but we don't really care.
301 smp_rmb();
302 return !global_trace.buffer_disabled;
306 * trace_buf_size is the size in bytes that is allocated
307 * for a buffer. Note, the number of bytes is always rounded
308 * to page size.
310 * This number is purposely set to a low number of 16384.
311 * If the dump on oops happens, it will be much appreciated
312 * to not have to wait for all that output. Anyway this can be
313 * boot time and run time configurable.
315 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
317 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
319 /* trace_types holds a link list of available tracers. */
320 static struct tracer *trace_types __read_mostly;
323 * trace_types_lock is used to protect the trace_types list.
325 DEFINE_MUTEX(trace_types_lock);
328 * serialize the access of the ring buffer
330 * ring buffer serializes readers, but it is low level protection.
331 * The validity of the events (which returns by ring_buffer_peek() ..etc)
332 * are not protected by ring buffer.
334 * The content of events may become garbage if we allow other process consumes
335 * these events concurrently:
336 * A) the page of the consumed events may become a normal page
337 * (not reader page) in ring buffer, and this page will be rewrited
338 * by events producer.
339 * B) The page of the consumed events may become a page for splice_read,
340 * and this page will be returned to system.
342 * These primitives allow multi process access to different cpu ring buffer
343 * concurrently.
345 * These primitives don't distinguish read-only and read-consume access.
346 * Multi read-only access are also serialized.
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
353 static inline void trace_access_lock(int cpu)
355 if (cpu == RING_BUFFER_ALL_CPUS) {
356 /* gain it for accessing the whole ring buffer. */
357 down_write(&all_cpu_access_lock);
358 } else {
359 /* gain it for accessing a cpu ring buffer. */
361 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362 down_read(&all_cpu_access_lock);
364 /* Secondly block other access to this @cpu ring buffer. */
365 mutex_lock(&per_cpu(cpu_access_lock, cpu));
369 static inline void trace_access_unlock(int cpu)
371 if (cpu == RING_BUFFER_ALL_CPUS) {
372 up_write(&all_cpu_access_lock);
373 } else {
374 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375 up_read(&all_cpu_access_lock);
379 static inline void trace_access_lock_init(void)
381 int cpu;
383 for_each_possible_cpu(cpu)
384 mutex_init(&per_cpu(cpu_access_lock, cpu));
387 #else
389 static DEFINE_MUTEX(access_lock);
391 static inline void trace_access_lock(int cpu)
393 (void)cpu;
394 mutex_lock(&access_lock);
397 static inline void trace_access_unlock(int cpu)
399 (void)cpu;
400 mutex_unlock(&access_lock);
403 static inline void trace_access_lock_init(void)
407 #endif
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
415 static void tracer_tracing_on(struct trace_array *tr)
417 if (tr->trace_buffer.buffer)
418 ring_buffer_record_on(tr->trace_buffer.buffer);
420 * This flag is looked at when buffers haven't been allocated
421 * yet, or by some tracers (like irqsoff), that just want to
422 * know if the ring buffer has been disabled, but it can handle
423 * races of where it gets disabled but we still do a record.
424 * As the check is in the fast path of the tracers, it is more
425 * important to be fast than accurate.
427 tr->buffer_disabled = 0;
428 /* Make the flag seen by readers */
429 smp_wmb();
433 * tracing_on - enable tracing buffers
435 * This function enables tracing buffers that may have been
436 * disabled with tracing_off.
438 void tracing_on(void)
440 tracer_tracing_on(&global_trace);
442 EXPORT_SYMBOL_GPL(tracing_on);
445 * __trace_puts - write a constant string into the trace buffer.
446 * @ip: The address of the caller
447 * @str: The constant string to write
448 * @size: The size of the string.
450 int __trace_puts(unsigned long ip, const char *str, int size)
452 struct ring_buffer_event *event;
453 struct ring_buffer *buffer;
454 struct print_entry *entry;
455 unsigned long irq_flags;
456 int alloc;
457 int pc;
459 if (!(trace_flags & TRACE_ITER_PRINTK))
460 return 0;
462 pc = preempt_count();
464 if (unlikely(tracing_selftest_running || tracing_disabled))
465 return 0;
467 alloc = sizeof(*entry) + size + 2; /* possible \n added */
469 local_save_flags(irq_flags);
470 buffer = global_trace.trace_buffer.buffer;
471 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
472 irq_flags, pc);
473 if (!event)
474 return 0;
476 entry = ring_buffer_event_data(event);
477 entry->ip = ip;
479 memcpy(&entry->buf, str, size);
481 /* Add a newline if necessary */
482 if (entry->buf[size - 1] != '\n') {
483 entry->buf[size] = '\n';
484 entry->buf[size + 1] = '\0';
485 } else
486 entry->buf[size] = '\0';
488 __buffer_unlock_commit(buffer, event);
489 ftrace_trace_stack(buffer, irq_flags, 4, pc);
491 return size;
493 EXPORT_SYMBOL_GPL(__trace_puts);
496 * __trace_bputs - write the pointer to a constant string into trace buffer
497 * @ip: The address of the caller
498 * @str: The constant string to write to the buffer to
500 int __trace_bputs(unsigned long ip, const char *str)
502 struct ring_buffer_event *event;
503 struct ring_buffer *buffer;
504 struct bputs_entry *entry;
505 unsigned long irq_flags;
506 int size = sizeof(struct bputs_entry);
507 int pc;
509 if (!(trace_flags & TRACE_ITER_PRINTK))
510 return 0;
512 pc = preempt_count();
514 if (unlikely(tracing_selftest_running || tracing_disabled))
515 return 0;
517 local_save_flags(irq_flags);
518 buffer = global_trace.trace_buffer.buffer;
519 event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
520 irq_flags, pc);
521 if (!event)
522 return 0;
524 entry = ring_buffer_event_data(event);
525 entry->ip = ip;
526 entry->str = str;
528 __buffer_unlock_commit(buffer, event);
529 ftrace_trace_stack(buffer, irq_flags, 4, pc);
531 return 1;
533 EXPORT_SYMBOL_GPL(__trace_bputs);
535 #ifdef CONFIG_TRACER_SNAPSHOT
537 * trace_snapshot - take a snapshot of the current buffer.
539 * This causes a swap between the snapshot buffer and the current live
540 * tracing buffer. You can use this to take snapshots of the live
541 * trace when some condition is triggered, but continue to trace.
543 * Note, make sure to allocate the snapshot with either
544 * a tracing_snapshot_alloc(), or by doing it manually
545 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
547 * If the snapshot buffer is not allocated, it will stop tracing.
548 * Basically making a permanent snapshot.
550 void tracing_snapshot(void)
552 struct trace_array *tr = &global_trace;
553 struct tracer *tracer = tr->current_trace;
554 unsigned long flags;
556 if (in_nmi()) {
557 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
558 internal_trace_puts("*** snapshot is being ignored ***\n");
559 return;
562 if (!tr->allocated_snapshot) {
563 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
564 internal_trace_puts("*** stopping trace here! ***\n");
565 tracing_off();
566 return;
569 /* Note, snapshot can not be used when the tracer uses it */
570 if (tracer->use_max_tr) {
571 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
572 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
573 return;
576 local_irq_save(flags);
577 update_max_tr(tr, current, smp_processor_id());
578 local_irq_restore(flags);
580 EXPORT_SYMBOL_GPL(tracing_snapshot);
582 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
583 struct trace_buffer *size_buf, int cpu_id);
584 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
586 static int alloc_snapshot(struct trace_array *tr)
588 int ret;
590 if (!tr->allocated_snapshot) {
592 /* allocate spare buffer */
593 ret = resize_buffer_duplicate_size(&tr->max_buffer,
594 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
595 if (ret < 0)
596 return ret;
598 tr->allocated_snapshot = true;
601 return 0;
604 void free_snapshot(struct trace_array *tr)
607 * We don't free the ring buffer. instead, resize it because
608 * The max_tr ring buffer has some state (e.g. ring->clock) and
609 * we want preserve it.
611 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
612 set_buffer_entries(&tr->max_buffer, 1);
613 tracing_reset_online_cpus(&tr->max_buffer);
614 tr->allocated_snapshot = false;
618 * tracing_alloc_snapshot - allocate snapshot buffer.
620 * This only allocates the snapshot buffer if it isn't already
621 * allocated - it doesn't also take a snapshot.
623 * This is meant to be used in cases where the snapshot buffer needs
624 * to be set up for events that can't sleep but need to be able to
625 * trigger a snapshot.
627 int tracing_alloc_snapshot(void)
629 struct trace_array *tr = &global_trace;
630 int ret;
632 ret = alloc_snapshot(tr);
633 WARN_ON(ret < 0);
635 return ret;
637 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
640 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
642 * This is similar to trace_snapshot(), but it will allocate the
643 * snapshot buffer if it isn't already allocated. Use this only
644 * where it is safe to sleep, as the allocation may sleep.
646 * This causes a swap between the snapshot buffer and the current live
647 * tracing buffer. You can use this to take snapshots of the live
648 * trace when some condition is triggered, but continue to trace.
650 void tracing_snapshot_alloc(void)
652 int ret;
654 ret = tracing_alloc_snapshot();
655 if (ret < 0)
656 return;
658 tracing_snapshot();
660 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
661 #else
662 void tracing_snapshot(void)
664 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
666 EXPORT_SYMBOL_GPL(tracing_snapshot);
667 int tracing_alloc_snapshot(void)
669 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
670 return -ENODEV;
672 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
673 void tracing_snapshot_alloc(void)
675 /* Give warning */
676 tracing_snapshot();
678 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
679 #endif /* CONFIG_TRACER_SNAPSHOT */
681 static void tracer_tracing_off(struct trace_array *tr)
683 if (tr->trace_buffer.buffer)
684 ring_buffer_record_off(tr->trace_buffer.buffer);
686 * This flag is looked at when buffers haven't been allocated
687 * yet, or by some tracers (like irqsoff), that just want to
688 * know if the ring buffer has been disabled, but it can handle
689 * races of where it gets disabled but we still do a record.
690 * As the check is in the fast path of the tracers, it is more
691 * important to be fast than accurate.
693 tr->buffer_disabled = 1;
694 /* Make the flag seen by readers */
695 smp_wmb();
699 * tracing_off - turn off tracing buffers
701 * This function stops the tracing buffers from recording data.
702 * It does not disable any overhead the tracers themselves may
703 * be causing. This function simply causes all recording to
704 * the ring buffers to fail.
706 void tracing_off(void)
708 tracer_tracing_off(&global_trace);
710 EXPORT_SYMBOL_GPL(tracing_off);
712 void disable_trace_on_warning(void)
714 if (__disable_trace_on_warning)
715 tracing_off();
719 * tracer_tracing_is_on - show real state of ring buffer enabled
720 * @tr : the trace array to know if ring buffer is enabled
722 * Shows real state of the ring buffer if it is enabled or not.
724 static int tracer_tracing_is_on(struct trace_array *tr)
726 if (tr->trace_buffer.buffer)
727 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
728 return !tr->buffer_disabled;
732 * tracing_is_on - show state of ring buffers enabled
734 int tracing_is_on(void)
736 return tracer_tracing_is_on(&global_trace);
738 EXPORT_SYMBOL_GPL(tracing_is_on);
740 static int __init set_buf_size(char *str)
742 unsigned long buf_size;
744 if (!str)
745 return 0;
746 buf_size = memparse(str, &str);
747 /* nr_entries can not be zero */
748 if (buf_size == 0)
749 return 0;
750 trace_buf_size = buf_size;
751 return 1;
753 __setup("trace_buf_size=", set_buf_size);
755 static int __init set_tracing_thresh(char *str)
757 unsigned long threshold;
758 int ret;
760 if (!str)
761 return 0;
762 ret = kstrtoul(str, 0, &threshold);
763 if (ret < 0)
764 return 0;
765 tracing_thresh = threshold * 1000;
766 return 1;
768 __setup("tracing_thresh=", set_tracing_thresh);
770 unsigned long nsecs_to_usecs(unsigned long nsecs)
772 return nsecs / 1000;
775 /* These must match the bit postions in trace_iterator_flags */
776 static const char *trace_options[] = {
777 "print-parent",
778 "sym-offset",
779 "sym-addr",
780 "verbose",
781 "raw",
782 "hex",
783 "bin",
784 "block",
785 "stacktrace",
786 "trace_printk",
787 "ftrace_preempt",
788 "branch",
789 "annotate",
790 "userstacktrace",
791 "sym-userobj",
792 "printk-msg-only",
793 "context-info",
794 "latency-format",
795 "sleep-time",
796 "graph-time",
797 "record-cmd",
798 "overwrite",
799 "disable_on_free",
800 "irq-info",
801 "markers",
802 "function-trace",
803 NULL
806 static struct {
807 u64 (*func)(void);
808 const char *name;
809 int in_ns; /* is this clock in nanoseconds? */
810 } trace_clocks[] = {
811 { trace_clock_local, "local", 1 },
812 { trace_clock_global, "global", 1 },
813 { trace_clock_counter, "counter", 0 },
814 { trace_clock_jiffies, "uptime", 0 },
815 { trace_clock, "perf", 1 },
816 ARCH_TRACE_CLOCKS
820 * trace_parser_get_init - gets the buffer for trace parser
822 int trace_parser_get_init(struct trace_parser *parser, int size)
824 memset(parser, 0, sizeof(*parser));
826 parser->buffer = kmalloc(size, GFP_KERNEL);
827 if (!parser->buffer)
828 return 1;
830 parser->size = size;
831 return 0;
835 * trace_parser_put - frees the buffer for trace parser
837 void trace_parser_put(struct trace_parser *parser)
839 kfree(parser->buffer);
843 * trace_get_user - reads the user input string separated by space
844 * (matched by isspace(ch))
846 * For each string found the 'struct trace_parser' is updated,
847 * and the function returns.
849 * Returns number of bytes read.
851 * See kernel/trace/trace.h for 'struct trace_parser' details.
853 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
854 size_t cnt, loff_t *ppos)
856 char ch;
857 size_t read = 0;
858 ssize_t ret;
860 if (!*ppos)
861 trace_parser_clear(parser);
863 ret = get_user(ch, ubuf++);
864 if (ret)
865 goto out;
867 read++;
868 cnt--;
871 * The parser is not finished with the last write,
872 * continue reading the user input without skipping spaces.
874 if (!parser->cont) {
875 /* skip white space */
876 while (cnt && isspace(ch)) {
877 ret = get_user(ch, ubuf++);
878 if (ret)
879 goto out;
880 read++;
881 cnt--;
884 /* only spaces were written */
885 if (isspace(ch)) {
886 *ppos += read;
887 ret = read;
888 goto out;
891 parser->idx = 0;
894 /* read the non-space input */
895 while (cnt && !isspace(ch)) {
896 if (parser->idx < parser->size - 1)
897 parser->buffer[parser->idx++] = ch;
898 else {
899 ret = -EINVAL;
900 goto out;
902 ret = get_user(ch, ubuf++);
903 if (ret)
904 goto out;
905 read++;
906 cnt--;
909 /* We either got finished input or we have to wait for another call. */
910 if (isspace(ch)) {
911 parser->buffer[parser->idx] = 0;
912 parser->cont = false;
913 } else if (parser->idx < parser->size - 1) {
914 parser->cont = true;
915 parser->buffer[parser->idx++] = ch;
916 } else {
917 ret = -EINVAL;
918 goto out;
921 *ppos += read;
922 ret = read;
924 out:
925 return ret;
928 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
930 int len;
931 int ret;
933 if (!cnt)
934 return 0;
936 if (s->len <= s->readpos)
937 return -EBUSY;
939 len = s->len - s->readpos;
940 if (cnt > len)
941 cnt = len;
942 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
943 if (ret == cnt)
944 return -EFAULT;
946 cnt -= ret;
948 s->readpos += cnt;
949 return cnt;
952 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
954 int len;
956 if (s->len <= s->readpos)
957 return -EBUSY;
959 len = s->len - s->readpos;
960 if (cnt > len)
961 cnt = len;
962 memcpy(buf, s->buffer + s->readpos, cnt);
964 s->readpos += cnt;
965 return cnt;
969 * ftrace_max_lock is used to protect the swapping of buffers
970 * when taking a max snapshot. The buffers themselves are
971 * protected by per_cpu spinlocks. But the action of the swap
972 * needs its own lock.
974 * This is defined as a arch_spinlock_t in order to help
975 * with performance when lockdep debugging is enabled.
977 * It is also used in other places outside the update_max_tr
978 * so it needs to be defined outside of the
979 * CONFIG_TRACER_MAX_TRACE.
981 static arch_spinlock_t ftrace_max_lock =
982 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
984 unsigned long __read_mostly tracing_thresh;
986 #ifdef CONFIG_TRACER_MAX_TRACE
987 unsigned long __read_mostly tracing_max_latency;
990 * Copy the new maximum trace into the separate maximum-trace
991 * structure. (this way the maximum trace is permanently saved,
992 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
994 static void
995 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
997 struct trace_buffer *trace_buf = &tr->trace_buffer;
998 struct trace_buffer *max_buf = &tr->max_buffer;
999 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1000 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1002 max_buf->cpu = cpu;
1003 max_buf->time_start = data->preempt_timestamp;
1005 max_data->saved_latency = tracing_max_latency;
1006 max_data->critical_start = data->critical_start;
1007 max_data->critical_end = data->critical_end;
1009 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1010 max_data->pid = tsk->pid;
1012 * If tsk == current, then use current_uid(), as that does not use
1013 * RCU. The irq tracer can be called out of RCU scope.
1015 if (tsk == current)
1016 max_data->uid = current_uid();
1017 else
1018 max_data->uid = task_uid(tsk);
1020 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1021 max_data->policy = tsk->policy;
1022 max_data->rt_priority = tsk->rt_priority;
1024 /* record this tasks comm */
1025 tracing_record_cmdline(tsk);
1029 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1030 * @tr: tracer
1031 * @tsk: the task with the latency
1032 * @cpu: The cpu that initiated the trace.
1034 * Flip the buffers between the @tr and the max_tr and record information
1035 * about which task was the cause of this latency.
1037 void
1038 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1040 struct ring_buffer *buf;
1042 if (tr->stop_count)
1043 return;
1045 WARN_ON_ONCE(!irqs_disabled());
1047 if (!tr->allocated_snapshot) {
1048 /* Only the nop tracer should hit this when disabling */
1049 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1050 return;
1053 arch_spin_lock(&ftrace_max_lock);
1055 buf = tr->trace_buffer.buffer;
1056 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1057 tr->max_buffer.buffer = buf;
1059 __update_max_tr(tr, tsk, cpu);
1060 arch_spin_unlock(&ftrace_max_lock);
1064 * update_max_tr_single - only copy one trace over, and reset the rest
1065 * @tr - tracer
1066 * @tsk - task with the latency
1067 * @cpu - the cpu of the buffer to copy.
1069 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1071 void
1072 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1074 int ret;
1076 if (tr->stop_count)
1077 return;
1079 WARN_ON_ONCE(!irqs_disabled());
1080 if (!tr->allocated_snapshot) {
1081 /* Only the nop tracer should hit this when disabling */
1082 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1083 return;
1086 arch_spin_lock(&ftrace_max_lock);
1088 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1090 if (ret == -EBUSY) {
1092 * We failed to swap the buffer due to a commit taking
1093 * place on this CPU. We fail to record, but we reset
1094 * the max trace buffer (no one writes directly to it)
1095 * and flag that it failed.
1097 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1098 "Failed to swap buffers due to commit in progress\n");
1101 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1103 __update_max_tr(tr, tsk, cpu);
1104 arch_spin_unlock(&ftrace_max_lock);
1106 #endif /* CONFIG_TRACER_MAX_TRACE */
1108 static int default_wait_pipe(struct trace_iterator *iter)
1110 /* Iterators are static, they should be filled or empty */
1111 if (trace_buffer_iter(iter, iter->cpu_file))
1112 return 0;
1114 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1117 #ifdef CONFIG_FTRACE_STARTUP_TEST
1118 static int run_tracer_selftest(struct tracer *type)
1120 struct trace_array *tr = &global_trace;
1121 struct tracer *saved_tracer = tr->current_trace;
1122 int ret;
1124 if (!type->selftest || tracing_selftest_disabled)
1125 return 0;
1128 * Run a selftest on this tracer.
1129 * Here we reset the trace buffer, and set the current
1130 * tracer to be this tracer. The tracer can then run some
1131 * internal tracing to verify that everything is in order.
1132 * If we fail, we do not register this tracer.
1134 tracing_reset_online_cpus(&tr->trace_buffer);
1136 tr->current_trace = type;
1138 #ifdef CONFIG_TRACER_MAX_TRACE
1139 if (type->use_max_tr) {
1140 /* If we expanded the buffers, make sure the max is expanded too */
1141 if (ring_buffer_expanded)
1142 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1143 RING_BUFFER_ALL_CPUS);
1144 tr->allocated_snapshot = true;
1146 #endif
1148 /* the test is responsible for initializing and enabling */
1149 pr_info("Testing tracer %s: ", type->name);
1150 ret = type->selftest(type, tr);
1151 /* the test is responsible for resetting too */
1152 tr->current_trace = saved_tracer;
1153 if (ret) {
1154 printk(KERN_CONT "FAILED!\n");
1155 /* Add the warning after printing 'FAILED' */
1156 WARN_ON(1);
1157 return -1;
1159 /* Only reset on passing, to avoid touching corrupted buffers */
1160 tracing_reset_online_cpus(&tr->trace_buffer);
1162 #ifdef CONFIG_TRACER_MAX_TRACE
1163 if (type->use_max_tr) {
1164 tr->allocated_snapshot = false;
1166 /* Shrink the max buffer again */
1167 if (ring_buffer_expanded)
1168 ring_buffer_resize(tr->max_buffer.buffer, 1,
1169 RING_BUFFER_ALL_CPUS);
1171 #endif
1173 printk(KERN_CONT "PASSED\n");
1174 return 0;
1176 #else
1177 static inline int run_tracer_selftest(struct tracer *type)
1179 return 0;
1181 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1184 * register_tracer - register a tracer with the ftrace system.
1185 * @type - the plugin for the tracer
1187 * Register a new plugin tracer.
1189 int register_tracer(struct tracer *type)
1191 struct tracer *t;
1192 int ret = 0;
1194 if (!type->name) {
1195 pr_info("Tracer must have a name\n");
1196 return -1;
1199 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1200 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1201 return -1;
1204 mutex_lock(&trace_types_lock);
1206 tracing_selftest_running = true;
1208 for (t = trace_types; t; t = t->next) {
1209 if (strcmp(type->name, t->name) == 0) {
1210 /* already found */
1211 pr_info("Tracer %s already registered\n",
1212 type->name);
1213 ret = -1;
1214 goto out;
1218 if (!type->set_flag)
1219 type->set_flag = &dummy_set_flag;
1220 if (!type->flags)
1221 type->flags = &dummy_tracer_flags;
1222 else
1223 if (!type->flags->opts)
1224 type->flags->opts = dummy_tracer_opt;
1225 if (!type->wait_pipe)
1226 type->wait_pipe = default_wait_pipe;
1228 ret = run_tracer_selftest(type);
1229 if (ret < 0)
1230 goto out;
1232 type->next = trace_types;
1233 trace_types = type;
1235 out:
1236 tracing_selftest_running = false;
1237 mutex_unlock(&trace_types_lock);
1239 if (ret || !default_bootup_tracer)
1240 goto out_unlock;
1242 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1243 goto out_unlock;
1245 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1246 /* Do we want this tracer to start on bootup? */
1247 tracing_set_tracer(type->name);
1248 default_bootup_tracer = NULL;
1249 /* disable other selftests, since this will break it. */
1250 tracing_selftest_disabled = true;
1251 #ifdef CONFIG_FTRACE_STARTUP_TEST
1252 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1253 type->name);
1254 #endif
1256 out_unlock:
1257 return ret;
1260 void tracing_reset(struct trace_buffer *buf, int cpu)
1262 struct ring_buffer *buffer = buf->buffer;
1264 if (!buffer)
1265 return;
1267 ring_buffer_record_disable(buffer);
1269 /* Make sure all commits have finished */
1270 synchronize_sched();
1271 ring_buffer_reset_cpu(buffer, cpu);
1273 ring_buffer_record_enable(buffer);
1276 void tracing_reset_online_cpus(struct trace_buffer *buf)
1278 struct ring_buffer *buffer = buf->buffer;
1279 int cpu;
1281 if (!buffer)
1282 return;
1284 ring_buffer_record_disable(buffer);
1286 /* Make sure all commits have finished */
1287 synchronize_sched();
1289 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1291 for_each_online_cpu(cpu)
1292 ring_buffer_reset_cpu(buffer, cpu);
1294 ring_buffer_record_enable(buffer);
1297 /* Must have trace_types_lock held */
1298 void tracing_reset_all_online_cpus(void)
1300 struct trace_array *tr;
1302 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1303 tracing_reset_online_cpus(&tr->trace_buffer);
1304 #ifdef CONFIG_TRACER_MAX_TRACE
1305 tracing_reset_online_cpus(&tr->max_buffer);
1306 #endif
1310 #define SAVED_CMDLINES 128
1311 #define NO_CMDLINE_MAP UINT_MAX
1312 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1313 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1314 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1315 static int cmdline_idx;
1316 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1318 /* temporary disable recording */
1319 static atomic_t trace_record_cmdline_disabled __read_mostly;
1321 static void trace_init_cmdlines(void)
1323 memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1324 memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1325 cmdline_idx = 0;
1328 int is_tracing_stopped(void)
1330 return global_trace.stop_count;
1334 * tracing_start - quick start of the tracer
1336 * If tracing is enabled but was stopped by tracing_stop,
1337 * this will start the tracer back up.
1339 void tracing_start(void)
1341 struct ring_buffer *buffer;
1342 unsigned long flags;
1344 if (tracing_disabled)
1345 return;
1347 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1348 if (--global_trace.stop_count) {
1349 if (global_trace.stop_count < 0) {
1350 /* Someone screwed up their debugging */
1351 WARN_ON_ONCE(1);
1352 global_trace.stop_count = 0;
1354 goto out;
1357 /* Prevent the buffers from switching */
1358 arch_spin_lock(&ftrace_max_lock);
1360 buffer = global_trace.trace_buffer.buffer;
1361 if (buffer)
1362 ring_buffer_record_enable(buffer);
1364 #ifdef CONFIG_TRACER_MAX_TRACE
1365 buffer = global_trace.max_buffer.buffer;
1366 if (buffer)
1367 ring_buffer_record_enable(buffer);
1368 #endif
1370 arch_spin_unlock(&ftrace_max_lock);
1372 out:
1373 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1376 static void tracing_start_tr(struct trace_array *tr)
1378 struct ring_buffer *buffer;
1379 unsigned long flags;
1381 if (tracing_disabled)
1382 return;
1384 /* If global, we need to also start the max tracer */
1385 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1386 return tracing_start();
1388 raw_spin_lock_irqsave(&tr->start_lock, flags);
1390 if (--tr->stop_count) {
1391 if (tr->stop_count < 0) {
1392 /* Someone screwed up their debugging */
1393 WARN_ON_ONCE(1);
1394 tr->stop_count = 0;
1396 goto out;
1399 buffer = tr->trace_buffer.buffer;
1400 if (buffer)
1401 ring_buffer_record_enable(buffer);
1403 out:
1404 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1408 * tracing_stop - quick stop of the tracer
1410 * Light weight way to stop tracing. Use in conjunction with
1411 * tracing_start.
1413 void tracing_stop(void)
1415 struct ring_buffer *buffer;
1416 unsigned long flags;
1418 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1419 if (global_trace.stop_count++)
1420 goto out;
1422 /* Prevent the buffers from switching */
1423 arch_spin_lock(&ftrace_max_lock);
1425 buffer = global_trace.trace_buffer.buffer;
1426 if (buffer)
1427 ring_buffer_record_disable(buffer);
1429 #ifdef CONFIG_TRACER_MAX_TRACE
1430 buffer = global_trace.max_buffer.buffer;
1431 if (buffer)
1432 ring_buffer_record_disable(buffer);
1433 #endif
1435 arch_spin_unlock(&ftrace_max_lock);
1437 out:
1438 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1441 static void tracing_stop_tr(struct trace_array *tr)
1443 struct ring_buffer *buffer;
1444 unsigned long flags;
1446 /* If global, we need to also stop the max tracer */
1447 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1448 return tracing_stop();
1450 raw_spin_lock_irqsave(&tr->start_lock, flags);
1451 if (tr->stop_count++)
1452 goto out;
1454 buffer = tr->trace_buffer.buffer;
1455 if (buffer)
1456 ring_buffer_record_disable(buffer);
1458 out:
1459 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1462 void trace_stop_cmdline_recording(void);
1464 static int trace_save_cmdline(struct task_struct *tsk)
1466 unsigned pid, idx;
1468 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1469 return 0;
1472 * It's not the end of the world if we don't get
1473 * the lock, but we also don't want to spin
1474 * nor do we want to disable interrupts,
1475 * so if we miss here, then better luck next time.
1477 if (!arch_spin_trylock(&trace_cmdline_lock))
1478 return 0;
1480 idx = map_pid_to_cmdline[tsk->pid];
1481 if (idx == NO_CMDLINE_MAP) {
1482 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1485 * Check whether the cmdline buffer at idx has a pid
1486 * mapped. We are going to overwrite that entry so we
1487 * need to clear the map_pid_to_cmdline. Otherwise we
1488 * would read the new comm for the old pid.
1490 pid = map_cmdline_to_pid[idx];
1491 if (pid != NO_CMDLINE_MAP)
1492 map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1494 map_cmdline_to_pid[idx] = tsk->pid;
1495 map_pid_to_cmdline[tsk->pid] = idx;
1497 cmdline_idx = idx;
1500 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1502 arch_spin_unlock(&trace_cmdline_lock);
1504 return 1;
1507 void trace_find_cmdline(int pid, char comm[])
1509 unsigned map;
1511 if (!pid) {
1512 strcpy(comm, "<idle>");
1513 return;
1516 if (WARN_ON_ONCE(pid < 0)) {
1517 strcpy(comm, "<XXX>");
1518 return;
1521 if (pid > PID_MAX_DEFAULT) {
1522 strcpy(comm, "<...>");
1523 return;
1526 preempt_disable();
1527 arch_spin_lock(&trace_cmdline_lock);
1528 map = map_pid_to_cmdline[pid];
1529 if (map != NO_CMDLINE_MAP)
1530 strcpy(comm, saved_cmdlines[map]);
1531 else
1532 strcpy(comm, "<...>");
1534 arch_spin_unlock(&trace_cmdline_lock);
1535 preempt_enable();
1538 void tracing_record_cmdline(struct task_struct *tsk)
1540 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1541 return;
1543 if (!__this_cpu_read(trace_cmdline_save))
1544 return;
1546 if (trace_save_cmdline(tsk))
1547 __this_cpu_write(trace_cmdline_save, false);
1550 void
1551 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1552 int pc)
1554 struct task_struct *tsk = current;
1556 entry->preempt_count = pc & 0xff;
1557 entry->pid = (tsk) ? tsk->pid : 0;
1558 entry->flags =
1559 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1560 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1561 #else
1562 TRACE_FLAG_IRQS_NOSUPPORT |
1563 #endif
1564 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1565 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1566 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1567 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1569 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1571 struct ring_buffer_event *
1572 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1573 int type,
1574 unsigned long len,
1575 unsigned long flags, int pc)
1577 struct ring_buffer_event *event;
1579 event = ring_buffer_lock_reserve(buffer, len);
1580 if (event != NULL) {
1581 struct trace_entry *ent = ring_buffer_event_data(event);
1583 tracing_generic_entry_update(ent, flags, pc);
1584 ent->type = type;
1587 return event;
1590 void
1591 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1593 __this_cpu_write(trace_cmdline_save, true);
1594 ring_buffer_unlock_commit(buffer, event);
1597 static inline void
1598 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1599 struct ring_buffer_event *event,
1600 unsigned long flags, int pc)
1602 __buffer_unlock_commit(buffer, event);
1604 ftrace_trace_stack(buffer, flags, 6, pc);
1605 ftrace_trace_userstack(buffer, flags, pc);
1608 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1609 struct ring_buffer_event *event,
1610 unsigned long flags, int pc)
1612 __trace_buffer_unlock_commit(buffer, event, flags, pc);
1614 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1616 static struct ring_buffer *temp_buffer;
1618 struct ring_buffer_event *
1619 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1620 struct ftrace_event_file *ftrace_file,
1621 int type, unsigned long len,
1622 unsigned long flags, int pc)
1624 struct ring_buffer_event *entry;
1626 *current_rb = ftrace_file->tr->trace_buffer.buffer;
1627 entry = trace_buffer_lock_reserve(*current_rb,
1628 type, len, flags, pc);
1630 * If tracing is off, but we have triggers enabled
1631 * we still need to look at the event data. Use the temp_buffer
1632 * to store the trace event for the tigger to use. It's recusive
1633 * safe and will not be recorded anywhere.
1635 if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1636 *current_rb = temp_buffer;
1637 entry = trace_buffer_lock_reserve(*current_rb,
1638 type, len, flags, pc);
1640 return entry;
1642 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1644 struct ring_buffer_event *
1645 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1646 int type, unsigned long len,
1647 unsigned long flags, int pc)
1649 *current_rb = global_trace.trace_buffer.buffer;
1650 return trace_buffer_lock_reserve(*current_rb,
1651 type, len, flags, pc);
1653 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1655 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1656 struct ring_buffer_event *event,
1657 unsigned long flags, int pc)
1659 __trace_buffer_unlock_commit(buffer, event, flags, pc);
1661 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1663 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1664 struct ring_buffer_event *event,
1665 unsigned long flags, int pc,
1666 struct pt_regs *regs)
1668 __buffer_unlock_commit(buffer, event);
1670 ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1671 ftrace_trace_userstack(buffer, flags, pc);
1673 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1675 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1676 struct ring_buffer_event *event)
1678 ring_buffer_discard_commit(buffer, event);
1680 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1682 void
1683 trace_function(struct trace_array *tr,
1684 unsigned long ip, unsigned long parent_ip, unsigned long flags,
1685 int pc)
1687 struct ftrace_event_call *call = &event_function;
1688 struct ring_buffer *buffer = tr->trace_buffer.buffer;
1689 struct ring_buffer_event *event;
1690 struct ftrace_entry *entry;
1692 /* If we are reading the ring buffer, don't trace */
1693 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1694 return;
1696 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1697 flags, pc);
1698 if (!event)
1699 return;
1700 entry = ring_buffer_event_data(event);
1701 entry->ip = ip;
1702 entry->parent_ip = parent_ip;
1704 if (!call_filter_check_discard(call, entry, buffer, event))
1705 __buffer_unlock_commit(buffer, event);
1708 #ifdef CONFIG_STACKTRACE
1710 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1711 struct ftrace_stack {
1712 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
1715 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1716 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1719 unsigned long flags,
1720 int skip, int pc, struct pt_regs *regs)
1722 struct ftrace_event_call *call = &event_kernel_stack;
1723 struct ring_buffer_event *event;
1724 struct stack_entry *entry;
1725 struct stack_trace trace;
1726 int use_stack;
1727 int size = FTRACE_STACK_ENTRIES;
1729 trace.nr_entries = 0;
1730 trace.skip = skip;
1733 * Since events can happen in NMIs there's no safe way to
1734 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1735 * or NMI comes in, it will just have to use the default
1736 * FTRACE_STACK_SIZE.
1738 preempt_disable_notrace();
1740 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1742 * We don't need any atomic variables, just a barrier.
1743 * If an interrupt comes in, we don't care, because it would
1744 * have exited and put the counter back to what we want.
1745 * We just need a barrier to keep gcc from moving things
1746 * around.
1748 barrier();
1749 if (use_stack == 1) {
1750 trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
1751 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
1753 if (regs)
1754 save_stack_trace_regs(regs, &trace);
1755 else
1756 save_stack_trace(&trace);
1758 if (trace.nr_entries > size)
1759 size = trace.nr_entries;
1760 } else
1761 /* From now on, use_stack is a boolean */
1762 use_stack = 0;
1764 size *= sizeof(unsigned long);
1766 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1767 sizeof(*entry) + size, flags, pc);
1768 if (!event)
1769 goto out;
1770 entry = ring_buffer_event_data(event);
1772 memset(&entry->caller, 0, size);
1774 if (use_stack)
1775 memcpy(&entry->caller, trace.entries,
1776 trace.nr_entries * sizeof(unsigned long));
1777 else {
1778 trace.max_entries = FTRACE_STACK_ENTRIES;
1779 trace.entries = entry->caller;
1780 if (regs)
1781 save_stack_trace_regs(regs, &trace);
1782 else
1783 save_stack_trace(&trace);
1786 entry->size = trace.nr_entries;
1788 if (!call_filter_check_discard(call, entry, buffer, event))
1789 __buffer_unlock_commit(buffer, event);
1791 out:
1792 /* Again, don't let gcc optimize things here */
1793 barrier();
1794 __this_cpu_dec(ftrace_stack_reserve);
1795 preempt_enable_notrace();
1799 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1800 int skip, int pc, struct pt_regs *regs)
1802 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1803 return;
1805 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1808 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1809 int skip, int pc)
1811 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1812 return;
1814 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1817 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1818 int pc)
1820 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1824 * trace_dump_stack - record a stack back trace in the trace buffer
1825 * @skip: Number of functions to skip (helper handlers)
1827 void trace_dump_stack(int skip)
1829 unsigned long flags;
1831 if (tracing_disabled || tracing_selftest_running)
1832 return;
1834 local_save_flags(flags);
1837 * Skip 3 more, seems to get us at the caller of
1838 * this function.
1840 skip += 3;
1841 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1842 flags, skip, preempt_count(), NULL);
1845 static DEFINE_PER_CPU(int, user_stack_count);
1847 void
1848 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1850 struct ftrace_event_call *call = &event_user_stack;
1851 struct ring_buffer_event *event;
1852 struct userstack_entry *entry;
1853 struct stack_trace trace;
1855 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1856 return;
1859 * NMIs can not handle page faults, even with fix ups.
1860 * The save user stack can (and often does) fault.
1862 if (unlikely(in_nmi()))
1863 return;
1866 * prevent recursion, since the user stack tracing may
1867 * trigger other kernel events.
1869 preempt_disable();
1870 if (__this_cpu_read(user_stack_count))
1871 goto out;
1873 __this_cpu_inc(user_stack_count);
1875 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1876 sizeof(*entry), flags, pc);
1877 if (!event)
1878 goto out_drop_count;
1879 entry = ring_buffer_event_data(event);
1881 entry->tgid = current->tgid;
1882 memset(&entry->caller, 0, sizeof(entry->caller));
1884 trace.nr_entries = 0;
1885 trace.max_entries = FTRACE_STACK_ENTRIES;
1886 trace.skip = 0;
1887 trace.entries = entry->caller;
1889 save_stack_trace_user(&trace);
1890 if (!call_filter_check_discard(call, entry, buffer, event))
1891 __buffer_unlock_commit(buffer, event);
1893 out_drop_count:
1894 __this_cpu_dec(user_stack_count);
1895 out:
1896 preempt_enable();
1899 #ifdef UNUSED
1900 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1902 ftrace_trace_userstack(tr, flags, preempt_count());
1904 #endif /* UNUSED */
1906 #endif /* CONFIG_STACKTRACE */
1908 /* created for use with alloc_percpu */
1909 struct trace_buffer_struct {
1910 char buffer[TRACE_BUF_SIZE];
1913 static struct trace_buffer_struct *trace_percpu_buffer;
1914 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1915 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1916 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1919 * The buffer used is dependent on the context. There is a per cpu
1920 * buffer for normal context, softirq contex, hard irq context and
1921 * for NMI context. Thise allows for lockless recording.
1923 * Note, if the buffers failed to be allocated, then this returns NULL
1925 static char *get_trace_buf(void)
1927 struct trace_buffer_struct *percpu_buffer;
1930 * If we have allocated per cpu buffers, then we do not
1931 * need to do any locking.
1933 if (in_nmi())
1934 percpu_buffer = trace_percpu_nmi_buffer;
1935 else if (in_irq())
1936 percpu_buffer = trace_percpu_irq_buffer;
1937 else if (in_softirq())
1938 percpu_buffer = trace_percpu_sirq_buffer;
1939 else
1940 percpu_buffer = trace_percpu_buffer;
1942 if (!percpu_buffer)
1943 return NULL;
1945 return this_cpu_ptr(&percpu_buffer->buffer[0]);
1948 static int alloc_percpu_trace_buffer(void)
1950 struct trace_buffer_struct *buffers;
1951 struct trace_buffer_struct *sirq_buffers;
1952 struct trace_buffer_struct *irq_buffers;
1953 struct trace_buffer_struct *nmi_buffers;
1955 buffers = alloc_percpu(struct trace_buffer_struct);
1956 if (!buffers)
1957 goto err_warn;
1959 sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1960 if (!sirq_buffers)
1961 goto err_sirq;
1963 irq_buffers = alloc_percpu(struct trace_buffer_struct);
1964 if (!irq_buffers)
1965 goto err_irq;
1967 nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1968 if (!nmi_buffers)
1969 goto err_nmi;
1971 trace_percpu_buffer = buffers;
1972 trace_percpu_sirq_buffer = sirq_buffers;
1973 trace_percpu_irq_buffer = irq_buffers;
1974 trace_percpu_nmi_buffer = nmi_buffers;
1976 return 0;
1978 err_nmi:
1979 free_percpu(irq_buffers);
1980 err_irq:
1981 free_percpu(sirq_buffers);
1982 err_sirq:
1983 free_percpu(buffers);
1984 err_warn:
1985 WARN(1, "Could not allocate percpu trace_printk buffer");
1986 return -ENOMEM;
1989 static int buffers_allocated;
1991 void trace_printk_init_buffers(void)
1993 if (buffers_allocated)
1994 return;
1996 if (alloc_percpu_trace_buffer())
1997 return;
1999 pr_info("ftrace: Allocated trace_printk buffers\n");
2001 /* Expand the buffers to set size */
2002 tracing_update_buffers();
2004 buffers_allocated = 1;
2007 * trace_printk_init_buffers() can be called by modules.
2008 * If that happens, then we need to start cmdline recording
2009 * directly here. If the global_trace.buffer is already
2010 * allocated here, then this was called by module code.
2012 if (global_trace.trace_buffer.buffer)
2013 tracing_start_cmdline_record();
2016 void trace_printk_start_comm(void)
2018 /* Start tracing comms if trace printk is set */
2019 if (!buffers_allocated)
2020 return;
2021 tracing_start_cmdline_record();
2024 static void trace_printk_start_stop_comm(int enabled)
2026 if (!buffers_allocated)
2027 return;
2029 if (enabled)
2030 tracing_start_cmdline_record();
2031 else
2032 tracing_stop_cmdline_record();
2036 * trace_vbprintk - write binary msg to tracing buffer
2039 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2041 struct ftrace_event_call *call = &event_bprint;
2042 struct ring_buffer_event *event;
2043 struct ring_buffer *buffer;
2044 struct trace_array *tr = &global_trace;
2045 struct bprint_entry *entry;
2046 unsigned long flags;
2047 char *tbuffer;
2048 int len = 0, size, pc;
2050 if (unlikely(tracing_selftest_running || tracing_disabled))
2051 return 0;
2053 /* Don't pollute graph traces with trace_vprintk internals */
2054 pause_graph_tracing();
2056 pc = preempt_count();
2057 preempt_disable_notrace();
2059 tbuffer = get_trace_buf();
2060 if (!tbuffer) {
2061 len = 0;
2062 goto out;
2065 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2067 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2068 goto out;
2070 local_save_flags(flags);
2071 size = sizeof(*entry) + sizeof(u32) * len;
2072 buffer = tr->trace_buffer.buffer;
2073 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2074 flags, pc);
2075 if (!event)
2076 goto out;
2077 entry = ring_buffer_event_data(event);
2078 entry->ip = ip;
2079 entry->fmt = fmt;
2081 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2082 if (!call_filter_check_discard(call, entry, buffer, event)) {
2083 __buffer_unlock_commit(buffer, event);
2084 ftrace_trace_stack(buffer, flags, 6, pc);
2087 out:
2088 preempt_enable_notrace();
2089 unpause_graph_tracing();
2091 return len;
2093 EXPORT_SYMBOL_GPL(trace_vbprintk);
2095 static int
2096 __trace_array_vprintk(struct ring_buffer *buffer,
2097 unsigned long ip, const char *fmt, va_list args)
2099 struct ftrace_event_call *call = &event_print;
2100 struct ring_buffer_event *event;
2101 int len = 0, size, pc;
2102 struct print_entry *entry;
2103 unsigned long flags;
2104 char *tbuffer;
2106 if (tracing_disabled || tracing_selftest_running)
2107 return 0;
2109 /* Don't pollute graph traces with trace_vprintk internals */
2110 pause_graph_tracing();
2112 pc = preempt_count();
2113 preempt_disable_notrace();
2116 tbuffer = get_trace_buf();
2117 if (!tbuffer) {
2118 len = 0;
2119 goto out;
2122 len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2123 if (len > TRACE_BUF_SIZE)
2124 goto out;
2126 local_save_flags(flags);
2127 size = sizeof(*entry) + len + 1;
2128 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2129 flags, pc);
2130 if (!event)
2131 goto out;
2132 entry = ring_buffer_event_data(event);
2133 entry->ip = ip;
2135 memcpy(&entry->buf, tbuffer, len);
2136 entry->buf[len] = '\0';
2137 if (!call_filter_check_discard(call, entry, buffer, event)) {
2138 __buffer_unlock_commit(buffer, event);
2139 ftrace_trace_stack(buffer, flags, 6, pc);
2141 out:
2142 preempt_enable_notrace();
2143 unpause_graph_tracing();
2145 return len;
2148 int trace_array_vprintk(struct trace_array *tr,
2149 unsigned long ip, const char *fmt, va_list args)
2151 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2154 int trace_array_printk(struct trace_array *tr,
2155 unsigned long ip, const char *fmt, ...)
2157 int ret;
2158 va_list ap;
2160 if (!(trace_flags & TRACE_ITER_PRINTK))
2161 return 0;
2163 va_start(ap, fmt);
2164 ret = trace_array_vprintk(tr, ip, fmt, ap);
2165 va_end(ap);
2166 return ret;
2169 int trace_array_printk_buf(struct ring_buffer *buffer,
2170 unsigned long ip, const char *fmt, ...)
2172 int ret;
2173 va_list ap;
2175 if (!(trace_flags & TRACE_ITER_PRINTK))
2176 return 0;
2178 va_start(ap, fmt);
2179 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2180 va_end(ap);
2181 return ret;
2184 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2186 return trace_array_vprintk(&global_trace, ip, fmt, args);
2188 EXPORT_SYMBOL_GPL(trace_vprintk);
2190 static void trace_iterator_increment(struct trace_iterator *iter)
2192 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2194 iter->idx++;
2195 if (buf_iter)
2196 ring_buffer_read(buf_iter, NULL);
2199 static struct trace_entry *
2200 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2201 unsigned long *lost_events)
2203 struct ring_buffer_event *event;
2204 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2206 if (buf_iter)
2207 event = ring_buffer_iter_peek(buf_iter, ts);
2208 else
2209 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2210 lost_events);
2212 if (event) {
2213 iter->ent_size = ring_buffer_event_length(event);
2214 return ring_buffer_event_data(event);
2216 iter->ent_size = 0;
2217 return NULL;
2220 static struct trace_entry *
2221 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2222 unsigned long *missing_events, u64 *ent_ts)
2224 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2225 struct trace_entry *ent, *next = NULL;
2226 unsigned long lost_events = 0, next_lost = 0;
2227 int cpu_file = iter->cpu_file;
2228 u64 next_ts = 0, ts;
2229 int next_cpu = -1;
2230 int next_size = 0;
2231 int cpu;
2234 * If we are in a per_cpu trace file, don't bother by iterating over
2235 * all cpu and peek directly.
2237 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2238 if (ring_buffer_empty_cpu(buffer, cpu_file))
2239 return NULL;
2240 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2241 if (ent_cpu)
2242 *ent_cpu = cpu_file;
2244 return ent;
2247 for_each_tracing_cpu(cpu) {
2249 if (ring_buffer_empty_cpu(buffer, cpu))
2250 continue;
2252 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2255 * Pick the entry with the smallest timestamp:
2257 if (ent && (!next || ts < next_ts)) {
2258 next = ent;
2259 next_cpu = cpu;
2260 next_ts = ts;
2261 next_lost = lost_events;
2262 next_size = iter->ent_size;
2266 iter->ent_size = next_size;
2268 if (ent_cpu)
2269 *ent_cpu = next_cpu;
2271 if (ent_ts)
2272 *ent_ts = next_ts;
2274 if (missing_events)
2275 *missing_events = next_lost;
2277 return next;
2280 /* Find the next real entry, without updating the iterator itself */
2281 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2282 int *ent_cpu, u64 *ent_ts)
2284 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2287 /* Find the next real entry, and increment the iterator to the next entry */
2288 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2290 iter->ent = __find_next_entry(iter, &iter->cpu,
2291 &iter->lost_events, &iter->ts);
2293 if (iter->ent)
2294 trace_iterator_increment(iter);
2296 return iter->ent ? iter : NULL;
2299 static void trace_consume(struct trace_iterator *iter)
2301 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2302 &iter->lost_events);
2305 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2307 struct trace_iterator *iter = m->private;
2308 int i = (int)*pos;
2309 void *ent;
2311 WARN_ON_ONCE(iter->leftover);
2313 (*pos)++;
2315 /* can't go backwards */
2316 if (iter->idx > i)
2317 return NULL;
2319 if (iter->idx < 0)
2320 ent = trace_find_next_entry_inc(iter);
2321 else
2322 ent = iter;
2324 while (ent && iter->idx < i)
2325 ent = trace_find_next_entry_inc(iter);
2327 iter->pos = *pos;
2329 return ent;
2332 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2334 struct ring_buffer_event *event;
2335 struct ring_buffer_iter *buf_iter;
2336 unsigned long entries = 0;
2337 u64 ts;
2339 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2341 buf_iter = trace_buffer_iter(iter, cpu);
2342 if (!buf_iter)
2343 return;
2345 ring_buffer_iter_reset(buf_iter);
2348 * We could have the case with the max latency tracers
2349 * that a reset never took place on a cpu. This is evident
2350 * by the timestamp being before the start of the buffer.
2352 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2353 if (ts >= iter->trace_buffer->time_start)
2354 break;
2355 entries++;
2356 ring_buffer_read(buf_iter, NULL);
2359 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2363 * The current tracer is copied to avoid a global locking
2364 * all around.
2366 static void *s_start(struct seq_file *m, loff_t *pos)
2368 struct trace_iterator *iter = m->private;
2369 struct trace_array *tr = iter->tr;
2370 int cpu_file = iter->cpu_file;
2371 void *p = NULL;
2372 loff_t l = 0;
2373 int cpu;
2376 * copy the tracer to avoid using a global lock all around.
2377 * iter->trace is a copy of current_trace, the pointer to the
2378 * name may be used instead of a strcmp(), as iter->trace->name
2379 * will point to the same string as current_trace->name.
2381 mutex_lock(&trace_types_lock);
2382 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2383 *iter->trace = *tr->current_trace;
2384 mutex_unlock(&trace_types_lock);
2386 #ifdef CONFIG_TRACER_MAX_TRACE
2387 if (iter->snapshot && iter->trace->use_max_tr)
2388 return ERR_PTR(-EBUSY);
2389 #endif
2391 if (!iter->snapshot)
2392 atomic_inc(&trace_record_cmdline_disabled);
2394 if (*pos != iter->pos) {
2395 iter->ent = NULL;
2396 iter->cpu = 0;
2397 iter->idx = -1;
2399 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2400 for_each_tracing_cpu(cpu)
2401 tracing_iter_reset(iter, cpu);
2402 } else
2403 tracing_iter_reset(iter, cpu_file);
2405 iter->leftover = 0;
2406 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2409 } else {
2411 * If we overflowed the seq_file before, then we want
2412 * to just reuse the trace_seq buffer again.
2414 if (iter->leftover)
2415 p = iter;
2416 else {
2417 l = *pos - 1;
2418 p = s_next(m, p, &l);
2422 trace_event_read_lock();
2423 trace_access_lock(cpu_file);
2424 return p;
2427 static void s_stop(struct seq_file *m, void *p)
2429 struct trace_iterator *iter = m->private;
2431 #ifdef CONFIG_TRACER_MAX_TRACE
2432 if (iter->snapshot && iter->trace->use_max_tr)
2433 return;
2434 #endif
2436 if (!iter->snapshot)
2437 atomic_dec(&trace_record_cmdline_disabled);
2439 trace_access_unlock(iter->cpu_file);
2440 trace_event_read_unlock();
2443 static void
2444 get_total_entries(struct trace_buffer *buf,
2445 unsigned long *total, unsigned long *entries)
2447 unsigned long count;
2448 int cpu;
2450 *total = 0;
2451 *entries = 0;
2453 for_each_tracing_cpu(cpu) {
2454 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2456 * If this buffer has skipped entries, then we hold all
2457 * entries for the trace and we need to ignore the
2458 * ones before the time stamp.
2460 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2461 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2462 /* total is the same as the entries */
2463 *total += count;
2464 } else
2465 *total += count +
2466 ring_buffer_overrun_cpu(buf->buffer, cpu);
2467 *entries += count;
2471 static void print_lat_help_header(struct seq_file *m)
2473 seq_puts(m, "# _------=> CPU# \n");
2474 seq_puts(m, "# / _-----=> irqs-off \n");
2475 seq_puts(m, "# | / _----=> need-resched \n");
2476 seq_puts(m, "# || / _---=> hardirq/softirq \n");
2477 seq_puts(m, "# ||| / _--=> preempt-depth \n");
2478 seq_puts(m, "# |||| / delay \n");
2479 seq_puts(m, "# cmd pid ||||| time | caller \n");
2480 seq_puts(m, "# \\ / ||||| \\ | / \n");
2483 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2485 unsigned long total;
2486 unsigned long entries;
2488 get_total_entries(buf, &total, &entries);
2489 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
2490 entries, total, num_online_cpus());
2491 seq_puts(m, "#\n");
2494 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2496 print_event_info(buf, m);
2497 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
2498 seq_puts(m, "# | | | | |\n");
2501 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2503 print_event_info(buf, m);
2504 seq_puts(m, "# _-----=> irqs-off\n");
2505 seq_puts(m, "# / _----=> need-resched\n");
2506 seq_puts(m, "# | / _---=> hardirq/softirq\n");
2507 seq_puts(m, "# || / _--=> preempt-depth\n");
2508 seq_puts(m, "# ||| / delay\n");
2509 seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
2510 seq_puts(m, "# | | | |||| | |\n");
2513 void
2514 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2516 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2517 struct trace_buffer *buf = iter->trace_buffer;
2518 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2519 struct tracer *type = iter->trace;
2520 unsigned long entries;
2521 unsigned long total;
2522 const char *name = "preemption";
2524 name = type->name;
2526 get_total_entries(buf, &total, &entries);
2528 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2529 name, UTS_RELEASE);
2530 seq_puts(m, "# -----------------------------------"
2531 "---------------------------------\n");
2532 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2533 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2534 nsecs_to_usecs(data->saved_latency),
2535 entries,
2536 total,
2537 buf->cpu,
2538 #if defined(CONFIG_PREEMPT_NONE)
2539 "server",
2540 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2541 "desktop",
2542 #elif defined(CONFIG_PREEMPT)
2543 "preempt",
2544 #else
2545 "unknown",
2546 #endif
2547 /* These are reserved for later use */
2548 0, 0, 0, 0);
2549 #ifdef CONFIG_SMP
2550 seq_printf(m, " #P:%d)\n", num_online_cpus());
2551 #else
2552 seq_puts(m, ")\n");
2553 #endif
2554 seq_puts(m, "# -----------------\n");
2555 seq_printf(m, "# | task: %.16s-%d "
2556 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2557 data->comm, data->pid,
2558 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2559 data->policy, data->rt_priority);
2560 seq_puts(m, "# -----------------\n");
2562 if (data->critical_start) {
2563 seq_puts(m, "# => started at: ");
2564 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2565 trace_print_seq(m, &iter->seq);
2566 seq_puts(m, "\n# => ended at: ");
2567 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2568 trace_print_seq(m, &iter->seq);
2569 seq_puts(m, "\n#\n");
2572 seq_puts(m, "#\n");
2575 static void test_cpu_buff_start(struct trace_iterator *iter)
2577 struct trace_seq *s = &iter->seq;
2579 if (!(trace_flags & TRACE_ITER_ANNOTATE))
2580 return;
2582 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2583 return;
2585 if (cpumask_test_cpu(iter->cpu, iter->started))
2586 return;
2588 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2589 return;
2591 cpumask_set_cpu(iter->cpu, iter->started);
2593 /* Don't print started cpu buffer for the first entry of the trace */
2594 if (iter->idx > 1)
2595 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2596 iter->cpu);
2599 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2601 struct trace_seq *s = &iter->seq;
2602 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2603 struct trace_entry *entry;
2604 struct trace_event *event;
2606 entry = iter->ent;
2608 test_cpu_buff_start(iter);
2610 event = ftrace_find_event(entry->type);
2612 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2613 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2614 if (!trace_print_lat_context(iter))
2615 goto partial;
2616 } else {
2617 if (!trace_print_context(iter))
2618 goto partial;
2622 if (event)
2623 return event->funcs->trace(iter, sym_flags, event);
2625 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2626 goto partial;
2628 return TRACE_TYPE_HANDLED;
2629 partial:
2630 return TRACE_TYPE_PARTIAL_LINE;
2633 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2635 struct trace_seq *s = &iter->seq;
2636 struct trace_entry *entry;
2637 struct trace_event *event;
2639 entry = iter->ent;
2641 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2642 if (!trace_seq_printf(s, "%d %d %llu ",
2643 entry->pid, iter->cpu, iter->ts))
2644 goto partial;
2647 event = ftrace_find_event(entry->type);
2648 if (event)
2649 return event->funcs->raw(iter, 0, event);
2651 if (!trace_seq_printf(s, "%d ?\n", entry->type))
2652 goto partial;
2654 return TRACE_TYPE_HANDLED;
2655 partial:
2656 return TRACE_TYPE_PARTIAL_LINE;
2659 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2661 struct trace_seq *s = &iter->seq;
2662 unsigned char newline = '\n';
2663 struct trace_entry *entry;
2664 struct trace_event *event;
2666 entry = iter->ent;
2668 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2669 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2670 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2671 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2674 event = ftrace_find_event(entry->type);
2675 if (event) {
2676 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2677 if (ret != TRACE_TYPE_HANDLED)
2678 return ret;
2681 SEQ_PUT_FIELD_RET(s, newline);
2683 return TRACE_TYPE_HANDLED;
2686 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2688 struct trace_seq *s = &iter->seq;
2689 struct trace_entry *entry;
2690 struct trace_event *event;
2692 entry = iter->ent;
2694 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2695 SEQ_PUT_FIELD_RET(s, entry->pid);
2696 SEQ_PUT_FIELD_RET(s, iter->cpu);
2697 SEQ_PUT_FIELD_RET(s, iter->ts);
2700 event = ftrace_find_event(entry->type);
2701 return event ? event->funcs->binary(iter, 0, event) :
2702 TRACE_TYPE_HANDLED;
2705 int trace_empty(struct trace_iterator *iter)
2707 struct ring_buffer_iter *buf_iter;
2708 int cpu;
2710 /* If we are looking at one CPU buffer, only check that one */
2711 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2712 cpu = iter->cpu_file;
2713 buf_iter = trace_buffer_iter(iter, cpu);
2714 if (buf_iter) {
2715 if (!ring_buffer_iter_empty(buf_iter))
2716 return 0;
2717 } else {
2718 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2719 return 0;
2721 return 1;
2724 for_each_tracing_cpu(cpu) {
2725 buf_iter = trace_buffer_iter(iter, cpu);
2726 if (buf_iter) {
2727 if (!ring_buffer_iter_empty(buf_iter))
2728 return 0;
2729 } else {
2730 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2731 return 0;
2735 return 1;
2738 /* Called with trace_event_read_lock() held. */
2739 enum print_line_t print_trace_line(struct trace_iterator *iter)
2741 enum print_line_t ret;
2743 if (iter->lost_events &&
2744 !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2745 iter->cpu, iter->lost_events))
2746 return TRACE_TYPE_PARTIAL_LINE;
2748 if (iter->trace && iter->trace->print_line) {
2749 ret = iter->trace->print_line(iter);
2750 if (ret != TRACE_TYPE_UNHANDLED)
2751 return ret;
2754 if (iter->ent->type == TRACE_BPUTS &&
2755 trace_flags & TRACE_ITER_PRINTK &&
2756 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2757 return trace_print_bputs_msg_only(iter);
2759 if (iter->ent->type == TRACE_BPRINT &&
2760 trace_flags & TRACE_ITER_PRINTK &&
2761 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2762 return trace_print_bprintk_msg_only(iter);
2764 if (iter->ent->type == TRACE_PRINT &&
2765 trace_flags & TRACE_ITER_PRINTK &&
2766 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2767 return trace_print_printk_msg_only(iter);
2769 if (trace_flags & TRACE_ITER_BIN)
2770 return print_bin_fmt(iter);
2772 if (trace_flags & TRACE_ITER_HEX)
2773 return print_hex_fmt(iter);
2775 if (trace_flags & TRACE_ITER_RAW)
2776 return print_raw_fmt(iter);
2778 return print_trace_fmt(iter);
2781 void trace_latency_header(struct seq_file *m)
2783 struct trace_iterator *iter = m->private;
2785 /* print nothing if the buffers are empty */
2786 if (trace_empty(iter))
2787 return;
2789 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2790 print_trace_header(m, iter);
2792 if (!(trace_flags & TRACE_ITER_VERBOSE))
2793 print_lat_help_header(m);
2796 void trace_default_header(struct seq_file *m)
2798 struct trace_iterator *iter = m->private;
2800 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2801 return;
2803 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2804 /* print nothing if the buffers are empty */
2805 if (trace_empty(iter))
2806 return;
2807 print_trace_header(m, iter);
2808 if (!(trace_flags & TRACE_ITER_VERBOSE))
2809 print_lat_help_header(m);
2810 } else {
2811 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2812 if (trace_flags & TRACE_ITER_IRQ_INFO)
2813 print_func_help_header_irq(iter->trace_buffer, m);
2814 else
2815 print_func_help_header(iter->trace_buffer, m);
2820 static void test_ftrace_alive(struct seq_file *m)
2822 if (!ftrace_is_dead())
2823 return;
2824 seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2825 seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n");
2828 #ifdef CONFIG_TRACER_MAX_TRACE
2829 static void show_snapshot_main_help(struct seq_file *m)
2831 seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2832 seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2833 seq_printf(m, "# Takes a snapshot of the main buffer.\n");
2834 seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2835 seq_printf(m, "# (Doesn't have to be '2' works with any number that\n");
2836 seq_printf(m, "# is not a '0' or '1')\n");
2839 static void show_snapshot_percpu_help(struct seq_file *m)
2841 seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2842 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2843 seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2844 seq_printf(m, "# Takes a snapshot of the main buffer for this cpu.\n");
2845 #else
2846 seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2847 seq_printf(m, "# Must use main snapshot file to allocate.\n");
2848 #endif
2849 seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2850 seq_printf(m, "# (Doesn't have to be '2' works with any number that\n");
2851 seq_printf(m, "# is not a '0' or '1')\n");
2854 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2856 if (iter->tr->allocated_snapshot)
2857 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2858 else
2859 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2861 seq_printf(m, "# Snapshot commands:\n");
2862 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2863 show_snapshot_main_help(m);
2864 else
2865 show_snapshot_percpu_help(m);
2867 #else
2868 /* Should never be called */
2869 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2870 #endif
2872 static int s_show(struct seq_file *m, void *v)
2874 struct trace_iterator *iter = v;
2875 int ret;
2877 if (iter->ent == NULL) {
2878 if (iter->tr) {
2879 seq_printf(m, "# tracer: %s\n", iter->trace->name);
2880 seq_puts(m, "#\n");
2881 test_ftrace_alive(m);
2883 if (iter->snapshot && trace_empty(iter))
2884 print_snapshot_help(m, iter);
2885 else if (iter->trace && iter->trace->print_header)
2886 iter->trace->print_header(m);
2887 else
2888 trace_default_header(m);
2890 } else if (iter->leftover) {
2892 * If we filled the seq_file buffer earlier, we
2893 * want to just show it now.
2895 ret = trace_print_seq(m, &iter->seq);
2897 /* ret should this time be zero, but you never know */
2898 iter->leftover = ret;
2900 } else {
2901 print_trace_line(iter);
2902 ret = trace_print_seq(m, &iter->seq);
2904 * If we overflow the seq_file buffer, then it will
2905 * ask us for this data again at start up.
2906 * Use that instead.
2907 * ret is 0 if seq_file write succeeded.
2908 * -1 otherwise.
2910 iter->leftover = ret;
2913 return 0;
2917 * Should be used after trace_array_get(), trace_types_lock
2918 * ensures that i_cdev was already initialized.
2920 static inline int tracing_get_cpu(struct inode *inode)
2922 if (inode->i_cdev) /* See trace_create_cpu_file() */
2923 return (long)inode->i_cdev - 1;
2924 return RING_BUFFER_ALL_CPUS;
2927 static const struct seq_operations tracer_seq_ops = {
2928 .start = s_start,
2929 .next = s_next,
2930 .stop = s_stop,
2931 .show = s_show,
2934 static struct trace_iterator *
2935 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2937 struct trace_array *tr = inode->i_private;
2938 struct trace_iterator *iter;
2939 int cpu;
2941 if (tracing_disabled)
2942 return ERR_PTR(-ENODEV);
2944 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2945 if (!iter)
2946 return ERR_PTR(-ENOMEM);
2948 iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2949 GFP_KERNEL);
2950 if (!iter->buffer_iter)
2951 goto release;
2954 * We make a copy of the current tracer to avoid concurrent
2955 * changes on it while we are reading.
2957 mutex_lock(&trace_types_lock);
2958 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2959 if (!iter->trace)
2960 goto fail;
2962 *iter->trace = *tr->current_trace;
2964 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2965 goto fail;
2967 iter->tr = tr;
2969 #ifdef CONFIG_TRACER_MAX_TRACE
2970 /* Currently only the top directory has a snapshot */
2971 if (tr->current_trace->print_max || snapshot)
2972 iter->trace_buffer = &tr->max_buffer;
2973 else
2974 #endif
2975 iter->trace_buffer = &tr->trace_buffer;
2976 iter->snapshot = snapshot;
2977 iter->pos = -1;
2978 iter->cpu_file = tracing_get_cpu(inode);
2979 mutex_init(&iter->mutex);
2981 /* Notify the tracer early; before we stop tracing. */
2982 if (iter->trace && iter->trace->open)
2983 iter->trace->open(iter);
2985 /* Annotate start of buffers if we had overruns */
2986 if (ring_buffer_overruns(iter->trace_buffer->buffer))
2987 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2989 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2990 if (trace_clocks[tr->clock_id].in_ns)
2991 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2993 /* stop the trace while dumping if we are not opening "snapshot" */
2994 if (!iter->snapshot)
2995 tracing_stop_tr(tr);
2997 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2998 for_each_tracing_cpu(cpu) {
2999 iter->buffer_iter[cpu] =
3000 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3002 ring_buffer_read_prepare_sync();
3003 for_each_tracing_cpu(cpu) {
3004 ring_buffer_read_start(iter->buffer_iter[cpu]);
3005 tracing_iter_reset(iter, cpu);
3007 } else {
3008 cpu = iter->cpu_file;
3009 iter->buffer_iter[cpu] =
3010 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3011 ring_buffer_read_prepare_sync();
3012 ring_buffer_read_start(iter->buffer_iter[cpu]);
3013 tracing_iter_reset(iter, cpu);
3016 mutex_unlock(&trace_types_lock);
3018 return iter;
3020 fail:
3021 mutex_unlock(&trace_types_lock);
3022 kfree(iter->trace);
3023 kfree(iter->buffer_iter);
3024 release:
3025 seq_release_private(inode, file);
3026 return ERR_PTR(-ENOMEM);
3029 int tracing_open_generic(struct inode *inode, struct file *filp)
3031 if (tracing_disabled)
3032 return -ENODEV;
3034 filp->private_data = inode->i_private;
3035 return 0;
3038 bool tracing_is_disabled(void)
3040 return (tracing_disabled) ? true: false;
3044 * Open and update trace_array ref count.
3045 * Must have the current trace_array passed to it.
3047 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3049 struct trace_array *tr = inode->i_private;
3051 if (tracing_disabled)
3052 return -ENODEV;
3054 if (trace_array_get(tr) < 0)
3055 return -ENODEV;
3057 filp->private_data = inode->i_private;
3059 return 0;
3062 static int tracing_release(struct inode *inode, struct file *file)
3064 struct trace_array *tr = inode->i_private;
3065 struct seq_file *m = file->private_data;
3066 struct trace_iterator *iter;
3067 int cpu;
3069 if (!(file->f_mode & FMODE_READ)) {
3070 trace_array_put(tr);
3071 return 0;
3074 /* Writes do not use seq_file */
3075 iter = m->private;
3076 mutex_lock(&trace_types_lock);
3078 for_each_tracing_cpu(cpu) {
3079 if (iter->buffer_iter[cpu])
3080 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3083 if (iter->trace && iter->trace->close)
3084 iter->trace->close(iter);
3086 if (!iter->snapshot)
3087 /* reenable tracing if it was previously enabled */
3088 tracing_start_tr(tr);
3090 __trace_array_put(tr);
3092 mutex_unlock(&trace_types_lock);
3094 mutex_destroy(&iter->mutex);
3095 free_cpumask_var(iter->started);
3096 kfree(iter->trace);
3097 kfree(iter->buffer_iter);
3098 seq_release_private(inode, file);
3100 return 0;
3103 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3105 struct trace_array *tr = inode->i_private;
3107 trace_array_put(tr);
3108 return 0;
3111 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3113 struct trace_array *tr = inode->i_private;
3115 trace_array_put(tr);
3117 return single_release(inode, file);
3120 static int tracing_open(struct inode *inode, struct file *file)
3122 struct trace_array *tr = inode->i_private;
3123 struct trace_iterator *iter;
3124 int ret = 0;
3126 if (trace_array_get(tr) < 0)
3127 return -ENODEV;
3129 /* If this file was open for write, then erase contents */
3130 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3131 int cpu = tracing_get_cpu(inode);
3133 if (cpu == RING_BUFFER_ALL_CPUS)
3134 tracing_reset_online_cpus(&tr->trace_buffer);
3135 else
3136 tracing_reset(&tr->trace_buffer, cpu);
3139 if (file->f_mode & FMODE_READ) {
3140 iter = __tracing_open(inode, file, false);
3141 if (IS_ERR(iter))
3142 ret = PTR_ERR(iter);
3143 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3144 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3147 if (ret < 0)
3148 trace_array_put(tr);
3150 return ret;
3153 static void *
3154 t_next(struct seq_file *m, void *v, loff_t *pos)
3156 struct tracer *t = v;
3158 (*pos)++;
3160 if (t)
3161 t = t->next;
3163 return t;
3166 static void *t_start(struct seq_file *m, loff_t *pos)
3168 struct tracer *t;
3169 loff_t l = 0;
3171 mutex_lock(&trace_types_lock);
3172 for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3175 return t;
3178 static void t_stop(struct seq_file *m, void *p)
3180 mutex_unlock(&trace_types_lock);
3183 static int t_show(struct seq_file *m, void *v)
3185 struct tracer *t = v;
3187 if (!t)
3188 return 0;
3190 seq_printf(m, "%s", t->name);
3191 if (t->next)
3192 seq_putc(m, ' ');
3193 else
3194 seq_putc(m, '\n');
3196 return 0;
3199 static const struct seq_operations show_traces_seq_ops = {
3200 .start = t_start,
3201 .next = t_next,
3202 .stop = t_stop,
3203 .show = t_show,
3206 static int show_traces_open(struct inode *inode, struct file *file)
3208 if (tracing_disabled)
3209 return -ENODEV;
3211 return seq_open(file, &show_traces_seq_ops);
3214 static ssize_t
3215 tracing_write_stub(struct file *filp, const char __user *ubuf,
3216 size_t count, loff_t *ppos)
3218 return count;
3221 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3223 int ret;
3225 if (file->f_mode & FMODE_READ)
3226 ret = seq_lseek(file, offset, whence);
3227 else
3228 file->f_pos = ret = 0;
3230 return ret;
3233 static const struct file_operations tracing_fops = {
3234 .open = tracing_open,
3235 .read = seq_read,
3236 .write = tracing_write_stub,
3237 .llseek = tracing_lseek,
3238 .release = tracing_release,
3241 static const struct file_operations show_traces_fops = {
3242 .open = show_traces_open,
3243 .read = seq_read,
3244 .release = seq_release,
3245 .llseek = seq_lseek,
3249 * The tracer itself will not take this lock, but still we want
3250 * to provide a consistent cpumask to user-space:
3252 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3255 * Temporary storage for the character representation of the
3256 * CPU bitmask (and one more byte for the newline):
3258 static char mask_str[NR_CPUS + 1];
3260 static ssize_t
3261 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3262 size_t count, loff_t *ppos)
3264 struct trace_array *tr = file_inode(filp)->i_private;
3265 int len;
3267 mutex_lock(&tracing_cpumask_update_lock);
3269 len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3270 if (count - len < 2) {
3271 count = -EINVAL;
3272 goto out_err;
3274 len += sprintf(mask_str + len, "\n");
3275 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3277 out_err:
3278 mutex_unlock(&tracing_cpumask_update_lock);
3280 return count;
3283 static ssize_t
3284 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3285 size_t count, loff_t *ppos)
3287 struct trace_array *tr = file_inode(filp)->i_private;
3288 cpumask_var_t tracing_cpumask_new;
3289 int err, cpu;
3291 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3292 return -ENOMEM;
3294 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3295 if (err)
3296 goto err_unlock;
3298 mutex_lock(&tracing_cpumask_update_lock);
3300 local_irq_disable();
3301 arch_spin_lock(&ftrace_max_lock);
3302 for_each_tracing_cpu(cpu) {
3304 * Increase/decrease the disabled counter if we are
3305 * about to flip a bit in the cpumask:
3307 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3308 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3309 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3310 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3312 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3313 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3314 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3315 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3318 arch_spin_unlock(&ftrace_max_lock);
3319 local_irq_enable();
3321 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3323 mutex_unlock(&tracing_cpumask_update_lock);
3324 free_cpumask_var(tracing_cpumask_new);
3326 return count;
3328 err_unlock:
3329 free_cpumask_var(tracing_cpumask_new);
3331 return err;
3334 static const struct file_operations tracing_cpumask_fops = {
3335 .open = tracing_open_generic_tr,
3336 .read = tracing_cpumask_read,
3337 .write = tracing_cpumask_write,
3338 .release = tracing_release_generic_tr,
3339 .llseek = generic_file_llseek,
3342 static int tracing_trace_options_show(struct seq_file *m, void *v)
3344 struct tracer_opt *trace_opts;
3345 struct trace_array *tr = m->private;
3346 u32 tracer_flags;
3347 int i;
3349 mutex_lock(&trace_types_lock);
3350 tracer_flags = tr->current_trace->flags->val;
3351 trace_opts = tr->current_trace->flags->opts;
3353 for (i = 0; trace_options[i]; i++) {
3354 if (trace_flags & (1 << i))
3355 seq_printf(m, "%s\n", trace_options[i]);
3356 else
3357 seq_printf(m, "no%s\n", trace_options[i]);
3360 for (i = 0; trace_opts[i].name; i++) {
3361 if (tracer_flags & trace_opts[i].bit)
3362 seq_printf(m, "%s\n", trace_opts[i].name);
3363 else
3364 seq_printf(m, "no%s\n", trace_opts[i].name);
3366 mutex_unlock(&trace_types_lock);
3368 return 0;
3371 static int __set_tracer_option(struct tracer *trace,
3372 struct tracer_flags *tracer_flags,
3373 struct tracer_opt *opts, int neg)
3375 int ret;
3377 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3378 if (ret)
3379 return ret;
3381 if (neg)
3382 tracer_flags->val &= ~opts->bit;
3383 else
3384 tracer_flags->val |= opts->bit;
3385 return 0;
3388 /* Try to assign a tracer specific option */
3389 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3391 struct tracer_flags *tracer_flags = trace->flags;
3392 struct tracer_opt *opts = NULL;
3393 int i;
3395 for (i = 0; tracer_flags->opts[i].name; i++) {
3396 opts = &tracer_flags->opts[i];
3398 if (strcmp(cmp, opts->name) == 0)
3399 return __set_tracer_option(trace, trace->flags,
3400 opts, neg);
3403 return -EINVAL;
3406 /* Some tracers require overwrite to stay enabled */
3407 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3409 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3410 return -1;
3412 return 0;
3415 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3417 /* do nothing if flag is already set */
3418 if (!!(trace_flags & mask) == !!enabled)
3419 return 0;
3421 /* Give the tracer a chance to approve the change */
3422 if (tr->current_trace->flag_changed)
3423 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3424 return -EINVAL;
3426 if (enabled)
3427 trace_flags |= mask;
3428 else
3429 trace_flags &= ~mask;
3431 if (mask == TRACE_ITER_RECORD_CMD)
3432 trace_event_enable_cmd_record(enabled);
3434 if (mask == TRACE_ITER_OVERWRITE) {
3435 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3438 #endif
3441 if (mask == TRACE_ITER_PRINTK)
3442 trace_printk_start_stop_comm(enabled);
3444 return 0;
3447 static int trace_set_options(struct trace_array *tr, char *option)
3449 char *cmp;
3450 int neg = 0;
3451 int ret = -ENODEV;
3452 int i;
3454 cmp = strstrip(option);
3456 if (strncmp(cmp, "no", 2) == 0) {
3457 neg = 1;
3458 cmp += 2;
3461 mutex_lock(&trace_types_lock);
3463 for (i = 0; trace_options[i]; i++) {
3464 if (strcmp(cmp, trace_options[i]) == 0) {
3465 ret = set_tracer_flag(tr, 1 << i, !neg);
3466 break;
3470 /* If no option could be set, test the specific tracer options */
3471 if (!trace_options[i])
3472 ret = set_tracer_option(tr->current_trace, cmp, neg);
3474 mutex_unlock(&trace_types_lock);
3476 return ret;
3479 static ssize_t
3480 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3481 size_t cnt, loff_t *ppos)
3483 struct seq_file *m = filp->private_data;
3484 struct trace_array *tr = m->private;
3485 char buf[64];
3486 int ret;
3488 if (cnt >= sizeof(buf))
3489 return -EINVAL;
3491 if (copy_from_user(&buf, ubuf, cnt))
3492 return -EFAULT;
3494 buf[cnt] = 0;
3496 ret = trace_set_options(tr, buf);
3497 if (ret < 0)
3498 return ret;
3500 *ppos += cnt;
3502 return cnt;
3505 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3507 struct trace_array *tr = inode->i_private;
3508 int ret;
3510 if (tracing_disabled)
3511 return -ENODEV;
3513 if (trace_array_get(tr) < 0)
3514 return -ENODEV;
3516 ret = single_open(file, tracing_trace_options_show, inode->i_private);
3517 if (ret < 0)
3518 trace_array_put(tr);
3520 return ret;
3523 static const struct file_operations tracing_iter_fops = {
3524 .open = tracing_trace_options_open,
3525 .read = seq_read,
3526 .llseek = seq_lseek,
3527 .release = tracing_single_release_tr,
3528 .write = tracing_trace_options_write,
3531 static const char readme_msg[] =
3532 "tracing mini-HOWTO:\n\n"
3533 "# echo 0 > tracing_on : quick way to disable tracing\n"
3534 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3535 " Important files:\n"
3536 " trace\t\t\t- The static contents of the buffer\n"
3537 "\t\t\t To clear the buffer write into this file: echo > trace\n"
3538 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3539 " current_tracer\t- function and latency tracers\n"
3540 " available_tracers\t- list of configured tracers for current_tracer\n"
3541 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
3542 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
3543 " trace_clock\t\t-change the clock used to order events\n"
3544 " local: Per cpu clock but may not be synced across CPUs\n"
3545 " global: Synced across CPUs but slows tracing down.\n"
3546 " counter: Not a clock, but just an increment\n"
3547 " uptime: Jiffy counter from time of boot\n"
3548 " perf: Same clock that perf events use\n"
3549 #ifdef CONFIG_X86_64
3550 " x86-tsc: TSC cycle counter\n"
3551 #endif
3552 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3553 " tracing_cpumask\t- Limit which CPUs to trace\n"
3554 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3555 "\t\t\t Remove sub-buffer with rmdir\n"
3556 " trace_options\t\t- Set format or modify how tracing happens\n"
3557 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
3558 "\t\t\t option name\n"
3559 #ifdef CONFIG_DYNAMIC_FTRACE
3560 "\n available_filter_functions - list of functions that can be filtered on\n"
3561 " set_ftrace_filter\t- echo function name in here to only trace these\n"
3562 "\t\t\t functions\n"
3563 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3564 "\t modules: Can select a group via module\n"
3565 "\t Format: :mod:<module-name>\n"
3566 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
3567 "\t triggers: a command to perform when function is hit\n"
3568 "\t Format: <function>:<trigger>[:count]\n"
3569 "\t trigger: traceon, traceoff\n"
3570 "\t\t enable_event:<system>:<event>\n"
3571 "\t\t disable_event:<system>:<event>\n"
3572 #ifdef CONFIG_STACKTRACE
3573 "\t\t stacktrace\n"
3574 #endif
3575 #ifdef CONFIG_TRACER_SNAPSHOT
3576 "\t\t snapshot\n"
3577 #endif
3578 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
3579 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
3580 "\t The first one will disable tracing every time do_fault is hit\n"
3581 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
3582 "\t The first time do trap is hit and it disables tracing, the\n"
3583 "\t counter will decrement to 2. If tracing is already disabled,\n"
3584 "\t the counter will not decrement. It only decrements when the\n"
3585 "\t trigger did work\n"
3586 "\t To remove trigger without count:\n"
3587 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
3588 "\t To remove trigger with a count:\n"
3589 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3590 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
3591 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3592 "\t modules: Can select a group via module command :mod:\n"
3593 "\t Does not accept triggers\n"
3594 #endif /* CONFIG_DYNAMIC_FTRACE */
3595 #ifdef CONFIG_FUNCTION_TRACER
3596 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3597 "\t\t (function)\n"
3598 #endif
3599 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3600 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3601 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3602 #endif
3603 #ifdef CONFIG_TRACER_SNAPSHOT
3604 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
3605 "\t\t\t snapshot buffer. Read the contents for more\n"
3606 "\t\t\t information\n"
3607 #endif
3608 #ifdef CONFIG_STACK_TRACER
3609 " stack_trace\t\t- Shows the max stack trace when active\n"
3610 " stack_max_size\t- Shows current max stack size that was traced\n"
3611 "\t\t\t Write into this file to reset the max size (trigger a\n"
3612 "\t\t\t new trace)\n"
3613 #ifdef CONFIG_DYNAMIC_FTRACE
3614 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3615 "\t\t\t traces\n"
3616 #endif
3617 #endif /* CONFIG_STACK_TRACER */
3618 " events/\t\t- Directory containing all trace event subsystems:\n"
3619 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3620 " events/<system>/\t- Directory containing all trace events for <system>:\n"
3621 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3622 "\t\t\t events\n"
3623 " filter\t\t- If set, only events passing filter are traced\n"
3624 " events/<system>/<event>/\t- Directory containing control files for\n"
3625 "\t\t\t <event>:\n"
3626 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3627 " filter\t\t- If set, only events passing filter are traced\n"
3628 " trigger\t\t- If set, a command to perform when event is hit\n"
3629 "\t Format: <trigger>[:count][if <filter>]\n"
3630 "\t trigger: traceon, traceoff\n"
3631 "\t enable_event:<system>:<event>\n"
3632 "\t disable_event:<system>:<event>\n"
3633 #ifdef CONFIG_STACKTRACE
3634 "\t\t stacktrace\n"
3635 #endif
3636 #ifdef CONFIG_TRACER_SNAPSHOT
3637 "\t\t snapshot\n"
3638 #endif
3639 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
3640 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
3641 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3642 "\t events/block/block_unplug/trigger\n"
3643 "\t The first disables tracing every time block_unplug is hit.\n"
3644 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
3645 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
3646 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3647 "\t Like function triggers, the counter is only decremented if it\n"
3648 "\t enabled or disabled tracing.\n"
3649 "\t To remove a trigger without a count:\n"
3650 "\t echo '!<trigger> > <system>/<event>/trigger\n"
3651 "\t To remove a trigger with a count:\n"
3652 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
3653 "\t Filters can be ignored when removing a trigger.\n"
3656 static ssize_t
3657 tracing_readme_read(struct file *filp, char __user *ubuf,
3658 size_t cnt, loff_t *ppos)
3660 return simple_read_from_buffer(ubuf, cnt, ppos,
3661 readme_msg, strlen(readme_msg));
3664 static const struct file_operations tracing_readme_fops = {
3665 .open = tracing_open_generic,
3666 .read = tracing_readme_read,
3667 .llseek = generic_file_llseek,
3670 static ssize_t
3671 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3672 size_t cnt, loff_t *ppos)
3674 char *buf_comm;
3675 char *file_buf;
3676 char *buf;
3677 int len = 0;
3678 int pid;
3679 int i;
3681 file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3682 if (!file_buf)
3683 return -ENOMEM;
3685 buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3686 if (!buf_comm) {
3687 kfree(file_buf);
3688 return -ENOMEM;
3691 buf = file_buf;
3693 for (i = 0; i < SAVED_CMDLINES; i++) {
3694 int r;
3696 pid = map_cmdline_to_pid[i];
3697 if (pid == -1 || pid == NO_CMDLINE_MAP)
3698 continue;
3700 trace_find_cmdline(pid, buf_comm);
3701 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3702 buf += r;
3703 len += r;
3706 len = simple_read_from_buffer(ubuf, cnt, ppos,
3707 file_buf, len);
3709 kfree(file_buf);
3710 kfree(buf_comm);
3712 return len;
3715 static const struct file_operations tracing_saved_cmdlines_fops = {
3716 .open = tracing_open_generic,
3717 .read = tracing_saved_cmdlines_read,
3718 .llseek = generic_file_llseek,
3721 static ssize_t
3722 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3723 size_t cnt, loff_t *ppos)
3725 struct trace_array *tr = filp->private_data;
3726 char buf[MAX_TRACER_SIZE+2];
3727 int r;
3729 mutex_lock(&trace_types_lock);
3730 r = sprintf(buf, "%s\n", tr->current_trace->name);
3731 mutex_unlock(&trace_types_lock);
3733 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3736 int tracer_init(struct tracer *t, struct trace_array *tr)
3738 tracing_reset_online_cpus(&tr->trace_buffer);
3739 return t->init(tr);
3742 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3744 int cpu;
3746 for_each_tracing_cpu(cpu)
3747 per_cpu_ptr(buf->data, cpu)->entries = val;
3750 #ifdef CONFIG_TRACER_MAX_TRACE
3751 /* resize @tr's buffer to the size of @size_tr's entries */
3752 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3753 struct trace_buffer *size_buf, int cpu_id)
3755 int cpu, ret = 0;
3757 if (cpu_id == RING_BUFFER_ALL_CPUS) {
3758 for_each_tracing_cpu(cpu) {
3759 ret = ring_buffer_resize(trace_buf->buffer,
3760 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3761 if (ret < 0)
3762 break;
3763 per_cpu_ptr(trace_buf->data, cpu)->entries =
3764 per_cpu_ptr(size_buf->data, cpu)->entries;
3766 } else {
3767 ret = ring_buffer_resize(trace_buf->buffer,
3768 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3769 if (ret == 0)
3770 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3771 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3774 return ret;
3776 #endif /* CONFIG_TRACER_MAX_TRACE */
3778 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3779 unsigned long size, int cpu)
3781 int ret;
3784 * If kernel or user changes the size of the ring buffer
3785 * we use the size that was given, and we can forget about
3786 * expanding it later.
3788 ring_buffer_expanded = true;
3790 /* May be called before buffers are initialized */
3791 if (!tr->trace_buffer.buffer)
3792 return 0;
3794 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3795 if (ret < 0)
3796 return ret;
3798 #ifdef CONFIG_TRACER_MAX_TRACE
3799 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3800 !tr->current_trace->use_max_tr)
3801 goto out;
3803 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3804 if (ret < 0) {
3805 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3806 &tr->trace_buffer, cpu);
3807 if (r < 0) {
3809 * AARGH! We are left with different
3810 * size max buffer!!!!
3811 * The max buffer is our "snapshot" buffer.
3812 * When a tracer needs a snapshot (one of the
3813 * latency tracers), it swaps the max buffer
3814 * with the saved snap shot. We succeeded to
3815 * update the size of the main buffer, but failed to
3816 * update the size of the max buffer. But when we tried
3817 * to reset the main buffer to the original size, we
3818 * failed there too. This is very unlikely to
3819 * happen, but if it does, warn and kill all
3820 * tracing.
3822 WARN_ON(1);
3823 tracing_disabled = 1;
3825 return ret;
3828 if (cpu == RING_BUFFER_ALL_CPUS)
3829 set_buffer_entries(&tr->max_buffer, size);
3830 else
3831 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3833 out:
3834 #endif /* CONFIG_TRACER_MAX_TRACE */
3836 if (cpu == RING_BUFFER_ALL_CPUS)
3837 set_buffer_entries(&tr->trace_buffer, size);
3838 else
3839 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3841 return ret;
3844 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3845 unsigned long size, int cpu_id)
3847 int ret = size;
3849 mutex_lock(&trace_types_lock);
3851 if (cpu_id != RING_BUFFER_ALL_CPUS) {
3852 /* make sure, this cpu is enabled in the mask */
3853 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3854 ret = -EINVAL;
3855 goto out;
3859 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3860 if (ret < 0)
3861 ret = -ENOMEM;
3863 out:
3864 mutex_unlock(&trace_types_lock);
3866 return ret;
3871 * tracing_update_buffers - used by tracing facility to expand ring buffers
3873 * To save on memory when the tracing is never used on a system with it
3874 * configured in. The ring buffers are set to a minimum size. But once
3875 * a user starts to use the tracing facility, then they need to grow
3876 * to their default size.
3878 * This function is to be called when a tracer is about to be used.
3880 int tracing_update_buffers(void)
3882 int ret = 0;
3884 mutex_lock(&trace_types_lock);
3885 if (!ring_buffer_expanded)
3886 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3887 RING_BUFFER_ALL_CPUS);
3888 mutex_unlock(&trace_types_lock);
3890 return ret;
3893 struct trace_option_dentry;
3895 static struct trace_option_dentry *
3896 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3898 static void
3899 destroy_trace_option_files(struct trace_option_dentry *topts);
3901 static int tracing_set_tracer(const char *buf)
3903 static struct trace_option_dentry *topts;
3904 struct trace_array *tr = &global_trace;
3905 struct tracer *t;
3906 #ifdef CONFIG_TRACER_MAX_TRACE
3907 bool had_max_tr;
3908 #endif
3909 int ret = 0;
3911 mutex_lock(&trace_types_lock);
3913 if (!ring_buffer_expanded) {
3914 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3915 RING_BUFFER_ALL_CPUS);
3916 if (ret < 0)
3917 goto out;
3918 ret = 0;
3921 for (t = trace_types; t; t = t->next) {
3922 if (strcmp(t->name, buf) == 0)
3923 break;
3925 if (!t) {
3926 ret = -EINVAL;
3927 goto out;
3929 if (t == tr->current_trace)
3930 goto out;
3932 trace_branch_disable();
3934 tr->current_trace->enabled = false;
3936 if (tr->current_trace->reset)
3937 tr->current_trace->reset(tr);
3939 /* Current trace needs to be nop_trace before synchronize_sched */
3940 tr->current_trace = &nop_trace;
3942 #ifdef CONFIG_TRACER_MAX_TRACE
3943 had_max_tr = tr->allocated_snapshot;
3945 if (had_max_tr && !t->use_max_tr) {
3947 * We need to make sure that the update_max_tr sees that
3948 * current_trace changed to nop_trace to keep it from
3949 * swapping the buffers after we resize it.
3950 * The update_max_tr is called from interrupts disabled
3951 * so a synchronized_sched() is sufficient.
3953 synchronize_sched();
3954 free_snapshot(tr);
3956 #endif
3957 destroy_trace_option_files(topts);
3959 topts = create_trace_option_files(tr, t);
3961 #ifdef CONFIG_TRACER_MAX_TRACE
3962 if (t->use_max_tr && !had_max_tr) {
3963 ret = alloc_snapshot(tr);
3964 if (ret < 0)
3965 goto out;
3967 #endif
3969 if (t->init) {
3970 ret = tracer_init(t, tr);
3971 if (ret)
3972 goto out;
3975 tr->current_trace = t;
3976 tr->current_trace->enabled = true;
3977 trace_branch_enable(tr);
3978 out:
3979 mutex_unlock(&trace_types_lock);
3981 return ret;
3984 static ssize_t
3985 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3986 size_t cnt, loff_t *ppos)
3988 char buf[MAX_TRACER_SIZE+1];
3989 int i;
3990 size_t ret;
3991 int err;
3993 ret = cnt;
3995 if (cnt > MAX_TRACER_SIZE)
3996 cnt = MAX_TRACER_SIZE;
3998 if (copy_from_user(&buf, ubuf, cnt))
3999 return -EFAULT;
4001 buf[cnt] = 0;
4003 /* strip ending whitespace. */
4004 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4005 buf[i] = 0;
4007 err = tracing_set_tracer(buf);
4008 if (err)
4009 return err;
4011 *ppos += ret;
4013 return ret;
4016 static ssize_t
4017 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4018 size_t cnt, loff_t *ppos)
4020 unsigned long *ptr = filp->private_data;
4021 char buf[64];
4022 int r;
4024 r = snprintf(buf, sizeof(buf), "%ld\n",
4025 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4026 if (r > sizeof(buf))
4027 r = sizeof(buf);
4028 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4031 static ssize_t
4032 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4033 size_t cnt, loff_t *ppos)
4035 unsigned long *ptr = filp->private_data;
4036 unsigned long val;
4037 int ret;
4039 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4040 if (ret)
4041 return ret;
4043 *ptr = val * 1000;
4045 return cnt;
4048 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4050 struct trace_array *tr = inode->i_private;
4051 struct trace_iterator *iter;
4052 int ret = 0;
4054 if (tracing_disabled)
4055 return -ENODEV;
4057 if (trace_array_get(tr) < 0)
4058 return -ENODEV;
4060 mutex_lock(&trace_types_lock);
4062 /* create a buffer to store the information to pass to userspace */
4063 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4064 if (!iter) {
4065 ret = -ENOMEM;
4066 __trace_array_put(tr);
4067 goto out;
4071 * We make a copy of the current tracer to avoid concurrent
4072 * changes on it while we are reading.
4074 iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4075 if (!iter->trace) {
4076 ret = -ENOMEM;
4077 goto fail;
4079 *iter->trace = *tr->current_trace;
4081 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4082 ret = -ENOMEM;
4083 goto fail;
4086 /* trace pipe does not show start of buffer */
4087 cpumask_setall(iter->started);
4089 if (trace_flags & TRACE_ITER_LATENCY_FMT)
4090 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4092 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4093 if (trace_clocks[tr->clock_id].in_ns)
4094 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4096 iter->tr = tr;
4097 iter->trace_buffer = &tr->trace_buffer;
4098 iter->cpu_file = tracing_get_cpu(inode);
4099 mutex_init(&iter->mutex);
4100 filp->private_data = iter;
4102 if (iter->trace->pipe_open)
4103 iter->trace->pipe_open(iter);
4105 nonseekable_open(inode, filp);
4106 out:
4107 mutex_unlock(&trace_types_lock);
4108 return ret;
4110 fail:
4111 kfree(iter->trace);
4112 kfree(iter);
4113 __trace_array_put(tr);
4114 mutex_unlock(&trace_types_lock);
4115 return ret;
4118 static int tracing_release_pipe(struct inode *inode, struct file *file)
4120 struct trace_iterator *iter = file->private_data;
4121 struct trace_array *tr = inode->i_private;
4123 mutex_lock(&trace_types_lock);
4125 if (iter->trace->pipe_close)
4126 iter->trace->pipe_close(iter);
4128 mutex_unlock(&trace_types_lock);
4130 free_cpumask_var(iter->started);
4131 mutex_destroy(&iter->mutex);
4132 kfree(iter->trace);
4133 kfree(iter);
4135 trace_array_put(tr);
4137 return 0;
4140 static unsigned int
4141 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4143 /* Iterators are static, they should be filled or empty */
4144 if (trace_buffer_iter(iter, iter->cpu_file))
4145 return POLLIN | POLLRDNORM;
4147 if (trace_flags & TRACE_ITER_BLOCK)
4149 * Always select as readable when in blocking mode
4151 return POLLIN | POLLRDNORM;
4152 else
4153 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4154 filp, poll_table);
4157 static unsigned int
4158 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4160 struct trace_iterator *iter = filp->private_data;
4162 return trace_poll(iter, filp, poll_table);
4166 * This is a make-shift waitqueue.
4167 * A tracer might use this callback on some rare cases:
4169 * 1) the current tracer might hold the runqueue lock when it wakes up
4170 * a reader, hence a deadlock (sched, function, and function graph tracers)
4171 * 2) the function tracers, trace all functions, we don't want
4172 * the overhead of calling wake_up and friends
4173 * (and tracing them too)
4175 * Anyway, this is really very primitive wakeup.
4177 int poll_wait_pipe(struct trace_iterator *iter)
4179 set_current_state(TASK_INTERRUPTIBLE);
4180 /* sleep for 100 msecs, and try again. */
4181 schedule_timeout(HZ / 10);
4182 return 0;
4185 /* Must be called with trace_types_lock mutex held. */
4186 static int tracing_wait_pipe(struct file *filp)
4188 struct trace_iterator *iter = filp->private_data;
4189 int ret;
4191 while (trace_empty(iter)) {
4193 if ((filp->f_flags & O_NONBLOCK)) {
4194 return -EAGAIN;
4197 mutex_unlock(&iter->mutex);
4199 ret = iter->trace->wait_pipe(iter);
4201 mutex_lock(&iter->mutex);
4203 if (ret)
4204 return ret;
4206 if (signal_pending(current))
4207 return -EINTR;
4210 * We block until we read something and tracing is disabled.
4211 * We still block if tracing is disabled, but we have never
4212 * read anything. This allows a user to cat this file, and
4213 * then enable tracing. But after we have read something,
4214 * we give an EOF when tracing is again disabled.
4216 * iter->pos will be 0 if we haven't read anything.
4218 if (!tracing_is_on() && iter->pos)
4219 break;
4222 return 1;
4226 * Consumer reader.
4228 static ssize_t
4229 tracing_read_pipe(struct file *filp, char __user *ubuf,
4230 size_t cnt, loff_t *ppos)
4232 struct trace_iterator *iter = filp->private_data;
4233 struct trace_array *tr = iter->tr;
4234 ssize_t sret;
4236 /* return any leftover data */
4237 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4238 if (sret != -EBUSY)
4239 return sret;
4241 trace_seq_init(&iter->seq);
4243 /* copy the tracer to avoid using a global lock all around */
4244 mutex_lock(&trace_types_lock);
4245 if (unlikely(iter->trace->name != tr->current_trace->name))
4246 *iter->trace = *tr->current_trace;
4247 mutex_unlock(&trace_types_lock);
4250 * Avoid more than one consumer on a single file descriptor
4251 * This is just a matter of traces coherency, the ring buffer itself
4252 * is protected.
4254 mutex_lock(&iter->mutex);
4255 if (iter->trace->read) {
4256 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4257 if (sret)
4258 goto out;
4261 waitagain:
4262 sret = tracing_wait_pipe(filp);
4263 if (sret <= 0)
4264 goto out;
4266 /* stop when tracing is finished */
4267 if (trace_empty(iter)) {
4268 sret = 0;
4269 goto out;
4272 if (cnt >= PAGE_SIZE)
4273 cnt = PAGE_SIZE - 1;
4275 /* reset all but tr, trace, and overruns */
4276 memset(&iter->seq, 0,
4277 sizeof(struct trace_iterator) -
4278 offsetof(struct trace_iterator, seq));
4279 cpumask_clear(iter->started);
4280 iter->pos = -1;
4282 trace_event_read_lock();
4283 trace_access_lock(iter->cpu_file);
4284 while (trace_find_next_entry_inc(iter) != NULL) {
4285 enum print_line_t ret;
4286 int len = iter->seq.len;
4288 ret = print_trace_line(iter);
4289 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4290 /* don't print partial lines */
4291 iter->seq.len = len;
4292 break;
4294 if (ret != TRACE_TYPE_NO_CONSUME)
4295 trace_consume(iter);
4297 if (iter->seq.len >= cnt)
4298 break;
4301 * Setting the full flag means we reached the trace_seq buffer
4302 * size and we should leave by partial output condition above.
4303 * One of the trace_seq_* functions is not used properly.
4305 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4306 iter->ent->type);
4308 trace_access_unlock(iter->cpu_file);
4309 trace_event_read_unlock();
4311 /* Now copy what we have to the user */
4312 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4313 if (iter->seq.readpos >= iter->seq.len)
4314 trace_seq_init(&iter->seq);
4317 * If there was nothing to send to user, in spite of consuming trace
4318 * entries, go back to wait for more entries.
4320 if (sret == -EBUSY)
4321 goto waitagain;
4323 out:
4324 mutex_unlock(&iter->mutex);
4326 return sret;
4329 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4330 unsigned int idx)
4332 __free_page(spd->pages[idx]);
4335 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4336 .can_merge = 0,
4337 .map = generic_pipe_buf_map,
4338 .unmap = generic_pipe_buf_unmap,
4339 .confirm = generic_pipe_buf_confirm,
4340 .release = generic_pipe_buf_release,
4341 .steal = generic_pipe_buf_steal,
4342 .get = generic_pipe_buf_get,
4345 static size_t
4346 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4348 size_t count;
4349 int ret;
4351 /* Seq buffer is page-sized, exactly what we need. */
4352 for (;;) {
4353 count = iter->seq.len;
4354 ret = print_trace_line(iter);
4355 count = iter->seq.len - count;
4356 if (rem < count) {
4357 rem = 0;
4358 iter->seq.len -= count;
4359 break;
4361 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4362 iter->seq.len -= count;
4363 break;
4366 if (ret != TRACE_TYPE_NO_CONSUME)
4367 trace_consume(iter);
4368 rem -= count;
4369 if (!trace_find_next_entry_inc(iter)) {
4370 rem = 0;
4371 iter->ent = NULL;
4372 break;
4376 return rem;
4379 static ssize_t tracing_splice_read_pipe(struct file *filp,
4380 loff_t *ppos,
4381 struct pipe_inode_info *pipe,
4382 size_t len,
4383 unsigned int flags)
4385 struct page *pages_def[PIPE_DEF_BUFFERS];
4386 struct partial_page partial_def[PIPE_DEF_BUFFERS];
4387 struct trace_iterator *iter = filp->private_data;
4388 struct splice_pipe_desc spd = {
4389 .pages = pages_def,
4390 .partial = partial_def,
4391 .nr_pages = 0, /* This gets updated below. */
4392 .nr_pages_max = PIPE_DEF_BUFFERS,
4393 .flags = flags,
4394 .ops = &tracing_pipe_buf_ops,
4395 .spd_release = tracing_spd_release_pipe,
4397 struct trace_array *tr = iter->tr;
4398 ssize_t ret;
4399 size_t rem;
4400 unsigned int i;
4402 if (splice_grow_spd(pipe, &spd))
4403 return -ENOMEM;
4405 /* copy the tracer to avoid using a global lock all around */
4406 mutex_lock(&trace_types_lock);
4407 if (unlikely(iter->trace->name != tr->current_trace->name))
4408 *iter->trace = *tr->current_trace;
4409 mutex_unlock(&trace_types_lock);
4411 mutex_lock(&iter->mutex);
4413 if (iter->trace->splice_read) {
4414 ret = iter->trace->splice_read(iter, filp,
4415 ppos, pipe, len, flags);
4416 if (ret)
4417 goto out_err;
4420 ret = tracing_wait_pipe(filp);
4421 if (ret <= 0)
4422 goto out_err;
4424 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4425 ret = -EFAULT;
4426 goto out_err;
4429 trace_event_read_lock();
4430 trace_access_lock(iter->cpu_file);
4432 /* Fill as many pages as possible. */
4433 for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4434 spd.pages[i] = alloc_page(GFP_KERNEL);
4435 if (!spd.pages[i])
4436 break;
4438 rem = tracing_fill_pipe_page(rem, iter);
4440 /* Copy the data into the page, so we can start over. */
4441 ret = trace_seq_to_buffer(&iter->seq,
4442 page_address(spd.pages[i]),
4443 iter->seq.len);
4444 if (ret < 0) {
4445 __free_page(spd.pages[i]);
4446 break;
4448 spd.partial[i].offset = 0;
4449 spd.partial[i].len = iter->seq.len;
4451 trace_seq_init(&iter->seq);
4454 trace_access_unlock(iter->cpu_file);
4455 trace_event_read_unlock();
4456 mutex_unlock(&iter->mutex);
4458 spd.nr_pages = i;
4460 ret = splice_to_pipe(pipe, &spd);
4461 out:
4462 splice_shrink_spd(&spd);
4463 return ret;
4465 out_err:
4466 mutex_unlock(&iter->mutex);
4467 goto out;
4470 static ssize_t
4471 tracing_entries_read(struct file *filp, char __user *ubuf,
4472 size_t cnt, loff_t *ppos)
4474 struct inode *inode = file_inode(filp);
4475 struct trace_array *tr = inode->i_private;
4476 int cpu = tracing_get_cpu(inode);
4477 char buf[64];
4478 int r = 0;
4479 ssize_t ret;
4481 mutex_lock(&trace_types_lock);
4483 if (cpu == RING_BUFFER_ALL_CPUS) {
4484 int cpu, buf_size_same;
4485 unsigned long size;
4487 size = 0;
4488 buf_size_same = 1;
4489 /* check if all cpu sizes are same */
4490 for_each_tracing_cpu(cpu) {
4491 /* fill in the size from first enabled cpu */
4492 if (size == 0)
4493 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4494 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4495 buf_size_same = 0;
4496 break;
4500 if (buf_size_same) {
4501 if (!ring_buffer_expanded)
4502 r = sprintf(buf, "%lu (expanded: %lu)\n",
4503 size >> 10,
4504 trace_buf_size >> 10);
4505 else
4506 r = sprintf(buf, "%lu\n", size >> 10);
4507 } else
4508 r = sprintf(buf, "X\n");
4509 } else
4510 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4512 mutex_unlock(&trace_types_lock);
4514 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4515 return ret;
4518 static ssize_t
4519 tracing_entries_write(struct file *filp, const char __user *ubuf,
4520 size_t cnt, loff_t *ppos)
4522 struct inode *inode = file_inode(filp);
4523 struct trace_array *tr = inode->i_private;
4524 unsigned long val;
4525 int ret;
4527 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4528 if (ret)
4529 return ret;
4531 /* must have at least 1 entry */
4532 if (!val)
4533 return -EINVAL;
4535 /* value is in KB */
4536 val <<= 10;
4537 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4538 if (ret < 0)
4539 return ret;
4541 *ppos += cnt;
4543 return cnt;
4546 static ssize_t
4547 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4548 size_t cnt, loff_t *ppos)
4550 struct trace_array *tr = filp->private_data;
4551 char buf[64];
4552 int r, cpu;
4553 unsigned long size = 0, expanded_size = 0;
4555 mutex_lock(&trace_types_lock);
4556 for_each_tracing_cpu(cpu) {
4557 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4558 if (!ring_buffer_expanded)
4559 expanded_size += trace_buf_size >> 10;
4561 if (ring_buffer_expanded)
4562 r = sprintf(buf, "%lu\n", size);
4563 else
4564 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4565 mutex_unlock(&trace_types_lock);
4567 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4570 static ssize_t
4571 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4572 size_t cnt, loff_t *ppos)
4575 * There is no need to read what the user has written, this function
4576 * is just to make sure that there is no error when "echo" is used
4579 *ppos += cnt;
4581 return cnt;
4584 static int
4585 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4587 struct trace_array *tr = inode->i_private;
4589 /* disable tracing ? */
4590 if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4591 tracer_tracing_off(tr);
4592 /* resize the ring buffer to 0 */
4593 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4595 trace_array_put(tr);
4597 return 0;
4600 static ssize_t
4601 tracing_mark_write(struct file *filp, const char __user *ubuf,
4602 size_t cnt, loff_t *fpos)
4604 unsigned long addr = (unsigned long)ubuf;
4605 struct trace_array *tr = filp->private_data;
4606 struct ring_buffer_event *event;
4607 struct ring_buffer *buffer;
4608 struct print_entry *entry;
4609 unsigned long irq_flags;
4610 struct page *pages[2];
4611 void *map_page[2];
4612 int nr_pages = 1;
4613 ssize_t written;
4614 int offset;
4615 int size;
4616 int len;
4617 int ret;
4618 int i;
4620 if (tracing_disabled)
4621 return -EINVAL;
4623 if (!(trace_flags & TRACE_ITER_MARKERS))
4624 return -EINVAL;
4626 if (cnt > TRACE_BUF_SIZE)
4627 cnt = TRACE_BUF_SIZE;
4630 * Userspace is injecting traces into the kernel trace buffer.
4631 * We want to be as non intrusive as possible.
4632 * To do so, we do not want to allocate any special buffers
4633 * or take any locks, but instead write the userspace data
4634 * straight into the ring buffer.
4636 * First we need to pin the userspace buffer into memory,
4637 * which, most likely it is, because it just referenced it.
4638 * But there's no guarantee that it is. By using get_user_pages_fast()
4639 * and kmap_atomic/kunmap_atomic() we can get access to the
4640 * pages directly. We then write the data directly into the
4641 * ring buffer.
4643 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4645 /* check if we cross pages */
4646 if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4647 nr_pages = 2;
4649 offset = addr & (PAGE_SIZE - 1);
4650 addr &= PAGE_MASK;
4652 ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4653 if (ret < nr_pages) {
4654 while (--ret >= 0)
4655 put_page(pages[ret]);
4656 written = -EFAULT;
4657 goto out;
4660 for (i = 0; i < nr_pages; i++)
4661 map_page[i] = kmap_atomic(pages[i]);
4663 local_save_flags(irq_flags);
4664 size = sizeof(*entry) + cnt + 2; /* possible \n added */
4665 buffer = tr->trace_buffer.buffer;
4666 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4667 irq_flags, preempt_count());
4668 if (!event) {
4669 /* Ring buffer disabled, return as if not open for write */
4670 written = -EBADF;
4671 goto out_unlock;
4674 entry = ring_buffer_event_data(event);
4675 entry->ip = _THIS_IP_;
4677 if (nr_pages == 2) {
4678 len = PAGE_SIZE - offset;
4679 memcpy(&entry->buf, map_page[0] + offset, len);
4680 memcpy(&entry->buf[len], map_page[1], cnt - len);
4681 } else
4682 memcpy(&entry->buf, map_page[0] + offset, cnt);
4684 if (entry->buf[cnt - 1] != '\n') {
4685 entry->buf[cnt] = '\n';
4686 entry->buf[cnt + 1] = '\0';
4687 } else
4688 entry->buf[cnt] = '\0';
4690 __buffer_unlock_commit(buffer, event);
4692 written = cnt;
4694 *fpos += written;
4696 out_unlock:
4697 for (i = 0; i < nr_pages; i++){
4698 kunmap_atomic(map_page[i]);
4699 put_page(pages[i]);
4701 out:
4702 return written;
4705 static int tracing_clock_show(struct seq_file *m, void *v)
4707 struct trace_array *tr = m->private;
4708 int i;
4710 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4711 seq_printf(m,
4712 "%s%s%s%s", i ? " " : "",
4713 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4714 i == tr->clock_id ? "]" : "");
4715 seq_putc(m, '\n');
4717 return 0;
4720 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4721 size_t cnt, loff_t *fpos)
4723 struct seq_file *m = filp->private_data;
4724 struct trace_array *tr = m->private;
4725 char buf[64];
4726 const char *clockstr;
4727 int i;
4729 if (cnt >= sizeof(buf))
4730 return -EINVAL;
4732 if (copy_from_user(&buf, ubuf, cnt))
4733 return -EFAULT;
4735 buf[cnt] = 0;
4737 clockstr = strstrip(buf);
4739 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4740 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4741 break;
4743 if (i == ARRAY_SIZE(trace_clocks))
4744 return -EINVAL;
4746 mutex_lock(&trace_types_lock);
4748 tr->clock_id = i;
4750 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4753 * New clock may not be consistent with the previous clock.
4754 * Reset the buffer so that it doesn't have incomparable timestamps.
4756 tracing_reset_online_cpus(&tr->trace_buffer);
4758 #ifdef CONFIG_TRACER_MAX_TRACE
4759 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4760 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4761 tracing_reset_online_cpus(&tr->max_buffer);
4762 #endif
4764 mutex_unlock(&trace_types_lock);
4766 *fpos += cnt;
4768 return cnt;
4771 static int tracing_clock_open(struct inode *inode, struct file *file)
4773 struct trace_array *tr = inode->i_private;
4774 int ret;
4776 if (tracing_disabled)
4777 return -ENODEV;
4779 if (trace_array_get(tr))
4780 return -ENODEV;
4782 ret = single_open(file, tracing_clock_show, inode->i_private);
4783 if (ret < 0)
4784 trace_array_put(tr);
4786 return ret;
4789 struct ftrace_buffer_info {
4790 struct trace_iterator iter;
4791 void *spare;
4792 unsigned int read;
4795 #ifdef CONFIG_TRACER_SNAPSHOT
4796 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4798 struct trace_array *tr = inode->i_private;
4799 struct trace_iterator *iter;
4800 struct seq_file *m;
4801 int ret = 0;
4803 if (trace_array_get(tr) < 0)
4804 return -ENODEV;
4806 if (file->f_mode & FMODE_READ) {
4807 iter = __tracing_open(inode, file, true);
4808 if (IS_ERR(iter))
4809 ret = PTR_ERR(iter);
4810 } else {
4811 /* Writes still need the seq_file to hold the private data */
4812 ret = -ENOMEM;
4813 m = kzalloc(sizeof(*m), GFP_KERNEL);
4814 if (!m)
4815 goto out;
4816 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4817 if (!iter) {
4818 kfree(m);
4819 goto out;
4821 ret = 0;
4823 iter->tr = tr;
4824 iter->trace_buffer = &tr->max_buffer;
4825 iter->cpu_file = tracing_get_cpu(inode);
4826 m->private = iter;
4827 file->private_data = m;
4829 out:
4830 if (ret < 0)
4831 trace_array_put(tr);
4833 return ret;
4836 static ssize_t
4837 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4838 loff_t *ppos)
4840 struct seq_file *m = filp->private_data;
4841 struct trace_iterator *iter = m->private;
4842 struct trace_array *tr = iter->tr;
4843 unsigned long val;
4844 int ret;
4846 ret = tracing_update_buffers();
4847 if (ret < 0)
4848 return ret;
4850 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4851 if (ret)
4852 return ret;
4854 mutex_lock(&trace_types_lock);
4856 if (tr->current_trace->use_max_tr) {
4857 ret = -EBUSY;
4858 goto out;
4861 switch (val) {
4862 case 0:
4863 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4864 ret = -EINVAL;
4865 break;
4867 if (tr->allocated_snapshot)
4868 free_snapshot(tr);
4869 break;
4870 case 1:
4871 /* Only allow per-cpu swap if the ring buffer supports it */
4872 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4873 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4874 ret = -EINVAL;
4875 break;
4877 #endif
4878 if (!tr->allocated_snapshot) {
4879 ret = alloc_snapshot(tr);
4880 if (ret < 0)
4881 break;
4883 local_irq_disable();
4884 /* Now, we're going to swap */
4885 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4886 update_max_tr(tr, current, smp_processor_id());
4887 else
4888 update_max_tr_single(tr, current, iter->cpu_file);
4889 local_irq_enable();
4890 break;
4891 default:
4892 if (tr->allocated_snapshot) {
4893 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4894 tracing_reset_online_cpus(&tr->max_buffer);
4895 else
4896 tracing_reset(&tr->max_buffer, iter->cpu_file);
4898 break;
4901 if (ret >= 0) {
4902 *ppos += cnt;
4903 ret = cnt;
4905 out:
4906 mutex_unlock(&trace_types_lock);
4907 return ret;
4910 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4912 struct seq_file *m = file->private_data;
4913 int ret;
4915 ret = tracing_release(inode, file);
4917 if (file->f_mode & FMODE_READ)
4918 return ret;
4920 /* If write only, the seq_file is just a stub */
4921 if (m)
4922 kfree(m->private);
4923 kfree(m);
4925 return 0;
4928 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4929 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4930 size_t count, loff_t *ppos);
4931 static int tracing_buffers_release(struct inode *inode, struct file *file);
4932 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4933 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4935 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4937 struct ftrace_buffer_info *info;
4938 int ret;
4940 ret = tracing_buffers_open(inode, filp);
4941 if (ret < 0)
4942 return ret;
4944 info = filp->private_data;
4946 if (info->iter.trace->use_max_tr) {
4947 tracing_buffers_release(inode, filp);
4948 return -EBUSY;
4951 info->iter.snapshot = true;
4952 info->iter.trace_buffer = &info->iter.tr->max_buffer;
4954 return ret;
4957 #endif /* CONFIG_TRACER_SNAPSHOT */
4960 static const struct file_operations tracing_max_lat_fops = {
4961 .open = tracing_open_generic,
4962 .read = tracing_max_lat_read,
4963 .write = tracing_max_lat_write,
4964 .llseek = generic_file_llseek,
4967 static const struct file_operations set_tracer_fops = {
4968 .open = tracing_open_generic,
4969 .read = tracing_set_trace_read,
4970 .write = tracing_set_trace_write,
4971 .llseek = generic_file_llseek,
4974 static const struct file_operations tracing_pipe_fops = {
4975 .open = tracing_open_pipe,
4976 .poll = tracing_poll_pipe,
4977 .read = tracing_read_pipe,
4978 .splice_read = tracing_splice_read_pipe,
4979 .release = tracing_release_pipe,
4980 .llseek = no_llseek,
4983 static const struct file_operations tracing_entries_fops = {
4984 .open = tracing_open_generic_tr,
4985 .read = tracing_entries_read,
4986 .write = tracing_entries_write,
4987 .llseek = generic_file_llseek,
4988 .release = tracing_release_generic_tr,
4991 static const struct file_operations tracing_total_entries_fops = {
4992 .open = tracing_open_generic_tr,
4993 .read = tracing_total_entries_read,
4994 .llseek = generic_file_llseek,
4995 .release = tracing_release_generic_tr,
4998 static const struct file_operations tracing_free_buffer_fops = {
4999 .open = tracing_open_generic_tr,
5000 .write = tracing_free_buffer_write,
5001 .release = tracing_free_buffer_release,
5004 static const struct file_operations tracing_mark_fops = {
5005 .open = tracing_open_generic_tr,
5006 .write = tracing_mark_write,
5007 .llseek = generic_file_llseek,
5008 .release = tracing_release_generic_tr,
5011 static const struct file_operations trace_clock_fops = {
5012 .open = tracing_clock_open,
5013 .read = seq_read,
5014 .llseek = seq_lseek,
5015 .release = tracing_single_release_tr,
5016 .write = tracing_clock_write,
5019 #ifdef CONFIG_TRACER_SNAPSHOT
5020 static const struct file_operations snapshot_fops = {
5021 .open = tracing_snapshot_open,
5022 .read = seq_read,
5023 .write = tracing_snapshot_write,
5024 .llseek = tracing_lseek,
5025 .release = tracing_snapshot_release,
5028 static const struct file_operations snapshot_raw_fops = {
5029 .open = snapshot_raw_open,
5030 .read = tracing_buffers_read,
5031 .release = tracing_buffers_release,
5032 .splice_read = tracing_buffers_splice_read,
5033 .llseek = no_llseek,
5036 #endif /* CONFIG_TRACER_SNAPSHOT */
5038 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5040 struct trace_array *tr = inode->i_private;
5041 struct ftrace_buffer_info *info;
5042 int ret;
5044 if (tracing_disabled)
5045 return -ENODEV;
5047 if (trace_array_get(tr) < 0)
5048 return -ENODEV;
5050 info = kzalloc(sizeof(*info), GFP_KERNEL);
5051 if (!info) {
5052 trace_array_put(tr);
5053 return -ENOMEM;
5056 mutex_lock(&trace_types_lock);
5058 info->iter.tr = tr;
5059 info->iter.cpu_file = tracing_get_cpu(inode);
5060 info->iter.trace = tr->current_trace;
5061 info->iter.trace_buffer = &tr->trace_buffer;
5062 info->spare = NULL;
5063 /* Force reading ring buffer for first read */
5064 info->read = (unsigned int)-1;
5066 filp->private_data = info;
5068 mutex_unlock(&trace_types_lock);
5070 ret = nonseekable_open(inode, filp);
5071 if (ret < 0)
5072 trace_array_put(tr);
5074 return ret;
5077 static unsigned int
5078 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5080 struct ftrace_buffer_info *info = filp->private_data;
5081 struct trace_iterator *iter = &info->iter;
5083 return trace_poll(iter, filp, poll_table);
5086 static ssize_t
5087 tracing_buffers_read(struct file *filp, char __user *ubuf,
5088 size_t count, loff_t *ppos)
5090 struct ftrace_buffer_info *info = filp->private_data;
5091 struct trace_iterator *iter = &info->iter;
5092 ssize_t ret;
5093 ssize_t size;
5095 if (!count)
5096 return 0;
5098 mutex_lock(&trace_types_lock);
5100 #ifdef CONFIG_TRACER_MAX_TRACE
5101 if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5102 size = -EBUSY;
5103 goto out_unlock;
5105 #endif
5107 if (!info->spare)
5108 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5109 iter->cpu_file);
5110 size = -ENOMEM;
5111 if (!info->spare)
5112 goto out_unlock;
5114 /* Do we have previous read data to read? */
5115 if (info->read < PAGE_SIZE)
5116 goto read;
5118 again:
5119 trace_access_lock(iter->cpu_file);
5120 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5121 &info->spare,
5122 count,
5123 iter->cpu_file, 0);
5124 trace_access_unlock(iter->cpu_file);
5126 if (ret < 0) {
5127 if (trace_empty(iter)) {
5128 if ((filp->f_flags & O_NONBLOCK)) {
5129 size = -EAGAIN;
5130 goto out_unlock;
5132 mutex_unlock(&trace_types_lock);
5133 ret = iter->trace->wait_pipe(iter);
5134 mutex_lock(&trace_types_lock);
5135 if (ret) {
5136 size = ret;
5137 goto out_unlock;
5139 if (signal_pending(current)) {
5140 size = -EINTR;
5141 goto out_unlock;
5143 goto again;
5145 size = 0;
5146 goto out_unlock;
5149 info->read = 0;
5150 read:
5151 size = PAGE_SIZE - info->read;
5152 if (size > count)
5153 size = count;
5155 ret = copy_to_user(ubuf, info->spare + info->read, size);
5156 if (ret == size) {
5157 size = -EFAULT;
5158 goto out_unlock;
5160 size -= ret;
5162 *ppos += size;
5163 info->read += size;
5165 out_unlock:
5166 mutex_unlock(&trace_types_lock);
5168 return size;
5171 static int tracing_buffers_release(struct inode *inode, struct file *file)
5173 struct ftrace_buffer_info *info = file->private_data;
5174 struct trace_iterator *iter = &info->iter;
5176 mutex_lock(&trace_types_lock);
5178 __trace_array_put(iter->tr);
5180 if (info->spare)
5181 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5182 kfree(info);
5184 mutex_unlock(&trace_types_lock);
5186 return 0;
5189 struct buffer_ref {
5190 struct ring_buffer *buffer;
5191 void *page;
5192 int ref;
5195 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5196 struct pipe_buffer *buf)
5198 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5200 if (--ref->ref)
5201 return;
5203 ring_buffer_free_read_page(ref->buffer, ref->page);
5204 kfree(ref);
5205 buf->private = 0;
5208 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5209 struct pipe_buffer *buf)
5211 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5213 ref->ref++;
5216 /* Pipe buffer operations for a buffer. */
5217 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5218 .can_merge = 0,
5219 .map = generic_pipe_buf_map,
5220 .unmap = generic_pipe_buf_unmap,
5221 .confirm = generic_pipe_buf_confirm,
5222 .release = buffer_pipe_buf_release,
5223 .steal = generic_pipe_buf_steal,
5224 .get = buffer_pipe_buf_get,
5228 * Callback from splice_to_pipe(), if we need to release some pages
5229 * at the end of the spd in case we error'ed out in filling the pipe.
5231 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5233 struct buffer_ref *ref =
5234 (struct buffer_ref *)spd->partial[i].private;
5236 if (--ref->ref)
5237 return;
5239 ring_buffer_free_read_page(ref->buffer, ref->page);
5240 kfree(ref);
5241 spd->partial[i].private = 0;
5244 static ssize_t
5245 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5246 struct pipe_inode_info *pipe, size_t len,
5247 unsigned int flags)
5249 struct ftrace_buffer_info *info = file->private_data;
5250 struct trace_iterator *iter = &info->iter;
5251 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5252 struct page *pages_def[PIPE_DEF_BUFFERS];
5253 struct splice_pipe_desc spd = {
5254 .pages = pages_def,
5255 .partial = partial_def,
5256 .nr_pages_max = PIPE_DEF_BUFFERS,
5257 .flags = flags,
5258 .ops = &buffer_pipe_buf_ops,
5259 .spd_release = buffer_spd_release,
5261 struct buffer_ref *ref;
5262 int entries, size, i;
5263 ssize_t ret;
5265 mutex_lock(&trace_types_lock);
5267 #ifdef CONFIG_TRACER_MAX_TRACE
5268 if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5269 ret = -EBUSY;
5270 goto out;
5272 #endif
5274 if (splice_grow_spd(pipe, &spd)) {
5275 ret = -ENOMEM;
5276 goto out;
5279 if (*ppos & (PAGE_SIZE - 1)) {
5280 ret = -EINVAL;
5281 goto out;
5284 if (len & (PAGE_SIZE - 1)) {
5285 if (len < PAGE_SIZE) {
5286 ret = -EINVAL;
5287 goto out;
5289 len &= PAGE_MASK;
5292 again:
5293 trace_access_lock(iter->cpu_file);
5294 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5296 for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5297 struct page *page;
5298 int r;
5300 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5301 if (!ref)
5302 break;
5304 ref->ref = 1;
5305 ref->buffer = iter->trace_buffer->buffer;
5306 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5307 if (!ref->page) {
5308 kfree(ref);
5309 break;
5312 r = ring_buffer_read_page(ref->buffer, &ref->page,
5313 len, iter->cpu_file, 1);
5314 if (r < 0) {
5315 ring_buffer_free_read_page(ref->buffer, ref->page);
5316 kfree(ref);
5317 break;
5321 * zero out any left over data, this is going to
5322 * user land.
5324 size = ring_buffer_page_len(ref->page);
5325 if (size < PAGE_SIZE)
5326 memset(ref->page + size, 0, PAGE_SIZE - size);
5328 page = virt_to_page(ref->page);
5330 spd.pages[i] = page;
5331 spd.partial[i].len = PAGE_SIZE;
5332 spd.partial[i].offset = 0;
5333 spd.partial[i].private = (unsigned long)ref;
5334 spd.nr_pages++;
5335 *ppos += PAGE_SIZE;
5337 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5340 trace_access_unlock(iter->cpu_file);
5341 spd.nr_pages = i;
5343 /* did we read anything? */
5344 if (!spd.nr_pages) {
5345 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5346 ret = -EAGAIN;
5347 goto out;
5349 mutex_unlock(&trace_types_lock);
5350 ret = iter->trace->wait_pipe(iter);
5351 mutex_lock(&trace_types_lock);
5352 if (ret)
5353 goto out;
5354 if (signal_pending(current)) {
5355 ret = -EINTR;
5356 goto out;
5358 goto again;
5361 ret = splice_to_pipe(pipe, &spd);
5362 splice_shrink_spd(&spd);
5363 out:
5364 mutex_unlock(&trace_types_lock);
5366 return ret;
5369 static const struct file_operations tracing_buffers_fops = {
5370 .open = tracing_buffers_open,
5371 .read = tracing_buffers_read,
5372 .poll = tracing_buffers_poll,
5373 .release = tracing_buffers_release,
5374 .splice_read = tracing_buffers_splice_read,
5375 .llseek = no_llseek,
5378 static ssize_t
5379 tracing_stats_read(struct file *filp, char __user *ubuf,
5380 size_t count, loff_t *ppos)
5382 struct inode *inode = file_inode(filp);
5383 struct trace_array *tr = inode->i_private;
5384 struct trace_buffer *trace_buf = &tr->trace_buffer;
5385 int cpu = tracing_get_cpu(inode);
5386 struct trace_seq *s;
5387 unsigned long cnt;
5388 unsigned long long t;
5389 unsigned long usec_rem;
5391 s = kmalloc(sizeof(*s), GFP_KERNEL);
5392 if (!s)
5393 return -ENOMEM;
5395 trace_seq_init(s);
5397 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5398 trace_seq_printf(s, "entries: %ld\n", cnt);
5400 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5401 trace_seq_printf(s, "overrun: %ld\n", cnt);
5403 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5404 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5406 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5407 trace_seq_printf(s, "bytes: %ld\n", cnt);
5409 if (trace_clocks[tr->clock_id].in_ns) {
5410 /* local or global for trace_clock */
5411 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5412 usec_rem = do_div(t, USEC_PER_SEC);
5413 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5414 t, usec_rem);
5416 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5417 usec_rem = do_div(t, USEC_PER_SEC);
5418 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5419 } else {
5420 /* counter or tsc mode for trace_clock */
5421 trace_seq_printf(s, "oldest event ts: %llu\n",
5422 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5424 trace_seq_printf(s, "now ts: %llu\n",
5425 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5428 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5429 trace_seq_printf(s, "dropped events: %ld\n", cnt);
5431 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5432 trace_seq_printf(s, "read events: %ld\n", cnt);
5434 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5436 kfree(s);
5438 return count;
5441 static const struct file_operations tracing_stats_fops = {
5442 .open = tracing_open_generic_tr,
5443 .read = tracing_stats_read,
5444 .llseek = generic_file_llseek,
5445 .release = tracing_release_generic_tr,
5448 #ifdef CONFIG_DYNAMIC_FTRACE
5450 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5452 return 0;
5455 static ssize_t
5456 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5457 size_t cnt, loff_t *ppos)
5459 static char ftrace_dyn_info_buffer[1024];
5460 static DEFINE_MUTEX(dyn_info_mutex);
5461 unsigned long *p = filp->private_data;
5462 char *buf = ftrace_dyn_info_buffer;
5463 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5464 int r;
5466 mutex_lock(&dyn_info_mutex);
5467 r = sprintf(buf, "%ld ", *p);
5469 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5470 buf[r++] = '\n';
5472 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5474 mutex_unlock(&dyn_info_mutex);
5476 return r;
5479 static const struct file_operations tracing_dyn_info_fops = {
5480 .open = tracing_open_generic,
5481 .read = tracing_read_dyn_info,
5482 .llseek = generic_file_llseek,
5484 #endif /* CONFIG_DYNAMIC_FTRACE */
5486 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5487 static void
5488 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5490 tracing_snapshot();
5493 static void
5494 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5496 unsigned long *count = (long *)data;
5498 if (!*count)
5499 return;
5501 if (*count != -1)
5502 (*count)--;
5504 tracing_snapshot();
5507 static int
5508 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5509 struct ftrace_probe_ops *ops, void *data)
5511 long count = (long)data;
5513 seq_printf(m, "%ps:", (void *)ip);
5515 seq_printf(m, "snapshot");
5517 if (count == -1)
5518 seq_printf(m, ":unlimited\n");
5519 else
5520 seq_printf(m, ":count=%ld\n", count);
5522 return 0;
5525 static struct ftrace_probe_ops snapshot_probe_ops = {
5526 .func = ftrace_snapshot,
5527 .print = ftrace_snapshot_print,
5530 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5531 .func = ftrace_count_snapshot,
5532 .print = ftrace_snapshot_print,
5535 static int
5536 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5537 char *glob, char *cmd, char *param, int enable)
5539 struct ftrace_probe_ops *ops;
5540 void *count = (void *)-1;
5541 char *number;
5542 int ret;
5544 /* hash funcs only work with set_ftrace_filter */
5545 if (!enable)
5546 return -EINVAL;
5548 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5550 if (glob[0] == '!') {
5551 unregister_ftrace_function_probe_func(glob+1, ops);
5552 return 0;
5555 if (!param)
5556 goto out_reg;
5558 number = strsep(&param, ":");
5560 if (!strlen(number))
5561 goto out_reg;
5564 * We use the callback data field (which is a pointer)
5565 * as our counter.
5567 ret = kstrtoul(number, 0, (unsigned long *)&count);
5568 if (ret)
5569 return ret;
5571 out_reg:
5572 ret = register_ftrace_function_probe(glob, ops, count);
5574 if (ret >= 0)
5575 alloc_snapshot(&global_trace);
5577 return ret < 0 ? ret : 0;
5580 static struct ftrace_func_command ftrace_snapshot_cmd = {
5581 .name = "snapshot",
5582 .func = ftrace_trace_snapshot_callback,
5585 static __init int register_snapshot_cmd(void)
5587 return register_ftrace_command(&ftrace_snapshot_cmd);
5589 #else
5590 static inline __init int register_snapshot_cmd(void) { return 0; }
5591 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5593 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5595 if (tr->dir)
5596 return tr->dir;
5598 if (!debugfs_initialized())
5599 return NULL;
5601 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5602 tr->dir = debugfs_create_dir("tracing", NULL);
5604 if (!tr->dir)
5605 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5607 return tr->dir;
5610 struct dentry *tracing_init_dentry(void)
5612 return tracing_init_dentry_tr(&global_trace);
5615 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5617 struct dentry *d_tracer;
5619 if (tr->percpu_dir)
5620 return tr->percpu_dir;
5622 d_tracer = tracing_init_dentry_tr(tr);
5623 if (!d_tracer)
5624 return NULL;
5626 tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5628 WARN_ONCE(!tr->percpu_dir,
5629 "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5631 return tr->percpu_dir;
5634 static struct dentry *
5635 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5636 void *data, long cpu, const struct file_operations *fops)
5638 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5640 if (ret) /* See tracing_get_cpu() */
5641 ret->d_inode->i_cdev = (void *)(cpu + 1);
5642 return ret;
5645 static void
5646 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5648 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5649 struct dentry *d_cpu;
5650 char cpu_dir[30]; /* 30 characters should be more than enough */
5652 if (!d_percpu)
5653 return;
5655 snprintf(cpu_dir, 30, "cpu%ld", cpu);
5656 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5657 if (!d_cpu) {
5658 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5659 return;
5662 /* per cpu trace_pipe */
5663 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5664 tr, cpu, &tracing_pipe_fops);
5666 /* per cpu trace */
5667 trace_create_cpu_file("trace", 0644, d_cpu,
5668 tr, cpu, &tracing_fops);
5670 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5671 tr, cpu, &tracing_buffers_fops);
5673 trace_create_cpu_file("stats", 0444, d_cpu,
5674 tr, cpu, &tracing_stats_fops);
5676 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5677 tr, cpu, &tracing_entries_fops);
5679 #ifdef CONFIG_TRACER_SNAPSHOT
5680 trace_create_cpu_file("snapshot", 0644, d_cpu,
5681 tr, cpu, &snapshot_fops);
5683 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5684 tr, cpu, &snapshot_raw_fops);
5685 #endif
5688 #ifdef CONFIG_FTRACE_SELFTEST
5689 /* Let selftest have access to static functions in this file */
5690 #include "trace_selftest.c"
5691 #endif
5693 struct trace_option_dentry {
5694 struct tracer_opt *opt;
5695 struct tracer_flags *flags;
5696 struct trace_array *tr;
5697 struct dentry *entry;
5700 static ssize_t
5701 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5702 loff_t *ppos)
5704 struct trace_option_dentry *topt = filp->private_data;
5705 char *buf;
5707 if (topt->flags->val & topt->opt->bit)
5708 buf = "1\n";
5709 else
5710 buf = "0\n";
5712 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5715 static ssize_t
5716 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5717 loff_t *ppos)
5719 struct trace_option_dentry *topt = filp->private_data;
5720 unsigned long val;
5721 int ret;
5723 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5724 if (ret)
5725 return ret;
5727 if (val != 0 && val != 1)
5728 return -EINVAL;
5730 if (!!(topt->flags->val & topt->opt->bit) != val) {
5731 mutex_lock(&trace_types_lock);
5732 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5733 topt->opt, !val);
5734 mutex_unlock(&trace_types_lock);
5735 if (ret)
5736 return ret;
5739 *ppos += cnt;
5741 return cnt;
5745 static const struct file_operations trace_options_fops = {
5746 .open = tracing_open_generic,
5747 .read = trace_options_read,
5748 .write = trace_options_write,
5749 .llseek = generic_file_llseek,
5752 static ssize_t
5753 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5754 loff_t *ppos)
5756 long index = (long)filp->private_data;
5757 char *buf;
5759 if (trace_flags & (1 << index))
5760 buf = "1\n";
5761 else
5762 buf = "0\n";
5764 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5767 static ssize_t
5768 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5769 loff_t *ppos)
5771 struct trace_array *tr = &global_trace;
5772 long index = (long)filp->private_data;
5773 unsigned long val;
5774 int ret;
5776 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5777 if (ret)
5778 return ret;
5780 if (val != 0 && val != 1)
5781 return -EINVAL;
5783 mutex_lock(&trace_types_lock);
5784 ret = set_tracer_flag(tr, 1 << index, val);
5785 mutex_unlock(&trace_types_lock);
5787 if (ret < 0)
5788 return ret;
5790 *ppos += cnt;
5792 return cnt;
5795 static const struct file_operations trace_options_core_fops = {
5796 .open = tracing_open_generic,
5797 .read = trace_options_core_read,
5798 .write = trace_options_core_write,
5799 .llseek = generic_file_llseek,
5802 struct dentry *trace_create_file(const char *name,
5803 umode_t mode,
5804 struct dentry *parent,
5805 void *data,
5806 const struct file_operations *fops)
5808 struct dentry *ret;
5810 ret = debugfs_create_file(name, mode, parent, data, fops);
5811 if (!ret)
5812 pr_warning("Could not create debugfs '%s' entry\n", name);
5814 return ret;
5818 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5820 struct dentry *d_tracer;
5822 if (tr->options)
5823 return tr->options;
5825 d_tracer = tracing_init_dentry_tr(tr);
5826 if (!d_tracer)
5827 return NULL;
5829 tr->options = debugfs_create_dir("options", d_tracer);
5830 if (!tr->options) {
5831 pr_warning("Could not create debugfs directory 'options'\n");
5832 return NULL;
5835 return tr->options;
5838 static void
5839 create_trace_option_file(struct trace_array *tr,
5840 struct trace_option_dentry *topt,
5841 struct tracer_flags *flags,
5842 struct tracer_opt *opt)
5844 struct dentry *t_options;
5846 t_options = trace_options_init_dentry(tr);
5847 if (!t_options)
5848 return;
5850 topt->flags = flags;
5851 topt->opt = opt;
5852 topt->tr = tr;
5854 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5855 &trace_options_fops);
5859 static struct trace_option_dentry *
5860 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5862 struct trace_option_dentry *topts;
5863 struct tracer_flags *flags;
5864 struct tracer_opt *opts;
5865 int cnt;
5867 if (!tracer)
5868 return NULL;
5870 flags = tracer->flags;
5872 if (!flags || !flags->opts)
5873 return NULL;
5875 opts = flags->opts;
5877 for (cnt = 0; opts[cnt].name; cnt++)
5880 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5881 if (!topts)
5882 return NULL;
5884 for (cnt = 0; opts[cnt].name; cnt++)
5885 create_trace_option_file(tr, &topts[cnt], flags,
5886 &opts[cnt]);
5888 return topts;
5891 static void
5892 destroy_trace_option_files(struct trace_option_dentry *topts)
5894 int cnt;
5896 if (!topts)
5897 return;
5899 for (cnt = 0; topts[cnt].opt; cnt++) {
5900 if (topts[cnt].entry)
5901 debugfs_remove(topts[cnt].entry);
5904 kfree(topts);
5907 static struct dentry *
5908 create_trace_option_core_file(struct trace_array *tr,
5909 const char *option, long index)
5911 struct dentry *t_options;
5913 t_options = trace_options_init_dentry(tr);
5914 if (!t_options)
5915 return NULL;
5917 return trace_create_file(option, 0644, t_options, (void *)index,
5918 &trace_options_core_fops);
5921 static __init void create_trace_options_dir(struct trace_array *tr)
5923 struct dentry *t_options;
5924 int i;
5926 t_options = trace_options_init_dentry(tr);
5927 if (!t_options)
5928 return;
5930 for (i = 0; trace_options[i]; i++)
5931 create_trace_option_core_file(tr, trace_options[i], i);
5934 static ssize_t
5935 rb_simple_read(struct file *filp, char __user *ubuf,
5936 size_t cnt, loff_t *ppos)
5938 struct trace_array *tr = filp->private_data;
5939 char buf[64];
5940 int r;
5942 r = tracer_tracing_is_on(tr);
5943 r = sprintf(buf, "%d\n", r);
5945 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5948 static ssize_t
5949 rb_simple_write(struct file *filp, const char __user *ubuf,
5950 size_t cnt, loff_t *ppos)
5952 struct trace_array *tr = filp->private_data;
5953 struct ring_buffer *buffer = tr->trace_buffer.buffer;
5954 unsigned long val;
5955 int ret;
5957 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5958 if (ret)
5959 return ret;
5961 if (buffer) {
5962 mutex_lock(&trace_types_lock);
5963 if (val) {
5964 tracer_tracing_on(tr);
5965 if (tr->current_trace->start)
5966 tr->current_trace->start(tr);
5967 } else {
5968 tracer_tracing_off(tr);
5969 if (tr->current_trace->stop)
5970 tr->current_trace->stop(tr);
5972 mutex_unlock(&trace_types_lock);
5975 (*ppos)++;
5977 return cnt;
5980 static const struct file_operations rb_simple_fops = {
5981 .open = tracing_open_generic_tr,
5982 .read = rb_simple_read,
5983 .write = rb_simple_write,
5984 .release = tracing_release_generic_tr,
5985 .llseek = default_llseek,
5988 struct dentry *trace_instance_dir;
5990 static void
5991 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5993 static int
5994 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5996 enum ring_buffer_flags rb_flags;
5998 rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6000 buf->tr = tr;
6002 buf->buffer = ring_buffer_alloc(size, rb_flags);
6003 if (!buf->buffer)
6004 return -ENOMEM;
6006 buf->data = alloc_percpu(struct trace_array_cpu);
6007 if (!buf->data) {
6008 ring_buffer_free(buf->buffer);
6009 return -ENOMEM;
6012 /* Allocate the first page for all buffers */
6013 set_buffer_entries(&tr->trace_buffer,
6014 ring_buffer_size(tr->trace_buffer.buffer, 0));
6016 return 0;
6019 static int allocate_trace_buffers(struct trace_array *tr, int size)
6021 int ret;
6023 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6024 if (ret)
6025 return ret;
6027 #ifdef CONFIG_TRACER_MAX_TRACE
6028 ret = allocate_trace_buffer(tr, &tr->max_buffer,
6029 allocate_snapshot ? size : 1);
6030 if (WARN_ON(ret)) {
6031 ring_buffer_free(tr->trace_buffer.buffer);
6032 free_percpu(tr->trace_buffer.data);
6033 return -ENOMEM;
6035 tr->allocated_snapshot = allocate_snapshot;
6038 * Only the top level trace array gets its snapshot allocated
6039 * from the kernel command line.
6041 allocate_snapshot = false;
6042 #endif
6043 return 0;
6046 static int new_instance_create(const char *name)
6048 struct trace_array *tr;
6049 int ret;
6051 mutex_lock(&trace_types_lock);
6053 ret = -EEXIST;
6054 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6055 if (tr->name && strcmp(tr->name, name) == 0)
6056 goto out_unlock;
6059 ret = -ENOMEM;
6060 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6061 if (!tr)
6062 goto out_unlock;
6064 tr->name = kstrdup(name, GFP_KERNEL);
6065 if (!tr->name)
6066 goto out_free_tr;
6068 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6069 goto out_free_tr;
6071 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6073 raw_spin_lock_init(&tr->start_lock);
6075 tr->current_trace = &nop_trace;
6077 INIT_LIST_HEAD(&tr->systems);
6078 INIT_LIST_HEAD(&tr->events);
6080 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6081 goto out_free_tr;
6083 tr->dir = debugfs_create_dir(name, trace_instance_dir);
6084 if (!tr->dir)
6085 goto out_free_tr;
6087 ret = event_trace_add_tracer(tr->dir, tr);
6088 if (ret) {
6089 debugfs_remove_recursive(tr->dir);
6090 goto out_free_tr;
6093 init_tracer_debugfs(tr, tr->dir);
6095 list_add(&tr->list, &ftrace_trace_arrays);
6097 mutex_unlock(&trace_types_lock);
6099 return 0;
6101 out_free_tr:
6102 if (tr->trace_buffer.buffer)
6103 ring_buffer_free(tr->trace_buffer.buffer);
6104 free_cpumask_var(tr->tracing_cpumask);
6105 kfree(tr->name);
6106 kfree(tr);
6108 out_unlock:
6109 mutex_unlock(&trace_types_lock);
6111 return ret;
6115 static int instance_delete(const char *name)
6117 struct trace_array *tr;
6118 int found = 0;
6119 int ret;
6121 mutex_lock(&trace_types_lock);
6123 ret = -ENODEV;
6124 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6125 if (tr->name && strcmp(tr->name, name) == 0) {
6126 found = 1;
6127 break;
6130 if (!found)
6131 goto out_unlock;
6133 ret = -EBUSY;
6134 if (tr->ref)
6135 goto out_unlock;
6137 list_del(&tr->list);
6139 event_trace_del_tracer(tr);
6140 debugfs_remove_recursive(tr->dir);
6141 free_percpu(tr->trace_buffer.data);
6142 ring_buffer_free(tr->trace_buffer.buffer);
6144 kfree(tr->name);
6145 kfree(tr);
6147 ret = 0;
6149 out_unlock:
6150 mutex_unlock(&trace_types_lock);
6152 return ret;
6155 static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
6157 struct dentry *parent;
6158 int ret;
6160 /* Paranoid: Make sure the parent is the "instances" directory */
6161 parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6162 if (WARN_ON_ONCE(parent != trace_instance_dir))
6163 return -ENOENT;
6166 * The inode mutex is locked, but debugfs_create_dir() will also
6167 * take the mutex. As the instances directory can not be destroyed
6168 * or changed in any other way, it is safe to unlock it, and
6169 * let the dentry try. If two users try to make the same dir at
6170 * the same time, then the new_instance_create() will determine the
6171 * winner.
6173 mutex_unlock(&inode->i_mutex);
6175 ret = new_instance_create(dentry->d_iname);
6177 mutex_lock(&inode->i_mutex);
6179 return ret;
6182 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6184 struct dentry *parent;
6185 int ret;
6187 /* Paranoid: Make sure the parent is the "instances" directory */
6188 parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6189 if (WARN_ON_ONCE(parent != trace_instance_dir))
6190 return -ENOENT;
6192 /* The caller did a dget() on dentry */
6193 mutex_unlock(&dentry->d_inode->i_mutex);
6196 * The inode mutex is locked, but debugfs_create_dir() will also
6197 * take the mutex. As the instances directory can not be destroyed
6198 * or changed in any other way, it is safe to unlock it, and
6199 * let the dentry try. If two users try to make the same dir at
6200 * the same time, then the instance_delete() will determine the
6201 * winner.
6203 mutex_unlock(&inode->i_mutex);
6205 ret = instance_delete(dentry->d_iname);
6207 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6208 mutex_lock(&dentry->d_inode->i_mutex);
6210 return ret;
6213 static const struct inode_operations instance_dir_inode_operations = {
6214 .lookup = simple_lookup,
6215 .mkdir = instance_mkdir,
6216 .rmdir = instance_rmdir,
6219 static __init void create_trace_instances(struct dentry *d_tracer)
6221 trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6222 if (WARN_ON(!trace_instance_dir))
6223 return;
6225 /* Hijack the dir inode operations, to allow mkdir */
6226 trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6229 static void
6230 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6232 int cpu;
6234 trace_create_file("tracing_cpumask", 0644, d_tracer,
6235 tr, &tracing_cpumask_fops);
6237 trace_create_file("trace_options", 0644, d_tracer,
6238 tr, &tracing_iter_fops);
6240 trace_create_file("trace", 0644, d_tracer,
6241 tr, &tracing_fops);
6243 trace_create_file("trace_pipe", 0444, d_tracer,
6244 tr, &tracing_pipe_fops);
6246 trace_create_file("buffer_size_kb", 0644, d_tracer,
6247 tr, &tracing_entries_fops);
6249 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6250 tr, &tracing_total_entries_fops);
6252 trace_create_file("free_buffer", 0200, d_tracer,
6253 tr, &tracing_free_buffer_fops);
6255 trace_create_file("trace_marker", 0220, d_tracer,
6256 tr, &tracing_mark_fops);
6258 trace_create_file("trace_clock", 0644, d_tracer, tr,
6259 &trace_clock_fops);
6261 trace_create_file("tracing_on", 0644, d_tracer,
6262 tr, &rb_simple_fops);
6264 #ifdef CONFIG_TRACER_SNAPSHOT
6265 trace_create_file("snapshot", 0644, d_tracer,
6266 tr, &snapshot_fops);
6267 #endif
6269 for_each_tracing_cpu(cpu)
6270 tracing_init_debugfs_percpu(tr, cpu);
6274 static __init int tracer_init_debugfs(void)
6276 struct dentry *d_tracer;
6278 trace_access_lock_init();
6280 d_tracer = tracing_init_dentry();
6281 if (!d_tracer)
6282 return 0;
6284 init_tracer_debugfs(&global_trace, d_tracer);
6286 trace_create_file("available_tracers", 0444, d_tracer,
6287 &global_trace, &show_traces_fops);
6289 trace_create_file("current_tracer", 0644, d_tracer,
6290 &global_trace, &set_tracer_fops);
6292 #ifdef CONFIG_TRACER_MAX_TRACE
6293 trace_create_file("tracing_max_latency", 0644, d_tracer,
6294 &tracing_max_latency, &tracing_max_lat_fops);
6295 #endif
6297 trace_create_file("tracing_thresh", 0644, d_tracer,
6298 &tracing_thresh, &tracing_max_lat_fops);
6300 trace_create_file("README", 0444, d_tracer,
6301 NULL, &tracing_readme_fops);
6303 trace_create_file("saved_cmdlines", 0444, d_tracer,
6304 NULL, &tracing_saved_cmdlines_fops);
6306 #ifdef CONFIG_DYNAMIC_FTRACE
6307 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6308 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6309 #endif
6311 create_trace_instances(d_tracer);
6313 create_trace_options_dir(&global_trace);
6315 return 0;
6318 static int trace_panic_handler(struct notifier_block *this,
6319 unsigned long event, void *unused)
6321 if (ftrace_dump_on_oops)
6322 ftrace_dump(ftrace_dump_on_oops);
6323 return NOTIFY_OK;
6326 static struct notifier_block trace_panic_notifier = {
6327 .notifier_call = trace_panic_handler,
6328 .next = NULL,
6329 .priority = 150 /* priority: INT_MAX >= x >= 0 */
6332 static int trace_die_handler(struct notifier_block *self,
6333 unsigned long val,
6334 void *data)
6336 switch (val) {
6337 case DIE_OOPS:
6338 if (ftrace_dump_on_oops)
6339 ftrace_dump(ftrace_dump_on_oops);
6340 break;
6341 default:
6342 break;
6344 return NOTIFY_OK;
6347 static struct notifier_block trace_die_notifier = {
6348 .notifier_call = trace_die_handler,
6349 .priority = 200
6353 * printk is set to max of 1024, we really don't need it that big.
6354 * Nothing should be printing 1000 characters anyway.
6356 #define TRACE_MAX_PRINT 1000
6359 * Define here KERN_TRACE so that we have one place to modify
6360 * it if we decide to change what log level the ftrace dump
6361 * should be at.
6363 #define KERN_TRACE KERN_EMERG
6365 void
6366 trace_printk_seq(struct trace_seq *s)
6368 /* Probably should print a warning here. */
6369 if (s->len >= TRACE_MAX_PRINT)
6370 s->len = TRACE_MAX_PRINT;
6372 /* should be zero ended, but we are paranoid. */
6373 s->buffer[s->len] = 0;
6375 printk(KERN_TRACE "%s", s->buffer);
6377 trace_seq_init(s);
6380 void trace_init_global_iter(struct trace_iterator *iter)
6382 iter->tr = &global_trace;
6383 iter->trace = iter->tr->current_trace;
6384 iter->cpu_file = RING_BUFFER_ALL_CPUS;
6385 iter->trace_buffer = &global_trace.trace_buffer;
6387 if (iter->trace && iter->trace->open)
6388 iter->trace->open(iter);
6390 /* Annotate start of buffers if we had overruns */
6391 if (ring_buffer_overruns(iter->trace_buffer->buffer))
6392 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6394 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6395 if (trace_clocks[iter->tr->clock_id].in_ns)
6396 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6399 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6401 /* use static because iter can be a bit big for the stack */
6402 static struct trace_iterator iter;
6403 static atomic_t dump_running;
6404 unsigned int old_userobj;
6405 unsigned long flags;
6406 int cnt = 0, cpu;
6408 /* Only allow one dump user at a time. */
6409 if (atomic_inc_return(&dump_running) != 1) {
6410 atomic_dec(&dump_running);
6411 return;
6415 * Always turn off tracing when we dump.
6416 * We don't need to show trace output of what happens
6417 * between multiple crashes.
6419 * If the user does a sysrq-z, then they can re-enable
6420 * tracing with echo 1 > tracing_on.
6422 tracing_off();
6424 local_irq_save(flags);
6426 /* Simulate the iterator */
6427 trace_init_global_iter(&iter);
6429 for_each_tracing_cpu(cpu) {
6430 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6433 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6435 /* don't look at user memory in panic mode */
6436 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6438 switch (oops_dump_mode) {
6439 case DUMP_ALL:
6440 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6441 break;
6442 case DUMP_ORIG:
6443 iter.cpu_file = raw_smp_processor_id();
6444 break;
6445 case DUMP_NONE:
6446 goto out_enable;
6447 default:
6448 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6449 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6452 printk(KERN_TRACE "Dumping ftrace buffer:\n");
6454 /* Did function tracer already get disabled? */
6455 if (ftrace_is_dead()) {
6456 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6457 printk("# MAY BE MISSING FUNCTION EVENTS\n");
6461 * We need to stop all tracing on all CPUS to read the
6462 * the next buffer. This is a bit expensive, but is
6463 * not done often. We fill all what we can read,
6464 * and then release the locks again.
6467 while (!trace_empty(&iter)) {
6469 if (!cnt)
6470 printk(KERN_TRACE "---------------------------------\n");
6472 cnt++;
6474 /* reset all but tr, trace, and overruns */
6475 memset(&iter.seq, 0,
6476 sizeof(struct trace_iterator) -
6477 offsetof(struct trace_iterator, seq));
6478 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6479 iter.pos = -1;
6481 if (trace_find_next_entry_inc(&iter) != NULL) {
6482 int ret;
6484 ret = print_trace_line(&iter);
6485 if (ret != TRACE_TYPE_NO_CONSUME)
6486 trace_consume(&iter);
6488 touch_nmi_watchdog();
6490 trace_printk_seq(&iter.seq);
6493 if (!cnt)
6494 printk(KERN_TRACE " (ftrace buffer empty)\n");
6495 else
6496 printk(KERN_TRACE "---------------------------------\n");
6498 out_enable:
6499 trace_flags |= old_userobj;
6501 for_each_tracing_cpu(cpu) {
6502 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6504 atomic_dec(&dump_running);
6505 local_irq_restore(flags);
6507 EXPORT_SYMBOL_GPL(ftrace_dump);
6509 __init static int tracer_alloc_buffers(void)
6511 int ring_buf_size;
6512 int ret = -ENOMEM;
6515 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6516 goto out;
6518 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6519 goto out_free_buffer_mask;
6521 /* Only allocate trace_printk buffers if a trace_printk exists */
6522 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6523 /* Must be called before global_trace.buffer is allocated */
6524 trace_printk_init_buffers();
6526 /* To save memory, keep the ring buffer size to its minimum */
6527 if (ring_buffer_expanded)
6528 ring_buf_size = trace_buf_size;
6529 else
6530 ring_buf_size = 1;
6532 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6533 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6535 raw_spin_lock_init(&global_trace.start_lock);
6537 /* Used for event triggers */
6538 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6539 if (!temp_buffer)
6540 goto out_free_cpumask;
6542 /* TODO: make the number of buffers hot pluggable with CPUS */
6543 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6544 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6545 WARN_ON(1);
6546 goto out_free_temp_buffer;
6549 if (global_trace.buffer_disabled)
6550 tracing_off();
6552 trace_init_cmdlines();
6555 * register_tracer() might reference current_trace, so it
6556 * needs to be set before we register anything. This is
6557 * just a bootstrap of current_trace anyway.
6559 global_trace.current_trace = &nop_trace;
6561 register_tracer(&nop_trace);
6563 /* All seems OK, enable tracing */
6564 tracing_disabled = 0;
6566 atomic_notifier_chain_register(&panic_notifier_list,
6567 &trace_panic_notifier);
6569 register_die_notifier(&trace_die_notifier);
6571 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6573 INIT_LIST_HEAD(&global_trace.systems);
6574 INIT_LIST_HEAD(&global_trace.events);
6575 list_add(&global_trace.list, &ftrace_trace_arrays);
6577 while (trace_boot_options) {
6578 char *option;
6580 option = strsep(&trace_boot_options, ",");
6581 trace_set_options(&global_trace, option);
6584 register_snapshot_cmd();
6586 return 0;
6588 out_free_temp_buffer:
6589 ring_buffer_free(temp_buffer);
6590 out_free_cpumask:
6591 free_percpu(global_trace.trace_buffer.data);
6592 #ifdef CONFIG_TRACER_MAX_TRACE
6593 free_percpu(global_trace.max_buffer.data);
6594 #endif
6595 free_cpumask_var(global_trace.tracing_cpumask);
6596 out_free_buffer_mask:
6597 free_cpumask_var(tracing_buffer_mask);
6598 out:
6599 return ret;
6602 __init static int clear_boot_tracer(void)
6605 * The default tracer at boot buffer is an init section.
6606 * This function is called in lateinit. If we did not
6607 * find the boot tracer, then clear it out, to prevent
6608 * later registration from accessing the buffer that is
6609 * about to be freed.
6611 if (!default_bootup_tracer)
6612 return 0;
6614 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6615 default_bootup_tracer);
6616 default_bootup_tracer = NULL;
6618 return 0;
6621 early_initcall(tracer_alloc_buffers);
6622 fs_initcall(tracer_init_debugfs);
6623 late_initcall(clear_boot_tracer);