// SPDX-License-Identifier: GPL-2.0
/*
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"

#include <linux/time64.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
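
/*
 * Illustrative note (editor's sketch, based on util/trigger.h): a trigger
 * moves through the states OFF -> ON -> READY -> HIT.  The typical flow in
 * this file is:
 *
 *	trigger_on(&switch_output_trigger);	// enabled at startup (opt-in)
 *	trigger_ready(&switch_output_trigger);	// armed just before the loop
 *	trigger_hit(&switch_output_trigger);	// fired by signal/size/time
 *	trigger_is_hit(&switch_output_trigger);	// polled in the main loop
 *
 * trigger_error() parks a trigger in an error state so it never reports
 * hit again.
 */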

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}
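
/*
 * Worked example (illustrative): with --switch-output=100M the parsed
 * threshold is rec->switch_output.size == 100 << 20 == 104857600, so the
 * check above first fires once rec->bytes_written reaches that count and
 * the main loop rotates the output file.
 */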

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);

	return record__write(rec, NULL, event, event->header.size);
}

static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct perf_mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
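
/*
 * Worked example (illustrative): for len1 + len2 == 13 the remainder is
 * 13 & 7 == 5, so padding == 8 - 5 == 3 and three zero bytes from pad[]
 * are appended, keeping each AUX record 8-byte aligned in the file.
 */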

static int record__auxtrace_mmap_read(struct record *rec,
				      struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct perf_mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos);
				goto try_again;
			}
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
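
/*
 * Illustrative note (editor's sketch): the EINVAL/EBADF branch above is
 * what lets weak event groups degrade gracefully, e.g. with the ':W'
 * group modifier:
 *
 *	perf record -e '{cycles,cache-misses,branch-misses}:W' -- sleep 1
 *
 * If the PMU cannot co-schedule the whole group,
 * perf_evlist__reset_weak_group() breaks it up and the events are
 * retried as individual (non-grouped) events.
 */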

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 * rather than the build-id path (in the debug directory), e.g.
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmap prior to the guest kernel
	 * mmap and trigger a preload dso, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
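
/*
 * Illustrative note (editor's sketch): a FINISHED_ROUND record is a
 * header-only synthetic marker.  record__mmap_read_evlist() emits it
 * after a pass over the mmaps that produced data, which bounds how long
 * 'perf report' must buffer events for timestamp ordering: everything
 * written before the marker can be flushed once the marker is read.
 */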

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &maps[i];

		if (map->base) {
			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static int
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return 0;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return 0;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, which causes the newly created perf.data to
		 * lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}
*rec
, int argc
, const char **argv
)
869 unsigned long waking
= 0;
870 const bool forks
= argc
> 0;
871 struct perf_tool
*tool
= &rec
->tool
;
872 struct record_opts
*opts
= &rec
->opts
;
873 struct perf_data
*data
= &rec
->data
;
874 struct perf_session
*session
;
875 bool disabled
= false, draining
= false;
878 atexit(record__sig_exit
);
879 signal(SIGCHLD
, sig_handler
);
880 signal(SIGINT
, sig_handler
);
881 signal(SIGTERM
, sig_handler
);
882 signal(SIGSEGV
, sigsegv_handler
);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the ids allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
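		/*
		 * Worked example (illustrative): with -D 500,
		 * opts->initial_delay == 500 and the usleep() above waits
		 * 500 * USEC_PER_MSEC == 500000 us before enabling the real
		 * events; until then only the dummy event added in
		 * record__open() tracks PERF_RECORD_MMAP et al.
		 */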
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
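			/*
			 * Illustrative note (editor's sketch): with
			 * --switch-output=30s, switch_output_setup() stored
			 * s->time == 30, cmd_record() armed the first
			 * alarm(30), and each rotation re-arms it here, so
			 * SIGALRM fires roughly every 30 seconds of
			 * recording.
			 */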
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));

		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (rec->opts.tail_synthesize)
		record__synthesize(rec, true);

	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;

	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}

	return 0;
}

struct clockid_map {
	const char	*name;
	int		clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }
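
/*
 * Illustrative expansion (editor's note):
 *
 *	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC)
 *
 * becomes
 *
 *	{ .name = "monotonic", .clockid = (CLOCK_MONOTONIC), }
 *
 * and CLOCKID_END provides the .name == NULL sentinel that terminates
 * the clockids[] table below.
 */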

/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
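
/*
 * Illustrative usage (editor's sketch): parse_clockid() below walks this
 * table case-insensitively and also strips an optional "CLOCK_" prefix,
 * so '-k monotonic', '-k CLOCK_MONOTONIC' and '-k mono' all resolve to
 * CLOCK_MONOTONIC, while a bare number such as '-k 4' is passed through
 * as a raw clockid.
 */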

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}
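
/*
 * Worked example (illustrative): '-m 512,128' is split at the comma, so
 * opts->mmap_pages becomes 512 and opts->auxtrace_mmap_pages becomes 128;
 * a plain '-m 512' leaves the AUX area mmap size untouched.
 */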

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s); "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
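
/*
 * Worked examples (illustrative): '--switch-output' defaults to the
 * "signal" mode, rotating on SIGUSR2.  '--switch-output=2G' matches
 * tags_size, giving s->size == 2 * (1 << 30); '--switch-output=30m'
 * matches tags_time, giving s->time == 30 * 60 seconds.  Every variant
 * also sets rec->timestamp_filename so each rotated file gets a
 * timestamp suffix.
 */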

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout    = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers in user space,"
		    " use --user-regs=? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receiving SIGUSR2 or when crossing the size or time threshold",
			  "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *		--no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace: it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}