tools/perf/builtin-record.c
// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

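/*
 * State for --switch-output: output file rotation can be driven by
 * SIGUSR2 ('signal'), by the amount of data written crossing 'size'
 * bytes, or by an alarm every 'time' seconds; the raw option string is
 * kept in 'str' and decoded by switch_output_setup() below.
 */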
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

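/*
 * All output ends up here: sampled mmap data, synthesized events and
 * AUX area data all funnel through record__write(), which also accounts
 * bytes_written so that switch_output_size() can fire the size-based
 * output rotation trigger.
 */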
static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct perf_mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;
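	/*
	 * Example: len1 + len2 == 13 gives (13 & 7) == 5, so padding
	 * becomes 3 and the 13 payload bytes are padded out to 16,
	 * keeping the event stream 8-byte aligned.
	 */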

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct perf_mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

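/*
 * Note that the !HAVE_AUXTRACE_SUPPORT stubs above simply return 0 or
 * do nothing, so the rest of this file can call them unconditionally.
 */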
static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos);
				goto try_again;
			}
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		       err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		       str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

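/*
 * Called when the recorded file is processed by process_buildids():
 * track the first/last sample times for --timestamp-boundary and,
 * unless --buildid-all made per-sample DSO marking unnecessary, mark
 * the DSO that each sample hit.
 */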
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk all samples to get the timestamps of the
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange the module mmap prior to the guest
	 * kernel mmap and trigger a preload dso, because default guest
	 * module symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This method avoids missing symbols when
	 * the first addr is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

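/*
 * PERF_RECORD_FINISHED_ROUND marks a point where every preceding event
 * has been written, so the report side can sort and flush what it has
 * buffered; record__mmap_read_evlist() emits it after any pass over the
 * mmaps that actually wrote data.
 */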
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &maps[i];

		if (map->base) {
			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

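/* Read the regular mmaps first, then the overwrite (backward) ones. */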
static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address,
						rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

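/*
 * Finish the current output file and switch to a new, timestamped one:
 * synthesize the tail events, write out the header, then reset the
 * byte/size accounting unless this is the final switch at exit.
 */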
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, which causes the newly created perf.data to
		 * lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

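/*
 * Synthesize everything the report side needs besides the samples
 * themselves: attrs and features (pipe mode), tracepoint data, the time
 * conversion parameters, auxtrace info, kernel and module maps, guest
 * machines, thread/cpu maps and the already-running threads.
 */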
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}

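/*
 * The main recording loop: set up signals and the session, open the
 * events, write the header, synthesize the initial events, enable the
 * events and/or start the workload, then keep draining the mmaps until
 * 'done' is set by a signal or the workload exits.
 */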
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the ids allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
		       errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
		 * here: when done == true and hits != rec->samples in
		 * the previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect
			 * data from the overwritable ring buffer.
			 * Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer
			 * after record__mmap_read_all(): we should have
			 * collected data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

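		/*
		 * If no new samples arrived in this iteration, either
		 * stop (done/draining) or sleep in poll() until woken.
		 * Once every pollfd has reported POLLERR/POLLHUP the
		 * workload is gone and we only drain what is left.
		 */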
		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}
	}

	/*
	 * When perf is starting the traced process, at the end events
	 * die with the process and we wait for that. Thus no need to
	 * disable events in this case.
	 */
	if (done && !disabled && !target__none(&opts->target)) {
		trigger_off(&auxtrace_snapshot_trigger);
		perf_evlist__disable(rec->evlist);
		disabled = true;
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

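/*
 * Handle perfconfig keys: 'record.build-id' accepts cache, no-cache or
 * skip, and 'record.call-graph' is forwarded as the
 * 'call-graph.record-mode' default.
 */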
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}

	return 0;
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }

/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

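/*
 * Example: '-k mono' and '-k CLOCK_MONOTONIC' both map to
 * CLOCK_MONOTONIC; a raw numeric clockid is accepted as well, see
 * parse_clockid() below.
 */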
static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

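/*
 * Parse -m/--mmap-pages pages[,pages]: the first value sizes the data
 * mmap, the optional second value sizes the AUX area tracing mmap.
 */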
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s), "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

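/*
 * Examples: --switch-output=signal rotates on SIGUSR2,
 * --switch-output=100M rotates when 100MB have been written and
 * --switch-output=30s rotates every 30 seconds; any of these also
 * enables timestamped output file names.
 */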
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout    = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I ?' to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '--user-regs ?' to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receiving SIGUSR2 or when crossing the size or time threshold",
			  "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
		       errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *		--no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}