ARC: [*defconfig] Reenable soft lock-up detector
[linux/fpc-iii.git] / tools / perf / builtin-record.c
blob56f8142ff97f1b6a1a7bb9433c1967f758d47916
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
8 #include "builtin.h"
10 #include "perf.h"
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 #include "util/config.h"
18 #include "util/callchain.h"
19 #include "util/cgroup.h"
20 #include "util/header.h"
21 #include "util/event.h"
22 #include "util/evlist.h"
23 #include "util/evsel.h"
24 #include "util/debug.h"
25 #include "util/drv_configs.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/time-utils.h"
42 #include "util/units.h"
43 #include "asm/bug.h"
45 #include <errno.h>
46 #include <inttypes.h>
47 #include <poll.h>
48 #include <unistd.h>
49 #include <sched.h>
50 #include <signal.h>
51 #include <sys/mman.h>
52 #include <sys/wait.h>
53 #include <asm/bug.h>
54 #include <linux/time64.h>
/*
 * Settings of the switch-output feature.  Which condition is active is
 * tested by switch_output_signal(), switch_output_size() and
 * switch_output_time().
 */
struct switch_output {
	bool		 enabled;	/* arms switch_output_trigger in __cmd_record() */
	bool		 signal;	/* tested by switch_output_signal() */
	unsigned long	 size;		/* byte threshold compared with bytes_written */
	unsigned long	 time;		/* value handed to alarm() when re-arming */
	const char	*str;
	bool		 set;
};
65 struct record {
66 struct perf_tool tool;
67 struct record_opts opts;
68 u64 bytes_written;
69 struct perf_data_file file;
70 struct auxtrace_record *itr;
71 struct perf_evlist *evlist;
72 struct perf_session *session;
73 const char *progname;
74 int realtime_prio;
75 bool no_buildid;
76 bool no_buildid_set;
77 bool no_buildid_cache;
78 bool no_buildid_cache_set;
79 bool buildid_all;
80 bool timestamp_filename;
81 struct switch_output switch_output;
82 unsigned long long samples;
/* Tested and cleared by the main loop of __cmd_record(). */
static volatile int auxtrace_record__snapshot_started;

/* Both triggers are switched on in __cmd_record() when the feature is used. */
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
89 static bool switch_output_signal(struct record *rec)
91 return rec->switch_output.signal &&
92 trigger_is_ready(&switch_output_trigger);
95 static bool switch_output_size(struct record *rec)
97 return rec->switch_output.size &&
98 trigger_is_ready(&switch_output_trigger) &&
99 (rec->bytes_written >= rec->switch_output.size);
102 static bool switch_output_time(struct record *rec)
104 return rec->switch_output.time &&
105 trigger_is_ready(&switch_output_trigger);
108 static int record__write(struct record *rec, void *bf, size_t size)
110 if (perf_data_file__write(rec->session->file, bf, size) < 0) {
111 pr_err("failed to write perf data, error: %m\n");
112 return -1;
115 rec->bytes_written += size;
117 if (switch_output_size(rec))
118 trigger_hit(&switch_output_trigger);
120 return 0;
123 static int process_synthesized_event(struct perf_tool *tool,
124 union perf_event *event,
125 struct perf_sample *sample __maybe_unused,
126 struct machine *machine __maybe_unused)
128 struct record *rec = container_of(tool, struct record, tool);
129 return record__write(rec, event, event->header.size);
132 static int
133 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
135 struct perf_event_header *pheader;
136 u64 evt_head = head;
137 int size = mask + 1;
139 pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
140 pheader = (struct perf_event_header *)(buf + (head & mask));
141 *start = head;
142 while (true) {
143 if (evt_head - head >= (unsigned int)size) {
144 pr_debug("Finished reading backward ring buffer: rewind\n");
145 if (evt_head - head > (unsigned int)size)
146 evt_head -= pheader->size;
147 *end = evt_head;
148 return 0;
151 pheader = (struct perf_event_header *)(buf + (evt_head & mask));
153 if (pheader->size == 0) {
154 pr_debug("Finished reading backward ring buffer: get start\n");
155 *end = evt_head;
156 return 0;
159 evt_head += pheader->size;
160 pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
162 WARN_ONCE(1, "Shouldn't get here\n");
163 return -1;
166 static int
167 rb_find_range(void *data, int mask, u64 head, u64 old,
168 u64 *start, u64 *end, bool backward)
170 if (!backward) {
171 *start = old;
172 *end = head;
173 return 0;
176 return backward_rb_find_range(data, mask, head, start, end);
179 static int
180 record__mmap_read(struct record *rec, struct perf_mmap *md,
181 bool overwrite, bool backward)
183 u64 head = perf_mmap__read_head(md);
184 u64 old = md->prev;
185 u64 end = head, start = old;
186 unsigned char *data = md->base + page_size;
187 unsigned long size;
188 void *buf;
189 int rc = 0;
191 if (rb_find_range(data, md->mask, head,
192 old, &start, &end, backward))
193 return -1;
195 if (start == end)
196 return 0;
198 rec->samples++;
200 size = end - start;
201 if (size > (unsigned long)(md->mask) + 1) {
202 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
204 md->prev = head;
205 perf_mmap__consume(md, overwrite || backward);
206 return 0;
209 if ((start & md->mask) + size != (end & md->mask)) {
210 buf = &data[start & md->mask];
211 size = md->mask + 1 - (start & md->mask);
212 start += size;
214 if (record__write(rec, buf, size) < 0) {
215 rc = -1;
216 goto out;
220 buf = &data[start & md->mask];
221 size = end - start;
222 start += size;
224 if (record__write(rec, buf, size) < 0) {
225 rc = -1;
226 goto out;
229 md->prev = head;
230 perf_mmap__consume(md, overwrite || backward);
231 out:
232 return rc;
/* Flags set from signal context and polled by __cmd_record(). */
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

/*
 * Common handler: SIGCHLD marks the child as finished, any other signal
 * is remembered in signr.  Either way the record loop is asked to stop.
 */
static void sig_handler(int sig)
{
	switch (sig) {
	case SIGCHLD:
		child_finished = 1;
		break;
	default:
		signr = sig;
		break;
	}

	done = 1;
}
/* SIGSEGV handler: run the perf-hooks recovery, then dump the stack. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
255 static void record__sig_exit(void)
257 if (signr == -1)
258 return;
260 signal(signr, SIG_DFL);
261 raise(signr);
264 #ifdef HAVE_AUXTRACE_SUPPORT
266 static int record__process_auxtrace(struct perf_tool *tool,
267 union perf_event *event, void *data1,
268 size_t len1, void *data2, size_t len2)
270 struct record *rec = container_of(tool, struct record, tool);
271 struct perf_data_file *file = &rec->file;
272 size_t padding;
273 u8 pad[8] = {0};
275 if (!perf_data_file__is_pipe(file)) {
276 off_t file_offset;
277 int fd = perf_data_file__fd(file);
278 int err;
280 file_offset = lseek(fd, 0, SEEK_CUR);
281 if (file_offset == -1)
282 return -1;
283 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
284 event, file_offset);
285 if (err)
286 return err;
289 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
290 padding = (len1 + len2) & 7;
291 if (padding)
292 padding = 8 - padding;
294 record__write(rec, event, event->header.size);
295 record__write(rec, data1, len1);
296 if (len2)
297 record__write(rec, data2, len2);
298 record__write(rec, &pad, padding);
300 return 0;
303 static int record__auxtrace_mmap_read(struct record *rec,
304 struct auxtrace_mmap *mm)
306 int ret;
308 ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
309 record__process_auxtrace);
310 if (ret < 0)
311 return ret;
313 if (ret)
314 rec->samples++;
316 return 0;
319 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
320 struct auxtrace_mmap *mm)
322 int ret;
324 ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
325 record__process_auxtrace,
326 rec->opts.auxtrace_snapshot_size);
327 if (ret < 0)
328 return ret;
330 if (ret)
331 rec->samples++;
333 return 0;
336 static int record__auxtrace_read_snapshot_all(struct record *rec)
338 int i;
339 int rc = 0;
341 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
342 struct auxtrace_mmap *mm =
343 &rec->evlist->mmap[i].auxtrace_mmap;
345 if (!mm->base)
346 continue;
348 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
349 rc = -1;
350 goto out;
353 out:
354 return rc;
357 static void record__read_auxtrace_snapshot(struct record *rec)
359 pr_debug("Recording AUX area tracing snapshot\n");
360 if (record__auxtrace_read_snapshot_all(rec) < 0) {
361 trigger_error(&auxtrace_snapshot_trigger);
362 } else {
363 if (auxtrace_record__snapshot_finish(rec->itr))
364 trigger_error(&auxtrace_snapshot_trigger);
365 else
366 trigger_ready(&auxtrace_snapshot_trigger);
370 #else
372 static inline
373 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
374 struct auxtrace_mmap *mm __maybe_unused)
376 return 0;
379 static inline
380 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
384 static inline
385 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
387 return 0;
390 #endif
392 static int record__mmap_evlist(struct record *rec,
393 struct perf_evlist *evlist)
395 struct record_opts *opts = &rec->opts;
396 char msg[512];
398 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
399 opts->auxtrace_mmap_pages,
400 opts->auxtrace_snapshot_mode) < 0) {
401 if (errno == EPERM) {
402 pr_err("Permission error mapping pages.\n"
403 "Consider increasing "
404 "/proc/sys/kernel/perf_event_mlock_kb,\n"
405 "or try again with a smaller value of -m/--mmap_pages.\n"
406 "(current value: %u,%u)\n",
407 opts->mmap_pages, opts->auxtrace_mmap_pages);
408 return -errno;
409 } else {
410 pr_err("failed to mmap with %d (%s)\n", errno,
411 str_error_r(errno, msg, sizeof(msg)));
412 if (errno)
413 return -errno;
414 else
415 return -EINVAL;
418 return 0;
421 static int record__mmap(struct record *rec)
423 return record__mmap_evlist(rec, rec->evlist);
426 static int record__open(struct record *rec)
428 char msg[BUFSIZ];
429 struct perf_evsel *pos;
430 struct perf_evlist *evlist = rec->evlist;
431 struct perf_session *session = rec->session;
432 struct record_opts *opts = &rec->opts;
433 struct perf_evsel_config_term *err_term;
434 int rc = 0;
436 perf_evlist__config(evlist, opts, &callchain_param);
438 evlist__for_each_entry(evlist, pos) {
439 try_again:
440 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
441 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
442 if (verbose > 0)
443 ui__warning("%s\n", msg);
444 goto try_again;
447 rc = -errno;
448 perf_evsel__open_strerror(pos, &opts->target,
449 errno, msg, sizeof(msg));
450 ui__error("%s\n", msg);
451 goto out;
455 if (perf_evlist__apply_filters(evlist, &pos)) {
456 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
457 pos->filter, perf_evsel__name(pos), errno,
458 str_error_r(errno, msg, sizeof(msg)));
459 rc = -1;
460 goto out;
463 if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
464 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
465 err_term->val.drv_cfg, perf_evsel__name(pos), errno,
466 str_error_r(errno, msg, sizeof(msg)));
467 rc = -1;
468 goto out;
471 rc = record__mmap(rec);
472 if (rc)
473 goto out;
475 session->evlist = evlist;
476 perf_session__set_id_hdr_size(session);
477 out:
478 return rc;
481 static int process_sample_event(struct perf_tool *tool,
482 union perf_event *event,
483 struct perf_sample *sample,
484 struct perf_evsel *evsel,
485 struct machine *machine)
487 struct record *rec = container_of(tool, struct record, tool);
489 rec->samples++;
491 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
494 static int process_buildids(struct record *rec)
496 struct perf_data_file *file = &rec->file;
497 struct perf_session *session = rec->session;
499 if (file->size == 0)
500 return 0;
503 * During this process, it'll load kernel map and replace the
504 * dso->long_name to a real pathname it found. In this case
505 * we prefer the vmlinux path like
506 * /lib/modules/3.16.4/build/vmlinux
508 * rather than build-id path (in debug directory).
509 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
511 symbol_conf.ignore_vmlinux_buildid = true;
514 * If --buildid-all is given, it marks all DSO regardless of hits,
515 * so no need to process samples.
517 if (rec->buildid_all)
518 rec->tool.sample = NULL;
520 return perf_session__process_events(session);
523 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
525 int err;
526 struct perf_tool *tool = data;
528 *As for guest kernel when processing subcommand record&report,
529 *we arrange module mmap prior to guest kernel mmap and trigger
530 *a preload dso because default guest module symbols are loaded
531 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
532 *method is used to avoid symbol missing when the first addr is
533 *in module instead of in guest kernel.
535 err = perf_event__synthesize_modules(tool, process_synthesized_event,
536 machine);
537 if (err < 0)
538 pr_err("Couldn't record guest kernel [%d]'s reference"
539 " relocation symbol.\n", machine->pid);
542 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
543 * have no _text sometimes.
545 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
546 machine);
547 if (err < 0)
548 pr_err("Couldn't record guest kernel [%d]'s reference"
549 " relocation symbol.\n", machine->pid);
552 static struct perf_event_header finished_round_event = {
553 .size = sizeof(struct perf_event_header),
554 .type = PERF_RECORD_FINISHED_ROUND,
557 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
558 bool backward)
560 u64 bytes_written = rec->bytes_written;
561 int i;
562 int rc = 0;
563 struct perf_mmap *maps;
565 if (!evlist)
566 return 0;
568 maps = backward ? evlist->backward_mmap : evlist->mmap;
569 if (!maps)
570 return 0;
572 if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
573 return 0;
575 for (i = 0; i < evlist->nr_mmaps; i++) {
576 struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
578 if (maps[i].base) {
579 if (record__mmap_read(rec, &maps[i],
580 evlist->overwrite, backward) != 0) {
581 rc = -1;
582 goto out;
586 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
587 record__auxtrace_mmap_read(rec, mm) != 0) {
588 rc = -1;
589 goto out;
594 * Mark the round finished in case we wrote
595 * at least one event.
597 if (bytes_written != rec->bytes_written)
598 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
600 if (backward)
601 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
602 out:
603 return rc;
606 static int record__mmap_read_all(struct record *rec)
608 int err;
610 err = record__mmap_read_evlist(rec, rec->evlist, false);
611 if (err)
612 return err;
614 return record__mmap_read_evlist(rec, rec->evlist, true);
617 static void record__init_features(struct record *rec)
619 struct perf_session *session = rec->session;
620 int feat;
622 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
623 perf_header__set_feat(&session->header, feat);
625 if (rec->no_buildid)
626 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
628 if (!have_tracepoints(&rec->evlist->entries))
629 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
631 if (!rec->opts.branch_stack)
632 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
634 if (!rec->opts.full_auxtrace)
635 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
637 perf_header__clear_feat(&session->header, HEADER_STAT);
640 static void
641 record__finish_output(struct record *rec)
643 struct perf_data_file *file = &rec->file;
644 int fd = perf_data_file__fd(file);
646 if (file->is_pipe)
647 return;
649 rec->session->header.data_size += rec->bytes_written;
650 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
652 if (!rec->no_buildid) {
653 process_buildids(rec);
655 if (rec->buildid_all)
656 dsos__hit_all(rec->session);
658 perf_session__write_header(rec->session, rec->evlist, fd, true);
660 return;
663 static int record__synthesize_workload(struct record *rec, bool tail)
665 int err;
666 struct thread_map *thread_map;
668 if (rec->opts.tail_synthesize != tail)
669 return 0;
671 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
672 if (thread_map == NULL)
673 return -1;
675 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
676 process_synthesized_event,
677 &rec->session->machines.host,
678 rec->opts.sample_address,
679 rec->opts.proc_map_timeout);
680 thread_map__put(thread_map);
681 return err;
684 static int record__synthesize(struct record *rec, bool tail);
686 static int
687 record__switch_output(struct record *rec, bool at_exit)
689 struct perf_data_file *file = &rec->file;
690 int fd, err;
692 /* Same Size: "2015122520103046"*/
693 char timestamp[] = "InvalidTimestamp";
695 record__synthesize(rec, true);
696 if (target__none(&rec->opts.target))
697 record__synthesize_workload(rec, true);
699 rec->samples = 0;
700 record__finish_output(rec);
701 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
702 if (err) {
703 pr_err("Failed to get current timestamp\n");
704 return -EINVAL;
707 fd = perf_data_file__switch(file, timestamp,
708 rec->session->header.data_offset,
709 at_exit);
710 if (fd >= 0 && !at_exit) {
711 rec->bytes_written = 0;
712 rec->session->header.data_size = 0;
715 if (!quiet)
716 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
717 file->path, timestamp);
719 /* Output tracking events */
720 if (!at_exit) {
721 record__synthesize(rec, false);
724 * In 'perf record --switch-output' without -a,
725 * record__synthesize() in record__switch_output() won't
726 * generate tracking events because there's no thread_map
727 * in evlist. Which causes newly created perf.data doesn't
728 * contain map and comm information.
729 * Create a fake thread_map and directly call
730 * perf_event__synthesize_thread_map() for those events.
732 if (target__none(&rec->opts.target))
733 record__synthesize_workload(rec, false);
735 return fd;
738 static volatile int workload_exec_errno;
741 * perf_evlist__prepare_workload will send a SIGUSR1
742 * if the fork fails, since we asked by setting its
743 * want_signal to true.
745 static void workload_exec_failed_signal(int signo __maybe_unused,
746 siginfo_t *info,
747 void *ucontext __maybe_unused)
749 workload_exec_errno = info->si_value.sival_int;
750 done = 1;
751 child_finished = 1;
754 static void snapshot_sig_handler(int sig);
755 static void alarm_sig_handler(int sig);
757 int __weak
758 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
759 struct perf_tool *tool __maybe_unused,
760 perf_event__handler_t process __maybe_unused,
761 struct machine *machine __maybe_unused)
763 return 0;
766 static const struct perf_event_mmap_page *
767 perf_evlist__pick_pc(struct perf_evlist *evlist)
769 if (evlist) {
770 if (evlist->mmap && evlist->mmap[0].base)
771 return evlist->mmap[0].base;
772 if (evlist->backward_mmap && evlist->backward_mmap[0].base)
773 return evlist->backward_mmap[0].base;
775 return NULL;
778 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
780 const struct perf_event_mmap_page *pc;
782 pc = perf_evlist__pick_pc(rec->evlist);
783 if (pc)
784 return pc;
785 return NULL;
788 static int record__synthesize(struct record *rec, bool tail)
790 struct perf_session *session = rec->session;
791 struct machine *machine = &session->machines.host;
792 struct perf_data_file *file = &rec->file;
793 struct record_opts *opts = &rec->opts;
794 struct perf_tool *tool = &rec->tool;
795 int fd = perf_data_file__fd(file);
796 int err = 0;
798 if (rec->opts.tail_synthesize != tail)
799 return 0;
801 if (file->is_pipe) {
802 err = perf_event__synthesize_features(
803 tool, session, rec->evlist, process_synthesized_event);
804 if (err < 0) {
805 pr_err("Couldn't synthesize features.\n");
806 return err;
809 err = perf_event__synthesize_attrs(tool, session,
810 process_synthesized_event);
811 if (err < 0) {
812 pr_err("Couldn't synthesize attrs.\n");
813 goto out;
816 if (have_tracepoints(&rec->evlist->entries)) {
818 * FIXME err <= 0 here actually means that
819 * there were no tracepoints so its not really
820 * an error, just that we don't need to
821 * synthesize anything. We really have to
822 * return this more properly and also
823 * propagate errors that now are calling die()
825 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
826 process_synthesized_event);
827 if (err <= 0) {
828 pr_err("Couldn't record tracing data.\n");
829 goto out;
831 rec->bytes_written += err;
835 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
836 process_synthesized_event, machine);
837 if (err)
838 goto out;
840 if (rec->opts.full_auxtrace) {
841 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
842 session, process_synthesized_event);
843 if (err)
844 goto out;
847 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
848 machine);
849 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
850 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
851 "Check /proc/kallsyms permission or run as root.\n");
853 err = perf_event__synthesize_modules(tool, process_synthesized_event,
854 machine);
855 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
856 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
857 "Check /proc/modules permission or run as root.\n");
859 if (perf_guest) {
860 machines__process_guests(&session->machines,
861 perf_event__synthesize_guest_os, tool);
864 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
865 process_synthesized_event, opts->sample_address,
866 opts->proc_map_timeout);
867 out:
868 return err;
871 static int __cmd_record(struct record *rec, int argc, const char **argv)
873 int err;
874 int status = 0;
875 unsigned long waking = 0;
876 const bool forks = argc > 0;
877 struct machine *machine;
878 struct perf_tool *tool = &rec->tool;
879 struct record_opts *opts = &rec->opts;
880 struct perf_data_file *file = &rec->file;
881 struct perf_session *session;
882 bool disabled = false, draining = false;
883 int fd;
885 rec->progname = argv[0];
887 atexit(record__sig_exit);
888 signal(SIGCHLD, sig_handler);
889 signal(SIGINT, sig_handler);
890 signal(SIGTERM, sig_handler);
891 signal(SIGSEGV, sigsegv_handler);
893 if (rec->opts.record_namespaces)
894 tool->namespace_events = true;
896 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
897 signal(SIGUSR2, snapshot_sig_handler);
898 if (rec->opts.auxtrace_snapshot_mode)
899 trigger_on(&auxtrace_snapshot_trigger);
900 if (rec->switch_output.enabled)
901 trigger_on(&switch_output_trigger);
902 } else {
903 signal(SIGUSR2, SIG_IGN);
906 session = perf_session__new(file, false, tool);
907 if (session == NULL) {
908 pr_err("Perf session creation failed.\n");
909 return -1;
912 fd = perf_data_file__fd(file);
913 rec->session = session;
915 record__init_features(rec);
917 if (forks) {
918 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
919 argv, file->is_pipe,
920 workload_exec_failed_signal);
921 if (err < 0) {
922 pr_err("Couldn't run the workload!\n");
923 status = err;
924 goto out_delete_session;
928 if (record__open(rec) != 0) {
929 err = -1;
930 goto out_child;
933 err = bpf__apply_obj_config();
934 if (err) {
935 char errbuf[BUFSIZ];
937 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
938 pr_err("ERROR: Apply config to BPF failed: %s\n",
939 errbuf);
940 goto out_child;
944 * Normally perf_session__new would do this, but it doesn't have the
945 * evlist.
947 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
948 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
949 rec->tool.ordered_events = false;
952 if (!rec->evlist->nr_groups)
953 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
955 if (file->is_pipe) {
956 err = perf_header__write_pipe(fd);
957 if (err < 0)
958 goto out_child;
959 } else {
960 err = perf_session__write_header(session, rec->evlist, fd, false);
961 if (err < 0)
962 goto out_child;
965 if (!rec->no_buildid
966 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
967 pr_err("Couldn't generate buildids. "
968 "Use --no-buildid to profile anyway.\n");
969 err = -1;
970 goto out_child;
973 machine = &session->machines.host;
975 err = record__synthesize(rec, false);
976 if (err < 0)
977 goto out_child;
979 if (rec->realtime_prio) {
980 struct sched_param param;
982 param.sched_priority = rec->realtime_prio;
983 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
984 pr_err("Could not set realtime priority.\n");
985 err = -1;
986 goto out_child;
991 * When perf is starting the traced process, all the events
992 * (apart from group members) have enable_on_exec=1 set,
993 * so don't spoil it by prematurely enabling them.
995 if (!target__none(&opts->target) && !opts->initial_delay)
996 perf_evlist__enable(rec->evlist);
999 * Let the child rip
1001 if (forks) {
1002 union perf_event *event;
1003 pid_t tgid;
1005 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1006 if (event == NULL) {
1007 err = -ENOMEM;
1008 goto out_child;
1012 * Some H/W events are generated before COMM event
1013 * which is emitted during exec(), so perf script
1014 * cannot see a correct process name for those events.
1015 * Synthesize COMM event to prevent it.
1017 tgid = perf_event__synthesize_comm(tool, event,
1018 rec->evlist->workload.pid,
1019 process_synthesized_event,
1020 machine);
1021 free(event);
1023 if (tgid == -1)
1024 goto out_child;
1026 event = malloc(sizeof(event->namespaces) +
1027 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1028 machine->id_hdr_size);
1029 if (event == NULL) {
1030 err = -ENOMEM;
1031 goto out_child;
1035 * Synthesize NAMESPACES event for the command specified.
1037 perf_event__synthesize_namespaces(tool, event,
1038 rec->evlist->workload.pid,
1039 tgid, process_synthesized_event,
1040 machine);
1041 free(event);
1043 perf_evlist__start_workload(rec->evlist);
1046 if (opts->initial_delay) {
1047 usleep(opts->initial_delay * USEC_PER_MSEC);
1048 perf_evlist__enable(rec->evlist);
1051 trigger_ready(&auxtrace_snapshot_trigger);
1052 trigger_ready(&switch_output_trigger);
1053 perf_hooks__invoke_record_start();
1054 for (;;) {
1055 unsigned long long hits = rec->samples;
1058 * rec->evlist->bkw_mmap_state is possible to be
1059 * BKW_MMAP_EMPTY here: when done == true and
1060 * hits != rec->samples in previous round.
1062 * perf_evlist__toggle_bkw_mmap ensure we never
1063 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1065 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1066 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1068 if (record__mmap_read_all(rec) < 0) {
1069 trigger_error(&auxtrace_snapshot_trigger);
1070 trigger_error(&switch_output_trigger);
1071 err = -1;
1072 goto out_child;
1075 if (auxtrace_record__snapshot_started) {
1076 auxtrace_record__snapshot_started = 0;
1077 if (!trigger_is_error(&auxtrace_snapshot_trigger))
1078 record__read_auxtrace_snapshot(rec);
1079 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1080 pr_err("AUX area tracing snapshot failed\n");
1081 err = -1;
1082 goto out_child;
1086 if (trigger_is_hit(&switch_output_trigger)) {
1088 * If switch_output_trigger is hit, the data in
1089 * overwritable ring buffer should have been collected,
1090 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1092 * If SIGUSR2 raise after or during record__mmap_read_all(),
1093 * record__mmap_read_all() didn't collect data from
1094 * overwritable ring buffer. Read again.
1096 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1097 continue;
1098 trigger_ready(&switch_output_trigger);
1101 * Reenable events in overwrite ring buffer after
1102 * record__mmap_read_all(): we should have collected
1103 * data from it.
1105 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1107 if (!quiet)
1108 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1109 waking);
1110 waking = 0;
1111 fd = record__switch_output(rec, false);
1112 if (fd < 0) {
1113 pr_err("Failed to switch to new file\n");
1114 trigger_error(&switch_output_trigger);
1115 err = fd;
1116 goto out_child;
1119 /* re-arm the alarm */
1120 if (rec->switch_output.time)
1121 alarm(rec->switch_output.time);
1124 if (hits == rec->samples) {
1125 if (done || draining)
1126 break;
1127 err = perf_evlist__poll(rec->evlist, -1);
1129 * Propagate error, only if there's any. Ignore positive
1130 * number of returned events and interrupt error.
1132 if (err > 0 || (err < 0 && errno == EINTR))
1133 err = 0;
1134 waking++;
1136 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1137 draining = true;
1141 * When perf is starting the traced process, at the end events
1142 * die with the process and we wait for that. Thus no need to
1143 * disable events in this case.
1145 if (done && !disabled && !target__none(&opts->target)) {
1146 trigger_off(&auxtrace_snapshot_trigger);
1147 perf_evlist__disable(rec->evlist);
1148 disabled = true;
1151 trigger_off(&auxtrace_snapshot_trigger);
1152 trigger_off(&switch_output_trigger);
1154 if (forks && workload_exec_errno) {
1155 char msg[STRERR_BUFSIZE];
1156 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1157 pr_err("Workload failed: %s\n", emsg);
1158 err = -1;
1159 goto out_child;
1162 if (!quiet)
1163 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1165 if (target__none(&rec->opts.target))
1166 record__synthesize_workload(rec, true);
1168 out_child:
1169 if (forks) {
1170 int exit_status;
1172 if (!child_finished)
1173 kill(rec->evlist->workload.pid, SIGTERM);
1175 wait(&exit_status);
1177 if (err < 0)
1178 status = err;
1179 else if (WIFEXITED(exit_status))
1180 status = WEXITSTATUS(exit_status);
1181 else if (WIFSIGNALED(exit_status))
1182 signr = WTERMSIG(exit_status);
1183 } else
1184 status = err;
1186 record__synthesize(rec, true);
1187 /* this will be recalculated during process_buildids() */
1188 rec->samples = 0;
1190 if (!err) {
1191 if (!rec->timestamp_filename) {
1192 record__finish_output(rec);
1193 } else {
1194 fd = record__switch_output(rec, true);
1195 if (fd < 0) {
1196 status = fd;
1197 goto out_delete_session;
1202 perf_hooks__invoke_record_end();
1204 if (!err && !quiet) {
1205 char samples[128];
1206 const char *postfix = rec->timestamp_filename ?
1207 ".<timestamp>" : "";
1209 if (rec->samples && !rec->opts.full_auxtrace)
1210 scnprintf(samples, sizeof(samples),
1211 " (%" PRIu64 " samples)", rec->samples);
1212 else
1213 samples[0] = '\0';
1215 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1216 perf_data_file__size(file) / 1024.0 / 1024.0,
1217 file->path, postfix, samples);
1220 out_delete_session:
1221 perf_session__delete(session);
1222 return status;
1225 static void callchain_debug(struct callchain_param *callchain)
1227 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1229 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1231 if (callchain->record_mode == CALLCHAIN_DWARF)
1232 pr_debug("callchain: stack dump size %d\n",
1233 callchain->dump_size);
1236 int record_opts__parse_callchain(struct record_opts *record,
1237 struct callchain_param *callchain,
1238 const char *arg, bool unset)
1240 int ret;
1241 callchain->enabled = !unset;
1243 /* --no-call-graph */
1244 if (unset) {
1245 callchain->record_mode = CALLCHAIN_NONE;
1246 pr_debug("callchain: disabled\n");
1247 return 0;
1250 ret = parse_callchain_record_opt(arg, callchain);
1251 if (!ret) {
1252 /* Enable data address sampling for DWARF unwind. */
1253 if (callchain->record_mode == CALLCHAIN_DWARF)
1254 record->sample_address = true;
1255 callchain_debug(callchain);
1258 return ret;
1261 int record_parse_callchain_opt(const struct option *opt,
1262 const char *arg,
1263 int unset)
1265 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1268 int record_callchain_opt(const struct option *opt,
1269 const char *arg __maybe_unused,
1270 int unset __maybe_unused)
1272 struct callchain_param *callchain = opt->value;
1274 callchain->enabled = true;
1276 if (callchain->record_mode == CALLCHAIN_NONE)
1277 callchain->record_mode = CALLCHAIN_FP;
1279 callchain_debug(callchain);
1280 return 0;
1283 static int perf_record_config(const char *var, const char *value, void *cb)
1285 struct record *rec = cb;
1287 if (!strcmp(var, "record.build-id")) {
1288 if (!strcmp(value, "cache"))
1289 rec->no_buildid_cache = false;
1290 else if (!strcmp(value, "no-cache"))
1291 rec->no_buildid_cache = true;
1292 else if (!strcmp(value, "skip"))
1293 rec->no_buildid = true;
1294 else
1295 return -1;
1296 return 0;
1298 if (!strcmp(var, "record.call-graph"))
1299 var = "call-graph.record-mode"; /* fall-through */
1301 return perf_default_config(var, value, cb);
/* One entry of the clock-name lookup table: a user-visible name and the clockid it selects. */
1304 struct clockid_map {
1305 const char *name;
1306 int clockid;
/* Build one table entry mapping name n to clockid c. */
1309 #define CLOCKID_MAP(n, c) \
1310 { .name = n, .clockid = (c), }
/* Table terminator: an entry whose name is NULL. */
1312 #define CLOCKID_END { .name = NULL, }
1316 * Add the missing ones, we need to build on many distros...
/* Fallback numeric values, used only when the system headers do not define the clock. */
1318 #ifndef CLOCK_MONOTONIC_RAW
1319 #define CLOCK_MONOTONIC_RAW 4
1320 #endif
1321 #ifndef CLOCK_BOOTTIME
1322 #define CLOCK_BOOTTIME 7
1323 #endif
1324 #ifndef CLOCK_TAI
1325 #define CLOCK_TAI 11
1326 #endif
/*
 * Name -> clockid table, walked by parse_clockid() until the NULL-name
 * terminator. Full names come first, followed by short aliases for the
 * same clocks.
 */
1328 static const struct clockid_map clockids[] = {
1329 /* available for all events, NMI safe */
1330 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1331 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1333 /* available for some events */
1334 CLOCKID_MAP("realtime", CLOCK_REALTIME),
1335 CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1336 CLOCKID_MAP("tai", CLOCK_TAI),
1338 /* available for the lazy */
1339 CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1340 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1341 CLOCKID_MAP("real", CLOCK_REALTIME),
1342 CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1344 CLOCKID_END,
1347 static int parse_clockid(const struct option *opt, const char *str, int unset)
1349 struct record_opts *opts = (struct record_opts *)opt->value;
1350 const struct clockid_map *cm;
1351 const char *ostr = str;
1353 if (unset) {
1354 opts->use_clockid = 0;
1355 return 0;
1358 /* no arg passed */
1359 if (!str)
1360 return 0;
1362 /* no setting it twice */
1363 if (opts->use_clockid)
1364 return -1;
1366 opts->use_clockid = true;
1368 /* if its a number, we're done */
1369 if (sscanf(str, "%d", &opts->clockid) == 1)
1370 return 0;
1372 /* allow a "CLOCK_" prefix to the name */
1373 if (!strncasecmp(str, "CLOCK_", 6))
1374 str += 6;
1376 for (cm = clockids; cm->name; cm++) {
1377 if (!strcasecmp(str, cm->name)) {
1378 opts->clockid = cm->clockid;
1379 return 0;
1383 opts->use_clockid = false;
1384 ui__warning("unknown clockid %s, check man page\n", ostr);
1385 return -1;
1388 static int record__parse_mmap_pages(const struct option *opt,
1389 const char *str,
1390 int unset __maybe_unused)
1392 struct record_opts *opts = opt->value;
1393 char *s, *p;
1394 unsigned int mmap_pages;
1395 int ret;
1397 if (!str)
1398 return -EINVAL;
1400 s = strdup(str);
1401 if (!s)
1402 return -ENOMEM;
1404 p = strchr(s, ',');
1405 if (p)
1406 *p = '\0';
1408 if (*s) {
1409 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1410 if (ret)
1411 goto out_free;
1412 opts->mmap_pages = mmap_pages;
1415 if (!p) {
1416 ret = 0;
1417 goto out_free;
1420 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1421 if (ret)
1422 goto out_free;
1424 opts->auxtrace_mmap_pages = mmap_pages;
1426 out_free:
1427 free(s);
1428 return ret;
1431 static void switch_output_size_warn(struct record *rec)
1433 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1434 struct switch_output *s = &rec->switch_output;
1436 wakeup_size /= 2;
1438 if (s->size < wakeup_size) {
1439 char buf[100];
1441 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1442 pr_warning("WARNING: switch-output data size lower than "
1443 "wakeup kernel buffer size (%s) "
1444 "expect bigger perf.data sizes\n", buf);
1448 static int switch_output_setup(struct record *rec)
1450 struct switch_output *s = &rec->switch_output;
1451 static struct parse_tag tags_size[] = {
1452 { .tag = 'B', .mult = 1 },
1453 { .tag = 'K', .mult = 1 << 10 },
1454 { .tag = 'M', .mult = 1 << 20 },
1455 { .tag = 'G', .mult = 1 << 30 },
1456 { .tag = 0 },
1458 static struct parse_tag tags_time[] = {
1459 { .tag = 's', .mult = 1 },
1460 { .tag = 'm', .mult = 60 },
1461 { .tag = 'h', .mult = 60*60 },
1462 { .tag = 'd', .mult = 60*60*24 },
1463 { .tag = 0 },
1465 unsigned long val;
1467 if (!s->set)
1468 return 0;
1470 if (!strcmp(s->str, "signal")) {
1471 s->signal = true;
1472 pr_debug("switch-output with SIGUSR2 signal\n");
1473 goto enabled;
1476 val = parse_tag_value(s->str, tags_size);
1477 if (val != (unsigned long) -1) {
1478 s->size = val;
1479 pr_debug("switch-output with %s size threshold\n", s->str);
1480 goto enabled;
1483 val = parse_tag_value(s->str, tags_time);
1484 if (val != (unsigned long) -1) {
1485 s->time = val;
1486 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1487 s->str, s->time);
1488 goto enabled;
1491 return -1;
1493 enabled:
1494 rec->timestamp_filename = true;
1495 s->enabled = true;
1497 if (s->size && !rec->opts.no_buffering)
1498 switch_output_size_warn(rec);
1500 return 0;
/* Usage synopsis lines for 'perf record'; the list is NULL-terminated. */
1503 static const char * const __record_usage[] = {
1504 "perf record [<options>] [<command>]",
1505 "perf record [<options>] -- <command> [<options>]",
1506 NULL
/* Non-static alias of the usage table above. */
1508 const char * const *record_usage = __record_usage;
1511 * XXX Ideally would be local to cmd_record() and passed to a record__new
1512 * because we need to have access to it in record__exit, that is called
1513 * after cmd_record() exits, but since record_options need to be accessible to
1514 * builtin-script, leave it here.
1516 * At least we don't touch it in all the other functions here directly.
1518 * Just say no to tons of global variables, sigh.
/*
 * The single file-scope 'struct record' instance. The option table and
 * cmd_record() in this file operate on it. Fields not listed here start
 * out zeroed.
 */
1520 static struct record record = {
1521 .opts = {
1522 .sample_time = true,
/* NOTE(review): the *_MAX values below look like "not set by the user" sentinels - confirm against their consumers. */
1523 .mmap_pages = UINT_MAX,
1524 .user_freq = UINT_MAX,
1525 .user_interval = ULLONG_MAX,
1526 .freq = 4000,
1527 .target = {
1528 .uses_mmap = true,
1529 .default_per_cpu = true,
/* Milliseconds, per the --proc-map-timeout option help text. */
1531 .proc_map_timeout = 500,
/* Event handlers installed in the embedded perf_tool. */
1533 .tool = {
1534 .sample = process_sample_event,
1535 .fork = perf_event__process_fork,
1536 .exit = perf_event__process_exit,
1537 .comm = perf_event__process_comm,
1538 .namespaces = perf_event__process_namespaces,
1539 .mmap = perf_event__process_mmap,
1540 .mmap2 = perf_event__process_mmap2,
1541 .ordered_events = true,
/* Help text for --call-graph: the shared CALLCHAIN_RECORD_HELP string plus the default mode. */
1545 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1546 "\n\t\t\t\tDefault: fp";
/* Set by --dry-run; cmd_record() then stops once option parsing and filter setup are done. */
1548 static bool dry_run;
1551 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1552 * with it and switch to use the library functions in perf_evlist that came
1553 * from builtin-record.c, i.e. use record_opts,
1554 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1555 * using pipes, etc.
/*
 * Command-line option table for 'perf record'. Each entry binds a flag
 * either directly to a field of the file-scope 'record' instance (or
 * another global such as verbose, quiet, llvm_param, symbol_conf) or to a
 * parser callback. The table is terminated by OPT_END().
 */
1557 static struct option __record_options[] = {
1558 OPT_CALLBACK('e', "event", &record.evlist, "event",
1559 "event selector. use 'perf list' to list available events",
1560 parse_events_option),
1561 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1562 "event filter", parse_filter),
1563 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1564 NULL, "don't record events from perf itself",
1565 exclude_perf),
1566 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1567 "record events on existing process id"),
1568 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1569 "record events on existing thread id"),
1570 OPT_INTEGER('r', "realtime", &record.realtime_prio,
1571 "collect data with this RT SCHED_FIFO priority"),
1572 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1573 "collect data without buffering"),
1574 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1575 "collect raw sample records from all opened counters"),
1576 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1577 "system-wide collection from all CPUs"),
1578 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1579 "list of cpus to monitor"),
1580 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1581 OPT_STRING('o', "output", &record.file.path, "file",
1582 "output file name"),
1583 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1584 &record.opts.no_inherit_set,
1585 "child tasks do not inherit counters"),
1586 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1587 "synthesize non-sample events at the end of output"),
1588 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1589 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1590 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1591 "number of mmap data pages and AUX area tracing mmap pages",
1592 record__parse_mmap_pages),
1593 OPT_BOOLEAN(0, "group", &record.opts.group,
1594 "put the counters into a counter group"),
1595 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1596 NULL, "enables call-graph recording" ,
1597 &record_callchain_opt),
1598 OPT_CALLBACK(0, "call-graph", &record.opts,
1599 "record_mode[,record_size]", record_callchain_help,
1600 &record_parse_callchain_opt),
1601 OPT_INCR('v', "verbose", &verbose,
1602 "be more verbose (show counter open errors, etc)"),
1603 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1604 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1605 "per thread counts"),
1606 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1607 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1608 "Record the sample physical addresses"),
1609 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1610 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1611 &record.opts.sample_time_set,
1612 "Record the sample timestamps"),
1613 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1614 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1615 "don't sample"),
1616 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1617 &record.no_buildid_cache_set,
1618 "do not update the buildid cache"),
1619 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1620 &record.no_buildid_set,
1621 "do not collect buildids in perf.data"),
1622 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1623 "monitor event in cgroup name only",
1624 parse_cgroups),
1625 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1626 "ms to wait before starting measurement after program start"),
1627 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1628 "user to profile"),
1630 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1631 "branch any", "sample any taken branches",
1632 parse_branch_stack),
1634 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1635 "branch filter mask", "branch stack filter modes",
1636 parse_branch_stack),
1637 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1638 "sample by weight (on special events only)"),
1639 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1640 "sample transaction flags (special events only)"),
1641 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1642 "use per-thread mmaps"),
1643 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1644 "sample selected machine registers on interrupt,"
1645 " use -I ? to list register names", parse_regs),
1646 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1647 "Record running/enabled time of read (:S) events"),
1648 OPT_CALLBACK('k', "clockid", &record.opts,
1649 "clockid", "clockid to use for events, see clock_gettime()",
1650 parse_clockid),
1651 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1652 "opts", "AUX area tracing Snapshot Mode", ""),
1653 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1654 "per thread proc mmap processing timeout in ms"),
1655 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1656 "Record namespaces events"),
1657 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1658 "Record context switch events"),
1659 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1660 "Configure all used events to run in kernel space.",
1661 PARSE_OPT_EXCLUSIVE),
1662 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1663 "Configure all used events to run in user space.",
1664 PARSE_OPT_EXCLUSIVE),
1665 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1666 "clang binary to use for compiling BPF scriptlets"),
1667 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1668 "options passed to clang when compiling BPF scriptlets"),
1669 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1670 "file", "vmlinux pathname"),
1671 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1672 "Record build-id of all DSOs regardless of hits"),
1673 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1674 "append timestamp to output filename"),
1675 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1676 &record.switch_output.set, "signal,size,time",
1677 "Switch output when receive SIGUSR2 or cross size,time threshold",
1678 "signal"),
1679 OPT_BOOLEAN(0, "dry-run", &dry_run,
1680 "Parse options then exit"),
1681 OPT_END()
/* Non-static pointer to the table above; cmd_record() passes it to the option parser and usage helpers. */
1684 struct option *record_options = __record_options;
1686 int cmd_record(int argc, const char **argv)
1688 int err;
1689 struct record *rec = &record;
1690 char errbuf[BUFSIZ];
1692 #ifndef HAVE_LIBBPF_SUPPORT
1693 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1694 set_nobuild('\0', "clang-path", true);
1695 set_nobuild('\0', "clang-opt", true);
1696 # undef set_nobuild
1697 #endif
1699 #ifndef HAVE_BPF_PROLOGUE
1700 # if !defined (HAVE_DWARF_SUPPORT)
1701 # define REASON "NO_DWARF=1"
1702 # elif !defined (HAVE_LIBBPF_SUPPORT)
1703 # define REASON "NO_LIBBPF=1"
1704 # else
1705 # define REASON "this architecture doesn't support BPF prologue"
1706 # endif
1707 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1708 set_nobuild('\0', "vmlinux", true);
1709 # undef set_nobuild
1710 # undef REASON
1711 #endif
1713 rec->evlist = perf_evlist__new();
1714 if (rec->evlist == NULL)
1715 return -ENOMEM;
1717 err = perf_config(perf_record_config, rec);
1718 if (err)
1719 return err;
1721 argc = parse_options(argc, argv, record_options, record_usage,
1722 PARSE_OPT_STOP_AT_NON_OPTION);
1723 if (quiet)
1724 perf_quiet_option();
1726 /* Make system wide (-a) the default target. */
1727 if (!argc && target__none(&rec->opts.target))
1728 rec->opts.target.system_wide = true;
1730 if (nr_cgroups && !rec->opts.target.system_wide) {
1731 usage_with_options_msg(record_usage, record_options,
1732 "cgroup monitoring only available in system-wide mode");
1735 if (rec->opts.record_switch_events &&
1736 !perf_can_record_switch_events()) {
1737 ui__error("kernel does not support recording context switch events\n");
1738 parse_options_usage(record_usage, record_options, "switch-events", 0);
1739 return -EINVAL;
1742 if (switch_output_setup(rec)) {
1743 parse_options_usage(record_usage, record_options, "switch-output", 0);
1744 return -EINVAL;
1747 if (rec->switch_output.time) {
1748 signal(SIGALRM, alarm_sig_handler);
1749 alarm(rec->switch_output.time);
1752 if (!rec->itr) {
1753 rec->itr = auxtrace_record__init(rec->evlist, &err);
1754 if (err)
1755 goto out;
1758 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1759 rec->opts.auxtrace_snapshot_opts);
1760 if (err)
1761 goto out;
1764 * Allow aliases to facilitate the lookup of symbols for address
1765 * filters. Refer to auxtrace_parse_filters().
1767 symbol_conf.allow_aliases = true;
1769 symbol__init(NULL);
1771 err = auxtrace_parse_filters(rec->evlist);
1772 if (err)
1773 goto out;
1775 if (dry_run)
1776 goto out;
1778 err = bpf__setup_stdout(rec->evlist);
1779 if (err) {
1780 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1781 pr_err("ERROR: Setup BPF stdout failed: %s\n",
1782 errbuf);
1783 goto out;
1786 err = -ENOMEM;
1788 if (symbol_conf.kptr_restrict)
1789 pr_warning(
1790 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1791 "check /proc/sys/kernel/kptr_restrict.\n\n"
1792 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1793 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1794 "Samples in kernel modules won't be resolved at all.\n\n"
1795 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1796 "even with a suitable vmlinux or kallsyms file.\n\n");
1798 if (rec->no_buildid_cache || rec->no_buildid) {
1799 disable_buildid_cache();
1800 } else if (rec->switch_output.enabled) {
1802 * In 'perf record --switch-output', disable buildid
1803 * generation by default to reduce data file switching
1804 * overhead. Still generate buildid if they are required
1805 * explicitly using
1807 * perf record --switch-output --no-no-buildid \
1808 * --no-no-buildid-cache
1810 * Following code equals to:
1812 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1813 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1814 * disable_buildid_cache();
1816 bool disable = true;
1818 if (rec->no_buildid_set && !rec->no_buildid)
1819 disable = false;
1820 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1821 disable = false;
1822 if (disable) {
1823 rec->no_buildid = true;
1824 rec->no_buildid_cache = true;
1825 disable_buildid_cache();
1829 if (record.opts.overwrite)
1830 record.opts.tail_synthesize = true;
1832 if (rec->evlist->nr_entries == 0 &&
1833 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1834 pr_err("Not enough memory for event selector list\n");
1835 goto out;
1838 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1839 rec->opts.no_inherit = true;
1841 err = target__validate(&rec->opts.target);
1842 if (err) {
1843 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1844 ui__warning("%s", errbuf);
1847 err = target__parse_uid(&rec->opts.target);
1848 if (err) {
1849 int saved_errno = errno;
1851 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1852 ui__error("%s", errbuf);
1854 err = -saved_errno;
1855 goto out;
1858 /* Enable ignoring missing threads when -u option is defined. */
1859 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1861 err = -ENOMEM;
1862 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1863 usage_with_options(record_usage, record_options);
1865 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1866 if (err)
1867 goto out;
1870 * We take all buildids when the file contains
1871 * AUX area tracing data because we do not decode the
1872 * trace because it would take too long.
1874 if (rec->opts.full_auxtrace)
1875 rec->buildid_all = true;
1877 if (record_opts__config(&rec->opts)) {
1878 err = -EINVAL;
1879 goto out;
1882 err = __cmd_record(&record, argc, argv);
1883 out:
1884 perf_evlist__delete(rec->evlist);
1885 symbol__exit();
1886 auxtrace_record__free(rec->itr);
1887 return err;
1890 static void snapshot_sig_handler(int sig __maybe_unused)
1892 struct record *rec = &record;
1894 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1895 trigger_hit(&auxtrace_snapshot_trigger);
1896 auxtrace_record__snapshot_started = 1;
1897 if (auxtrace_record__snapshot_start(record.itr))
1898 trigger_error(&auxtrace_snapshot_trigger);
1901 if (switch_output_signal(rec))
1902 trigger_hit(&switch_output_trigger);
1905 static void alarm_sig_handler(int sig __maybe_unused)
1907 struct record *rec = &record;
1909 if (switch_output_time(rec))
1910 trigger_hit(&switch_output_trigger);