clk: samsung: Add bus clock for GPU/G3D on Exynos4412
[linux/fpc-iii.git] / tools / perf / builtin-record.c
blobe2c3a585a61eb6acc48f55dd8c6ab2757a1216ff
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * builtin-record.c
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
8 */
9 #include "builtin.h"
11 #include "perf.h"
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
45 #include "asm/bug.h"
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <locale.h>
50 #include <poll.h>
51 #include <unistd.h>
52 #include <sched.h>
53 #include <signal.h>
54 #include <sys/mman.h>
55 #include <sys/wait.h>
56 #include <linux/time64.h>
/*
 * State used to rotate the output file while recording
 * ('perf record --switch-output').
 */
struct switch_output {
	bool		 enabled;
	/* Rotation is driven by switch_output_trigger, see switch_output_signal(). */
	bool		 signal;
	/* Rotate once rec->bytes_written reaches this value; 0 turns the check off. */
	unsigned long	 size;
	/* Non-zero turns the time based check on, see switch_output_time(). */
	unsigned long	 time;
	const char	*str;
	bool		 set;
	/* Names of the files dumped so far, reused as a ring of num_files slots. */
	char		**filenames;
	/* Number of slots in filenames[]; 0 means dumped files are not tracked. */
	int		 num_files;
	/* Slot of filenames[] that received the most recent dump. */
	int		 cur_file;
};
70 struct record {
71 struct perf_tool tool;
72 struct record_opts opts;
73 u64 bytes_written;
74 struct perf_data data;
75 struct auxtrace_record *itr;
76 struct perf_evlist *evlist;
77 struct perf_session *session;
78 int realtime_prio;
79 bool no_buildid;
80 bool no_buildid_set;
81 bool no_buildid_cache;
82 bool no_buildid_cache_set;
83 bool buildid_all;
84 bool timestamp_filename;
85 bool timestamp_boundary;
86 struct switch_output switch_output;
87 unsigned long long samples;
88 cpu_set_t affinity_mask;
91 static volatile int auxtrace_record__snapshot_started;
92 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
93 static DEFINE_TRIGGER(switch_output_trigger);
95 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
96 "SYS", "NODE", "CPU"
99 static bool switch_output_signal(struct record *rec)
101 return rec->switch_output.signal &&
102 trigger_is_ready(&switch_output_trigger);
105 static bool switch_output_size(struct record *rec)
107 return rec->switch_output.size &&
108 trigger_is_ready(&switch_output_trigger) &&
109 (rec->bytes_written >= rec->switch_output.size);
112 static bool switch_output_time(struct record *rec)
114 return rec->switch_output.time &&
115 trigger_is_ready(&switch_output_trigger);
118 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
119 void *bf, size_t size)
121 struct perf_data_file *file = &rec->session->data->file;
123 if (perf_data_file__write(file, bf, size) < 0) {
124 pr_err("failed to write perf data, error: %m\n");
125 return -1;
128 rec->bytes_written += size;
130 if (switch_output_size(rec))
131 trigger_hit(&switch_output_trigger);
133 return 0;
/* Defined further down; needed by the AIO helpers below. */
static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session,
			    void *dst, size_t dst_size,
			    void *src, size_t src_size);
141 #ifdef HAVE_AIO_SUPPORT
/*
 * Fill in @cblock and queue an asynchronous write of @size bytes from @buf
 * to @trace_fd at offset @off.
 *
 * Queueing is retried for as long as aio_write() fails with EAGAIN. On any
 * other failure the control block is marked free (aio_fildes = -1) and the
 * error is logged.
 *
 * Returns the result of the last aio_write() call (0 on success).
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0 || errno != EAGAIN)
			break;
	}

	if (rc != 0) {
		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
	}

	return rc;
}
/*
 * Check whether the asynchronous write described by @cblock has finished.
 *
 * Returns 1 when the whole chunk has been written: the control block is
 * marked free and the reference on @md is dropped. Returns 0 when the
 * request is still in progress, or when only part of the chunk was written
 * and the write had to be restarted for the remainder.
 */
static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR) {
			/*
			 * The failure reason of the request is what aio_error()
			 * returned, errno is not guaranteed to hold it. Publish
			 * it so that %m reports the real cause.
			 */
			if (aio_errno > 0)
				errno = aio_errno;
			pr_err("failed to write perf data, error: %m\n");
		}
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(md);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}
213 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
215 struct aiocb **aiocb = md->aio.aiocb;
216 struct aiocb *cblocks = md->aio.cblocks;
217 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
218 int i, do_suspend;
220 do {
221 do_suspend = 0;
222 for (i = 0; i < md->aio.nr_cblocks; ++i) {
223 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
224 if (sync_all)
225 aiocb[i] = NULL;
226 else
227 return i;
228 } else {
230 * Started aio write is not complete yet
231 * so it has to be waited before the
232 * next allocation.
234 aiocb[i] = &cblocks[i];
235 do_suspend = 1;
238 if (!do_suspend)
239 return -1;
241 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
242 if (!(errno == EAGAIN || errno == EINTR))
243 pr_err("failed to sync perf data, error: %m\n");
245 } while (1);
/* Cookie handed to record__aio_pushfn() through perf_mmap__push(). */
struct record_aio {
	struct record	*rec;
	/* Staging buffer the ring buffer data is copied (or compressed) into. */
	void		*data;
	/* Bytes stored in @data so far. */
	size_t		size;
};
254 static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
256 struct record_aio *aio = to;
259 * map->base data pointed by buf is copied into free map->aio.data[] buffer
260 * to release space in the kernel buffer as fast as possible, calling
261 * perf_mmap__consume() from perf_mmap__push() function.
263 * That lets the kernel to proceed with storing more profiling data into
264 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
266 * Coping can be done in two steps in case the chunk of profiling data
267 * crosses the upper bound of the kernel buffer. In this case we first move
268 * part of data from map->start till the upper bound and then the reminder
269 * from the beginning of the kernel buffer till the end of the data chunk.
272 if (record__comp_enabled(aio->rec)) {
273 size = zstd_compress(aio->rec->session, aio->data + aio->size,
274 perf_mmap__mmap_len(map) - aio->size,
275 buf, size);
276 } else {
277 memcpy(aio->data + aio->size, buf, size);
280 if (!aio->size) {
282 * Increment map->refcount to guard map->aio.data[] buffer
283 * from premature deallocation because map object can be
284 * released earlier than aio write request started on
285 * map->aio.data[] buffer is complete.
287 * perf_mmap__put() is done at record__aio_complete()
288 * after started aio request completion or at record__aio_push()
289 * if the request failed to start.
291 perf_mmap__get(map);
294 aio->size += size;
296 return size;
299 static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
301 int ret, idx;
302 int trace_fd = rec->session->data->file.fd;
303 struct record_aio aio = { .rec = rec, .size = 0 };
306 * Call record__aio_sync() to wait till map->aio.data[] buffer
307 * becomes available after previous aio write operation.
310 idx = record__aio_sync(map, false);
311 aio.data = map->aio.data[idx];
312 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
313 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
314 return ret;
316 rec->samples++;
317 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
318 if (!ret) {
319 *off += aio.size;
320 rec->bytes_written += aio.size;
321 if (switch_output_size(rec))
322 trigger_hit(&switch_output_trigger);
323 } else {
325 * Decrement map->refcount incremented in record__aio_pushfn()
326 * back if record__aio_write() operation failed to start, otherwise
327 * map->refcount is decremented in record__aio_complete() after
328 * aio write operation finishes successfully.
330 perf_mmap__put(map);
333 return ret;
/* Return the current file offset of @trace_fd, as reported by lseek(). */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t pos = lseek(trace_fd, 0, SEEK_CUR);

	return pos;
}
/* Move the file offset of @trace_fd to @pos; the lseek() result is not checked. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}
346 static void record__aio_mmap_read_sync(struct record *rec)
348 int i;
349 struct perf_evlist *evlist = rec->evlist;
350 struct perf_mmap *maps = evlist->mmap;
352 if (!record__aio_enabled(rec))
353 return;
355 for (i = 0; i < evlist->nr_mmaps; i++) {
356 struct perf_mmap *map = &maps[i];
358 if (map->base)
359 record__aio_sync(map, true);
/* Number of AIO control blocks used when the option is given without a value. */
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;
366 static int record__aio_parse(const struct option *opt,
367 const char *str,
368 int unset)
370 struct record_opts *opts = (struct record_opts *)opt->value;
372 if (unset) {
373 opts->nr_cblocks = 0;
374 } else {
375 if (str)
376 opts->nr_cblocks = strtol(str, NULL, 0);
377 if (!opts->nr_cblocks)
378 opts->nr_cblocks = nr_cblocks_default;
381 return 0;
383 #else /* HAVE_AIO_SUPPORT */
384 static int nr_cblocks_max = 0;
386 static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
387 off_t *off __maybe_unused)
389 return -1;
392 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
394 return -1;
397 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
401 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
404 #endif
406 static int record__aio_enabled(struct record *rec)
408 return rec->opts.nr_cblocks > 0;
411 #define MMAP_FLUSH_DEFAULT 1
412 static int record__mmap_flush_parse(const struct option *opt,
413 const char *str,
414 int unset)
416 int flush_max;
417 struct record_opts *opts = (struct record_opts *)opt->value;
418 static struct parse_tag tags[] = {
419 { .tag = 'B', .mult = 1 },
420 { .tag = 'K', .mult = 1 << 10 },
421 { .tag = 'M', .mult = 1 << 20 },
422 { .tag = 'G', .mult = 1 << 30 },
423 { .tag = 0 },
426 if (unset)
427 return 0;
429 if (str) {
430 opts->mmap_flush = parse_tag_value(str, tags);
431 if (opts->mmap_flush == (int)-1)
432 opts->mmap_flush = strtol(str, NULL, 0);
435 if (!opts->mmap_flush)
436 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
438 flush_max = perf_evlist__mmap_size(opts->mmap_pages);
439 flush_max /= 4;
440 if (opts->mmap_flush > flush_max)
441 opts->mmap_flush = flush_max;
443 return 0;
446 #ifdef HAVE_ZSTD_SUPPORT
447 static unsigned int comp_level_default = 1;
449 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
451 struct record_opts *opts = opt->value;
453 if (unset) {
454 opts->comp_level = 0;
455 } else {
456 if (str)
457 opts->comp_level = strtol(str, NULL, 0);
458 if (!opts->comp_level)
459 opts->comp_level = comp_level_default;
462 return 0;
464 #endif
465 static unsigned int comp_level_max = 22;
467 static int record__comp_enabled(struct record *rec)
469 return rec->opts.comp_level > 0;
472 static int process_synthesized_event(struct perf_tool *tool,
473 union perf_event *event,
474 struct perf_sample *sample __maybe_unused,
475 struct machine *machine __maybe_unused)
477 struct record *rec = container_of(tool, struct record, tool);
478 return record__write(rec, NULL, event, event->header.size);
481 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
483 struct record *rec = to;
485 if (record__comp_enabled(rec)) {
486 size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
487 bf = map->data;
490 rec->samples++;
491 return record__write(rec, map, bf, size);
/* Flags set from signal handlers and read elsewhere in this file. */
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

/*
 * Common signal handler: SIGCHLD marks the child as finished, any other
 * signal number is remembered in signr. In both cases 'done' is raised.
 */
static void sig_handler(int sig)
{
	switch (sig) {
	case SIGCHLD:
		child_finished = 1;
		break;
	default:
		signr = sig;
		break;
	}

	done = 1;
}
/* SIGSEGV handler: let perf hooks recover first, then dump the stack. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();

	sighandler_dump_stack(sig);
}
514 static void record__sig_exit(void)
516 if (signr == -1)
517 return;
519 signal(signr, SIG_DFL);
520 raise(signr);
523 #ifdef HAVE_AUXTRACE_SUPPORT
525 static int record__process_auxtrace(struct perf_tool *tool,
526 struct perf_mmap *map,
527 union perf_event *event, void *data1,
528 size_t len1, void *data2, size_t len2)
530 struct record *rec = container_of(tool, struct record, tool);
531 struct perf_data *data = &rec->data;
532 size_t padding;
533 u8 pad[8] = {0};
535 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
536 off_t file_offset;
537 int fd = perf_data__fd(data);
538 int err;
540 file_offset = lseek(fd, 0, SEEK_CUR);
541 if (file_offset == -1)
542 return -1;
543 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
544 event, file_offset);
545 if (err)
546 return err;
549 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
550 padding = (len1 + len2) & 7;
551 if (padding)
552 padding = 8 - padding;
554 record__write(rec, map, event, event->header.size);
555 record__write(rec, map, data1, len1);
556 if (len2)
557 record__write(rec, map, data2, len2);
558 record__write(rec, map, &pad, padding);
560 return 0;
563 static int record__auxtrace_mmap_read(struct record *rec,
564 struct perf_mmap *map)
566 int ret;
568 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
569 record__process_auxtrace);
570 if (ret < 0)
571 return ret;
573 if (ret)
574 rec->samples++;
576 return 0;
579 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
580 struct perf_mmap *map)
582 int ret;
584 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
585 record__process_auxtrace,
586 rec->opts.auxtrace_snapshot_size);
587 if (ret < 0)
588 return ret;
590 if (ret)
591 rec->samples++;
593 return 0;
596 static int record__auxtrace_read_snapshot_all(struct record *rec)
598 int i;
599 int rc = 0;
601 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
602 struct perf_mmap *map = &rec->evlist->mmap[i];
604 if (!map->auxtrace_mmap.base)
605 continue;
607 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
608 rc = -1;
609 goto out;
612 out:
613 return rc;
616 static void record__read_auxtrace_snapshot(struct record *rec)
618 pr_debug("Recording AUX area tracing snapshot\n");
619 if (record__auxtrace_read_snapshot_all(rec) < 0) {
620 trigger_error(&auxtrace_snapshot_trigger);
621 } else {
622 if (auxtrace_record__snapshot_finish(rec->itr))
623 trigger_error(&auxtrace_snapshot_trigger);
624 else
625 trigger_ready(&auxtrace_snapshot_trigger);
629 static int record__auxtrace_init(struct record *rec)
631 int err;
633 if (!rec->itr) {
634 rec->itr = auxtrace_record__init(rec->evlist, &err);
635 if (err)
636 return err;
639 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
640 rec->opts.auxtrace_snapshot_opts);
641 if (err)
642 return err;
644 return auxtrace_parse_filters(rec->evlist);
647 #else
649 static inline
650 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
651 struct perf_mmap *map __maybe_unused)
653 return 0;
656 static inline
657 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
661 static inline
662 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
664 return 0;
667 static int record__auxtrace_init(struct record *rec __maybe_unused)
669 return 0;
672 #endif
674 static int record__mmap_evlist(struct record *rec,
675 struct perf_evlist *evlist)
677 struct record_opts *opts = &rec->opts;
678 char msg[512];
680 if (opts->affinity != PERF_AFFINITY_SYS)
681 cpu__setup_cpunode_map();
683 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
684 opts->auxtrace_mmap_pages,
685 opts->auxtrace_snapshot_mode,
686 opts->nr_cblocks, opts->affinity,
687 opts->mmap_flush, opts->comp_level) < 0) {
688 if (errno == EPERM) {
689 pr_err("Permission error mapping pages.\n"
690 "Consider increasing "
691 "/proc/sys/kernel/perf_event_mlock_kb,\n"
692 "or try again with a smaller value of -m/--mmap_pages.\n"
693 "(current value: %u,%u)\n",
694 opts->mmap_pages, opts->auxtrace_mmap_pages);
695 return -errno;
696 } else {
697 pr_err("failed to mmap with %d (%s)\n", errno,
698 str_error_r(errno, msg, sizeof(msg)));
699 if (errno)
700 return -errno;
701 else
702 return -EINVAL;
705 return 0;
708 static int record__mmap(struct record *rec)
710 return record__mmap_evlist(rec, rec->evlist);
713 static int record__open(struct record *rec)
715 char msg[BUFSIZ];
716 struct perf_evsel *pos;
717 struct perf_evlist *evlist = rec->evlist;
718 struct perf_session *session = rec->session;
719 struct record_opts *opts = &rec->opts;
720 int rc = 0;
723 * For initial_delay we need to add a dummy event so that we can track
724 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
725 * real events, the ones asked by the user.
727 if (opts->initial_delay) {
728 if (perf_evlist__add_dummy(evlist))
729 return -ENOMEM;
731 pos = perf_evlist__first(evlist);
732 pos->tracking = 0;
733 pos = perf_evlist__last(evlist);
734 pos->tracking = 1;
735 pos->attr.enable_on_exec = 1;
738 perf_evlist__config(evlist, opts, &callchain_param);
740 evlist__for_each_entry(evlist, pos) {
741 try_again:
742 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
743 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
744 if (verbose > 0)
745 ui__warning("%s\n", msg);
746 goto try_again;
748 if ((errno == EINVAL || errno == EBADF) &&
749 pos->leader != pos &&
750 pos->weak_group) {
751 pos = perf_evlist__reset_weak_group(evlist, pos);
752 goto try_again;
754 rc = -errno;
755 perf_evsel__open_strerror(pos, &opts->target,
756 errno, msg, sizeof(msg));
757 ui__error("%s\n", msg);
758 goto out;
761 pos->supported = true;
764 if (perf_evlist__apply_filters(evlist, &pos)) {
765 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
766 pos->filter, perf_evsel__name(pos), errno,
767 str_error_r(errno, msg, sizeof(msg)));
768 rc = -1;
769 goto out;
772 rc = record__mmap(rec);
773 if (rc)
774 goto out;
776 session->evlist = evlist;
777 perf_session__set_id_hdr_size(session);
778 out:
779 return rc;
782 static int process_sample_event(struct perf_tool *tool,
783 union perf_event *event,
784 struct perf_sample *sample,
785 struct perf_evsel *evsel,
786 struct machine *machine)
788 struct record *rec = container_of(tool, struct record, tool);
790 if (rec->evlist->first_sample_time == 0)
791 rec->evlist->first_sample_time = sample->time;
793 rec->evlist->last_sample_time = sample->time;
795 if (rec->buildid_all)
796 return 0;
798 rec->samples++;
799 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
802 static int process_buildids(struct record *rec)
804 struct perf_session *session = rec->session;
806 if (perf_data__size(&rec->data) == 0)
807 return 0;
810 * During this process, it'll load kernel map and replace the
811 * dso->long_name to a real pathname it found. In this case
812 * we prefer the vmlinux path like
813 * /lib/modules/3.16.4/build/vmlinux
815 * rather than build-id path (in debug directory).
816 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
818 symbol_conf.ignore_vmlinux_buildid = true;
821 * If --buildid-all is given, it marks all DSO regardless of hits,
822 * so no need to process samples. But if timestamp_boundary is enabled,
823 * it still needs to walk on all samples to get the timestamps of
824 * first/last samples.
826 if (rec->buildid_all && !rec->timestamp_boundary)
827 rec->tool.sample = NULL;
829 return perf_session__process_events(session);
832 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
834 int err;
835 struct perf_tool *tool = data;
837 *As for guest kernel when processing subcommand record&report,
838 *we arrange module mmap prior to guest kernel mmap and trigger
839 *a preload dso because default guest module symbols are loaded
840 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
841 *method is used to avoid symbol missing when the first addr is
842 *in module instead of in guest kernel.
844 err = perf_event__synthesize_modules(tool, process_synthesized_event,
845 machine);
846 if (err < 0)
847 pr_err("Couldn't record guest kernel [%d]'s reference"
848 " relocation symbol.\n", machine->pid);
851 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
852 * have no _text sometimes.
854 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
855 machine);
856 if (err < 0)
857 pr_err("Couldn't record guest kernel [%d]'s reference"
858 " relocation symbol.\n", machine->pid);
861 static struct perf_event_header finished_round_event = {
862 .size = sizeof(struct perf_event_header),
863 .type = PERF_RECORD_FINISHED_ROUND,
866 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
868 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
869 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
870 CPU_ZERO(&rec->affinity_mask);
871 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
872 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
876 static size_t process_comp_header(void *record, size_t increment)
878 struct compressed_event *event = record;
879 size_t size = sizeof(*event);
881 if (increment) {
882 event->header.size += increment;
883 return increment;
886 event->header.type = PERF_RECORD_COMPRESSED;
887 event->header.size = size;
889 return size;
892 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
893 void *src, size_t src_size)
895 size_t compressed;
896 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1;
898 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
899 max_record_size, process_comp_header);
901 session->bytes_transferred += src_size;
902 session->bytes_compressed += compressed;
904 return compressed;
907 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
908 bool overwrite, bool synch)
910 u64 bytes_written = rec->bytes_written;
911 int i;
912 int rc = 0;
913 struct perf_mmap *maps;
914 int trace_fd = rec->data.file.fd;
915 off_t off = 0;
917 if (!evlist)
918 return 0;
920 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
921 if (!maps)
922 return 0;
924 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
925 return 0;
927 if (record__aio_enabled(rec))
928 off = record__aio_get_pos(trace_fd);
930 for (i = 0; i < evlist->nr_mmaps; i++) {
931 u64 flush = 0;
932 struct perf_mmap *map = &maps[i];
934 if (map->base) {
935 record__adjust_affinity(rec, map);
936 if (synch) {
937 flush = map->flush;
938 map->flush = 1;
940 if (!record__aio_enabled(rec)) {
941 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
942 if (synch)
943 map->flush = flush;
944 rc = -1;
945 goto out;
947 } else {
948 if (record__aio_push(rec, map, &off) < 0) {
949 record__aio_set_pos(trace_fd, off);
950 if (synch)
951 map->flush = flush;
952 rc = -1;
953 goto out;
956 if (synch)
957 map->flush = flush;
960 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
961 record__auxtrace_mmap_read(rec, map) != 0) {
962 rc = -1;
963 goto out;
967 if (record__aio_enabled(rec))
968 record__aio_set_pos(trace_fd, off);
971 * Mark the round finished in case we wrote
972 * at least one event.
974 if (bytes_written != rec->bytes_written)
975 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
977 if (overwrite)
978 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
979 out:
980 return rc;
983 static int record__mmap_read_all(struct record *rec, bool synch)
985 int err;
987 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
988 if (err)
989 return err;
991 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
994 static void record__init_features(struct record *rec)
996 struct perf_session *session = rec->session;
997 int feat;
999 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1000 perf_header__set_feat(&session->header, feat);
1002 if (rec->no_buildid)
1003 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1005 if (!have_tracepoints(&rec->evlist->entries))
1006 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1008 if (!rec->opts.branch_stack)
1009 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1011 if (!rec->opts.full_auxtrace)
1012 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1014 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1015 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1017 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1018 if (!record__comp_enabled(rec))
1019 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1021 perf_header__clear_feat(&session->header, HEADER_STAT);
1024 static void
1025 record__finish_output(struct record *rec)
1027 struct perf_data *data = &rec->data;
1028 int fd = perf_data__fd(data);
1030 if (data->is_pipe)
1031 return;
1033 rec->session->header.data_size += rec->bytes_written;
1034 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1036 if (!rec->no_buildid) {
1037 process_buildids(rec);
1039 if (rec->buildid_all)
1040 dsos__hit_all(rec->session);
1042 perf_session__write_header(rec->session, rec->evlist, fd, true);
1044 return;
1047 static int record__synthesize_workload(struct record *rec, bool tail)
1049 int err;
1050 struct thread_map *thread_map;
1052 if (rec->opts.tail_synthesize != tail)
1053 return 0;
1055 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1056 if (thread_map == NULL)
1057 return -1;
1059 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1060 process_synthesized_event,
1061 &rec->session->machines.host,
1062 rec->opts.sample_address);
1063 thread_map__put(thread_map);
1064 return err;
1067 static int record__synthesize(struct record *rec, bool tail);
1069 static int
1070 record__switch_output(struct record *rec, bool at_exit)
1072 struct perf_data *data = &rec->data;
1073 int fd, err;
1074 char *new_filename;
1076 /* Same Size: "2015122520103046"*/
1077 char timestamp[] = "InvalidTimestamp";
1079 record__aio_mmap_read_sync(rec);
1081 record__synthesize(rec, true);
1082 if (target__none(&rec->opts.target))
1083 record__synthesize_workload(rec, true);
1085 rec->samples = 0;
1086 record__finish_output(rec);
1087 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1088 if (err) {
1089 pr_err("Failed to get current timestamp\n");
1090 return -EINVAL;
1093 fd = perf_data__switch(data, timestamp,
1094 rec->session->header.data_offset,
1095 at_exit, &new_filename);
1096 if (fd >= 0 && !at_exit) {
1097 rec->bytes_written = 0;
1098 rec->session->header.data_size = 0;
1101 if (!quiet)
1102 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1103 data->path, timestamp);
1105 if (rec->switch_output.num_files) {
1106 int n = rec->switch_output.cur_file + 1;
1108 if (n >= rec->switch_output.num_files)
1109 n = 0;
1110 rec->switch_output.cur_file = n;
1111 if (rec->switch_output.filenames[n]) {
1112 remove(rec->switch_output.filenames[n]);
1113 free(rec->switch_output.filenames[n]);
1115 rec->switch_output.filenames[n] = new_filename;
1116 } else {
1117 free(new_filename);
1120 /* Output tracking events */
1121 if (!at_exit) {
1122 record__synthesize(rec, false);
1125 * In 'perf record --switch-output' without -a,
1126 * record__synthesize() in record__switch_output() won't
1127 * generate tracking events because there's no thread_map
1128 * in evlist. Which causes newly created perf.data doesn't
1129 * contain map and comm information.
1130 * Create a fake thread_map and directly call
1131 * perf_event__synthesize_thread_map() for those events.
1133 if (target__none(&rec->opts.target))
1134 record__synthesize_workload(rec, false);
1136 return fd;
1139 static volatile int workload_exec_errno;
1142 * perf_evlist__prepare_workload will send a SIGUSR1
1143 * if the fork fails, since we asked by setting its
1144 * want_signal to true.
1146 static void workload_exec_failed_signal(int signo __maybe_unused,
1147 siginfo_t *info,
1148 void *ucontext __maybe_unused)
1150 workload_exec_errno = info->si_value.sival_int;
1151 done = 1;
1152 child_finished = 1;
/* Signal handlers defined later in this file. */
static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);
1158 int __weak
1159 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
1160 struct perf_tool *tool __maybe_unused,
1161 perf_event__handler_t process __maybe_unused,
1162 struct machine *machine __maybe_unused)
1164 return 0;
1167 static const struct perf_event_mmap_page *
1168 perf_evlist__pick_pc(struct perf_evlist *evlist)
1170 if (evlist) {
1171 if (evlist->mmap && evlist->mmap[0].base)
1172 return evlist->mmap[0].base;
1173 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
1174 return evlist->overwrite_mmap[0].base;
1176 return NULL;
1179 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1181 const struct perf_event_mmap_page *pc;
1183 pc = perf_evlist__pick_pc(rec->evlist);
1184 if (pc)
1185 return pc;
1186 return NULL;
1189 static int record__synthesize(struct record *rec, bool tail)
1191 struct perf_session *session = rec->session;
1192 struct machine *machine = &session->machines.host;
1193 struct perf_data *data = &rec->data;
1194 struct record_opts *opts = &rec->opts;
1195 struct perf_tool *tool = &rec->tool;
1196 int fd = perf_data__fd(data);
1197 int err = 0;
1199 if (rec->opts.tail_synthesize != tail)
1200 return 0;
1202 if (data->is_pipe) {
1204 * We need to synthesize events first, because some
1205 * features works on top of them (on report side).
1207 err = perf_event__synthesize_attrs(tool, rec->evlist,
1208 process_synthesized_event);
1209 if (err < 0) {
1210 pr_err("Couldn't synthesize attrs.\n");
1211 goto out;
1214 err = perf_event__synthesize_features(tool, session, rec->evlist,
1215 process_synthesized_event);
1216 if (err < 0) {
1217 pr_err("Couldn't synthesize features.\n");
1218 return err;
1221 if (have_tracepoints(&rec->evlist->entries)) {
1223 * FIXME err <= 0 here actually means that
1224 * there were no tracepoints so its not really
1225 * an error, just that we don't need to
1226 * synthesize anything. We really have to
1227 * return this more properly and also
1228 * propagate errors that now are calling die()
1230 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1231 process_synthesized_event);
1232 if (err <= 0) {
1233 pr_err("Couldn't record tracing data.\n");
1234 goto out;
1236 rec->bytes_written += err;
1240 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1241 process_synthesized_event, machine);
1242 if (err)
1243 goto out;
1245 if (rec->opts.full_auxtrace) {
1246 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1247 session, process_synthesized_event);
1248 if (err)
1249 goto out;
1252 if (!perf_evlist__exclude_kernel(rec->evlist)) {
1253 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1254 machine);
1255 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1256 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1257 "Check /proc/kallsyms permission or run as root.\n");
1259 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1260 machine);
1261 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1262 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1263 "Check /proc/modules permission or run as root.\n");
1266 if (perf_guest) {
1267 machines__process_guests(&session->machines,
1268 perf_event__synthesize_guest_os, tool);
1271 err = perf_event__synthesize_extra_attr(&rec->tool,
1272 rec->evlist,
1273 process_synthesized_event,
1274 data->is_pipe);
1275 if (err)
1276 goto out;
1278 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1279 process_synthesized_event,
1280 NULL);
1281 if (err < 0) {
1282 pr_err("Couldn't synthesize thread map.\n");
1283 return err;
1286 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1287 process_synthesized_event, NULL);
1288 if (err < 0) {
1289 pr_err("Couldn't synthesize cpu map.\n");
1290 return err;
1293 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1294 machine, opts);
1295 if (err < 0)
1296 pr_warning("Couldn't synthesize bpf events.\n");
1298 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1299 process_synthesized_event, opts->sample_address,
1301 out:
1302 return err;
1305 static int __cmd_record(struct record *rec, int argc, const char **argv)
1307 int err;
1308 int status = 0;
1309 unsigned long waking = 0;
1310 const bool forks = argc > 0;
1311 struct perf_tool *tool = &rec->tool;
1312 struct record_opts *opts = &rec->opts;
1313 struct perf_data *data = &rec->data;
1314 struct perf_session *session;
1315 bool disabled = false, draining = false;
1316 struct perf_evlist *sb_evlist = NULL;
1317 int fd;
1318 float ratio = 0;
1320 atexit(record__sig_exit);
1321 signal(SIGCHLD, sig_handler);
1322 signal(SIGINT, sig_handler);
1323 signal(SIGTERM, sig_handler);
1324 signal(SIGSEGV, sigsegv_handler);
1326 if (rec->opts.record_namespaces)
1327 tool->namespace_events = true;
1329 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1330 signal(SIGUSR2, snapshot_sig_handler);
1331 if (rec->opts.auxtrace_snapshot_mode)
1332 trigger_on(&auxtrace_snapshot_trigger);
1333 if (rec->switch_output.enabled)
1334 trigger_on(&switch_output_trigger);
1335 } else {
1336 signal(SIGUSR2, SIG_IGN);
1339 session = perf_session__new(data, false, tool);
1340 if (session == NULL) {
1341 pr_err("Perf session creation failed.\n");
1342 return -1;
1345 fd = perf_data__fd(data);
1346 rec->session = session;
1348 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1349 pr_err("Compression initialization failed.\n");
1350 return -1;
1353 session->header.env.comp_type = PERF_COMP_ZSTD;
1354 session->header.env.comp_level = rec->opts.comp_level;
1356 record__init_features(rec);
1358 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1359 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1361 if (forks) {
1362 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1363 argv, data->is_pipe,
1364 workload_exec_failed_signal);
1365 if (err < 0) {
1366 pr_err("Couldn't run the workload!\n");
1367 status = err;
1368 goto out_delete_session;
1373 * If we have just single event and are sending data
1374 * through pipe, we need to force the ids allocation,
1375 * because we synthesize event name through the pipe
1376 * and need the id for that.
1378 if (data->is_pipe && rec->evlist->nr_entries == 1)
1379 rec->opts.sample_id = true;
1381 if (record__open(rec) != 0) {
1382 err = -1;
1383 goto out_child;
1385 session->header.env.comp_mmap_len = session->evlist->mmap_len;
1387 err = bpf__apply_obj_config();
1388 if (err) {
1389 char errbuf[BUFSIZ];
1391 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1392 pr_err("ERROR: Apply config to BPF failed: %s\n",
1393 errbuf);
1394 goto out_child;
1398 * Normally perf_session__new would do this, but it doesn't have the
1399 * evlist.
1401 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1402 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1403 rec->tool.ordered_events = false;
1406 if (!rec->evlist->nr_groups)
1407 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1409 if (data->is_pipe) {
1410 err = perf_header__write_pipe(fd);
1411 if (err < 0)
1412 goto out_child;
1413 } else {
1414 err = perf_session__write_header(session, rec->evlist, fd, false);
1415 if (err < 0)
1416 goto out_child;
1419 if (!rec->no_buildid
1420 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1421 pr_err("Couldn't generate buildids. "
1422 "Use --no-buildid to profile anyway.\n");
1423 err = -1;
1424 goto out_child;
1427 if (!opts->no_bpf_event)
1428 bpf_event__add_sb_event(&sb_evlist, &session->header.env);
1430 if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
1431 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1432 opts->no_bpf_event = true;
1435 err = record__synthesize(rec, false);
1436 if (err < 0)
1437 goto out_child;
1439 if (rec->realtime_prio) {
1440 struct sched_param param;
1442 param.sched_priority = rec->realtime_prio;
1443 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1444 pr_err("Could not set realtime priority.\n");
1445 err = -1;
1446 goto out_child;
1451 * When perf is starting the traced process, all the events
1452 * (apart from group members) have enable_on_exec=1 set,
1453 * so don't spoil it by prematurely enabling them.
1455 if (!target__none(&opts->target) && !opts->initial_delay)
1456 perf_evlist__enable(rec->evlist);
1459 * Let the child rip
1461 if (forks) {
1462 struct machine *machine = &session->machines.host;
1463 union perf_event *event;
1464 pid_t tgid;
1466 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1467 if (event == NULL) {
1468 err = -ENOMEM;
1469 goto out_child;
1473 * Some H/W events are generated before COMM event
1474 * which is emitted during exec(), so perf script
1475 * cannot see a correct process name for those events.
1476 * Synthesize COMM event to prevent it.
1478 tgid = perf_event__synthesize_comm(tool, event,
1479 rec->evlist->workload.pid,
1480 process_synthesized_event,
1481 machine);
1482 free(event);
1484 if (tgid == -1)
1485 goto out_child;
1487 event = malloc(sizeof(event->namespaces) +
1488 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1489 machine->id_hdr_size);
1490 if (event == NULL) {
1491 err = -ENOMEM;
1492 goto out_child;
1496 * Synthesize NAMESPACES event for the command specified.
1498 perf_event__synthesize_namespaces(tool, event,
1499 rec->evlist->workload.pid,
1500 tgid, process_synthesized_event,
1501 machine);
1502 free(event);
1504 perf_evlist__start_workload(rec->evlist);
1507 if (opts->initial_delay) {
1508 usleep(opts->initial_delay * USEC_PER_MSEC);
1509 perf_evlist__enable(rec->evlist);
1512 trigger_ready(&auxtrace_snapshot_trigger);
1513 trigger_ready(&switch_output_trigger);
1514 perf_hooks__invoke_record_start();
1515 for (;;) {
1516 unsigned long long hits = rec->samples;
1519 * rec->evlist->bkw_mmap_state is possible to be
1520 * BKW_MMAP_EMPTY here: when done == true and
1521 * hits != rec->samples in previous round.
1523 * perf_evlist__toggle_bkw_mmap ensure we never
1524 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1526 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1527 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1529 if (record__mmap_read_all(rec, false) < 0) {
1530 trigger_error(&auxtrace_snapshot_trigger);
1531 trigger_error(&switch_output_trigger);
1532 err = -1;
1533 goto out_child;
1536 if (auxtrace_record__snapshot_started) {
1537 auxtrace_record__snapshot_started = 0;
1538 if (!trigger_is_error(&auxtrace_snapshot_trigger))
1539 record__read_auxtrace_snapshot(rec);
1540 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1541 pr_err("AUX area tracing snapshot failed\n");
1542 err = -1;
1543 goto out_child;
1547 if (trigger_is_hit(&switch_output_trigger)) {
1549 * If switch_output_trigger is hit, the data in
1550 * overwritable ring buffer should have been collected,
1551 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1553 * If SIGUSR2 raise after or during record__mmap_read_all(),
1554 * record__mmap_read_all() didn't collect data from
1555 * overwritable ring buffer. Read again.
1557 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1558 continue;
1559 trigger_ready(&switch_output_trigger);
1562 * Reenable events in overwrite ring buffer after
1563 * record__mmap_read_all(): we should have collected
1564 * data from it.
1566 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1568 if (!quiet)
1569 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1570 waking);
1571 waking = 0;
1572 fd = record__switch_output(rec, false);
1573 if (fd < 0) {
1574 pr_err("Failed to switch to new file\n");
1575 trigger_error(&switch_output_trigger);
1576 err = fd;
1577 goto out_child;
1580 /* re-arm the alarm */
1581 if (rec->switch_output.time)
1582 alarm(rec->switch_output.time);
1585 if (hits == rec->samples) {
1586 if (done || draining)
1587 break;
1588 err = perf_evlist__poll(rec->evlist, -1);
1590 * Propagate error, only if there's any. Ignore positive
1591 * number of returned events and interrupt error.
1593 if (err > 0 || (err < 0 && errno == EINTR))
1594 err = 0;
1595 waking++;
1597 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1598 draining = true;
1602 * When perf is starting the traced process, at the end events
1603 * die with the process and we wait for that. Thus no need to
1604 * disable events in this case.
1606 if (done && !disabled && !target__none(&opts->target)) {
1607 trigger_off(&auxtrace_snapshot_trigger);
1608 perf_evlist__disable(rec->evlist);
1609 disabled = true;
1612 trigger_off(&auxtrace_snapshot_trigger);
1613 trigger_off(&switch_output_trigger);
1615 if (forks && workload_exec_errno) {
1616 char msg[STRERR_BUFSIZE];
1617 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1618 pr_err("Workload failed: %s\n", emsg);
1619 err = -1;
1620 goto out_child;
1623 if (!quiet)
1624 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1626 if (target__none(&rec->opts.target))
1627 record__synthesize_workload(rec, true);
1629 out_child:
1630 record__mmap_read_all(rec, true);
1631 record__aio_mmap_read_sync(rec);
1633 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1634 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1635 session->header.env.comp_ratio = ratio + 0.5;
1638 if (forks) {
1639 int exit_status;
1641 if (!child_finished)
1642 kill(rec->evlist->workload.pid, SIGTERM);
1644 wait(&exit_status);
1646 if (err < 0)
1647 status = err;
1648 else if (WIFEXITED(exit_status))
1649 status = WEXITSTATUS(exit_status);
1650 else if (WIFSIGNALED(exit_status))
1651 signr = WTERMSIG(exit_status);
1652 } else
1653 status = err;
1655 record__synthesize(rec, true);
1656 /* this will be recalculated during process_buildids() */
1657 rec->samples = 0;
1659 if (!err) {
1660 if (!rec->timestamp_filename) {
1661 record__finish_output(rec);
1662 } else {
1663 fd = record__switch_output(rec, true);
1664 if (fd < 0) {
1665 status = fd;
1666 goto out_delete_session;
1671 perf_hooks__invoke_record_end();
1673 if (!err && !quiet) {
1674 char samples[128];
1675 const char *postfix = rec->timestamp_filename ?
1676 ".<timestamp>" : "";
1678 if (rec->samples && !rec->opts.full_auxtrace)
1679 scnprintf(samples, sizeof(samples),
1680 " (%" PRIu64 " samples)", rec->samples);
1681 else
1682 samples[0] = '\0';
1684 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1685 perf_data__size(data) / 1024.0 / 1024.0,
1686 data->path, postfix, samples);
1687 if (ratio) {
1688 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
1689 rec->session->bytes_transferred / 1024.0 / 1024.0,
1690 ratio);
1692 fprintf(stderr, " ]\n");
1695 out_delete_session:
1696 zstd_fini(&session->zstd_data);
1697 perf_session__delete(session);
1699 if (!opts->no_bpf_event)
1700 perf_evlist__stop_sb_thread(sb_evlist);
1701 return status;
1704 static void callchain_debug(struct callchain_param *callchain)
1706 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1708 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1710 if (callchain->record_mode == CALLCHAIN_DWARF)
1711 pr_debug("callchain: stack dump size %d\n",
1712 callchain->dump_size);
1715 int record_opts__parse_callchain(struct record_opts *record,
1716 struct callchain_param *callchain,
1717 const char *arg, bool unset)
1719 int ret;
1720 callchain->enabled = !unset;
1722 /* --no-call-graph */
1723 if (unset) {
1724 callchain->record_mode = CALLCHAIN_NONE;
1725 pr_debug("callchain: disabled\n");
1726 return 0;
1729 ret = parse_callchain_record_opt(arg, callchain);
1730 if (!ret) {
1731 /* Enable data address sampling for DWARF unwind. */
1732 if (callchain->record_mode == CALLCHAIN_DWARF)
1733 record->sample_address = true;
1734 callchain_debug(callchain);
1737 return ret;
1740 int record_parse_callchain_opt(const struct option *opt,
1741 const char *arg,
1742 int unset)
1744 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1747 int record_callchain_opt(const struct option *opt,
1748 const char *arg __maybe_unused,
1749 int unset __maybe_unused)
1751 struct callchain_param *callchain = opt->value;
1753 callchain->enabled = true;
1755 if (callchain->record_mode == CALLCHAIN_NONE)
1756 callchain->record_mode = CALLCHAIN_FP;
1758 callchain_debug(callchain);
1759 return 0;
1762 static int perf_record_config(const char *var, const char *value, void *cb)
1764 struct record *rec = cb;
1766 if (!strcmp(var, "record.build-id")) {
1767 if (!strcmp(value, "cache"))
1768 rec->no_buildid_cache = false;
1769 else if (!strcmp(value, "no-cache"))
1770 rec->no_buildid_cache = true;
1771 else if (!strcmp(value, "skip"))
1772 rec->no_buildid = true;
1773 else
1774 return -1;
1775 return 0;
1777 if (!strcmp(var, "record.call-graph")) {
1778 var = "call-graph.record-mode";
1779 return perf_default_config(var, value, cb);
1781 #ifdef HAVE_AIO_SUPPORT
1782 if (!strcmp(var, "record.aio")) {
1783 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1784 if (!rec->opts.nr_cblocks)
1785 rec->opts.nr_cblocks = nr_cblocks_default;
1787 #endif
1789 return 0;
1792 struct clockid_map {
1793 const char *name;
1794 int clockid;
1797 #define CLOCKID_MAP(n, c) \
1798 { .name = n, .clockid = (c), }
1800 #define CLOCKID_END { .name = NULL, }
1804 * Add the missing ones, we need to build on many distros...
1806 #ifndef CLOCK_MONOTONIC_RAW
1807 #define CLOCK_MONOTONIC_RAW 4
1808 #endif
1809 #ifndef CLOCK_BOOTTIME
1810 #define CLOCK_BOOTTIME 7
1811 #endif
1812 #ifndef CLOCK_TAI
1813 #define CLOCK_TAI 11
1814 #endif
1816 static const struct clockid_map clockids[] = {
1817 /* available for all events, NMI safe */
1818 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1819 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1821 /* available for some events */
1822 CLOCKID_MAP("realtime", CLOCK_REALTIME),
1823 CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1824 CLOCKID_MAP("tai", CLOCK_TAI),
1826 /* available for the lazy */
1827 CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1828 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1829 CLOCKID_MAP("real", CLOCK_REALTIME),
1830 CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1832 CLOCKID_END,
1835 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1837 struct timespec res;
1839 *res_ns = 0;
1840 if (!clock_getres(clk_id, &res))
1841 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1842 else
1843 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1845 return 0;
1848 static int parse_clockid(const struct option *opt, const char *str, int unset)
1850 struct record_opts *opts = (struct record_opts *)opt->value;
1851 const struct clockid_map *cm;
1852 const char *ostr = str;
1854 if (unset) {
1855 opts->use_clockid = 0;
1856 return 0;
1859 /* no arg passed */
1860 if (!str)
1861 return 0;
1863 /* no setting it twice */
1864 if (opts->use_clockid)
1865 return -1;
1867 opts->use_clockid = true;
1869 /* if its a number, we're done */
1870 if (sscanf(str, "%d", &opts->clockid) == 1)
1871 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1873 /* allow a "CLOCK_" prefix to the name */
1874 if (!strncasecmp(str, "CLOCK_", 6))
1875 str += 6;
1877 for (cm = clockids; cm->name; cm++) {
1878 if (!strcasecmp(str, cm->name)) {
1879 opts->clockid = cm->clockid;
1880 return get_clockid_res(opts->clockid,
1881 &opts->clockid_res_ns);
1885 opts->use_clockid = false;
1886 ui__warning("unknown clockid %s, check man page\n", ostr);
1887 return -1;
1890 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1892 struct record_opts *opts = (struct record_opts *)opt->value;
1894 if (unset || !str)
1895 return 0;
1897 if (!strcasecmp(str, "node"))
1898 opts->affinity = PERF_AFFINITY_NODE;
1899 else if (!strcasecmp(str, "cpu"))
1900 opts->affinity = PERF_AFFINITY_CPU;
1902 return 0;
1905 static int record__parse_mmap_pages(const struct option *opt,
1906 const char *str,
1907 int unset __maybe_unused)
1909 struct record_opts *opts = opt->value;
1910 char *s, *p;
1911 unsigned int mmap_pages;
1912 int ret;
1914 if (!str)
1915 return -EINVAL;
1917 s = strdup(str);
1918 if (!s)
1919 return -ENOMEM;
1921 p = strchr(s, ',');
1922 if (p)
1923 *p = '\0';
1925 if (*s) {
1926 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1927 if (ret)
1928 goto out_free;
1929 opts->mmap_pages = mmap_pages;
1932 if (!p) {
1933 ret = 0;
1934 goto out_free;
1937 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1938 if (ret)
1939 goto out_free;
1941 opts->auxtrace_mmap_pages = mmap_pages;
1943 out_free:
1944 free(s);
1945 return ret;
1948 static void switch_output_size_warn(struct record *rec)
1950 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1951 struct switch_output *s = &rec->switch_output;
1953 wakeup_size /= 2;
1955 if (s->size < wakeup_size) {
1956 char buf[100];
1958 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1959 pr_warning("WARNING: switch-output data size lower than "
1960 "wakeup kernel buffer size (%s) "
1961 "expect bigger perf.data sizes\n", buf);
1965 static int switch_output_setup(struct record *rec)
1967 struct switch_output *s = &rec->switch_output;
1968 static struct parse_tag tags_size[] = {
1969 { .tag = 'B', .mult = 1 },
1970 { .tag = 'K', .mult = 1 << 10 },
1971 { .tag = 'M', .mult = 1 << 20 },
1972 { .tag = 'G', .mult = 1 << 30 },
1973 { .tag = 0 },
1975 static struct parse_tag tags_time[] = {
1976 { .tag = 's', .mult = 1 },
1977 { .tag = 'm', .mult = 60 },
1978 { .tag = 'h', .mult = 60*60 },
1979 { .tag = 'd', .mult = 60*60*24 },
1980 { .tag = 0 },
1982 unsigned long val;
1984 if (!s->set)
1985 return 0;
1987 if (!strcmp(s->str, "signal")) {
1988 s->signal = true;
1989 pr_debug("switch-output with SIGUSR2 signal\n");
1990 goto enabled;
1993 val = parse_tag_value(s->str, tags_size);
1994 if (val != (unsigned long) -1) {
1995 s->size = val;
1996 pr_debug("switch-output with %s size threshold\n", s->str);
1997 goto enabled;
2000 val = parse_tag_value(s->str, tags_time);
2001 if (val != (unsigned long) -1) {
2002 s->time = val;
2003 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2004 s->str, s->time);
2005 goto enabled;
2008 return -1;
2010 enabled:
2011 rec->timestamp_filename = true;
2012 s->enabled = true;
2014 if (s->size && !rec->opts.no_buffering)
2015 switch_output_size_warn(rec);
2017 return 0;
/* Usage lines for 'perf record'; the array is NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Non-static alias of the usage lines above. */
const char * const *record_usage = __record_usage;
2028 * XXX Ideally would be local to cmd_record() and passed to a record__new
2029 * because we need to have access to it in record__exit, that is called
2030 * after cmd_record() exits, but since record_options need to be accessible to
2031 * builtin-script, leave it here.
2033 * At least we don't ouch it in all the other functions here directly.
2035 * Just say no to tons of global variables, sigh.
2037 static struct record record = {
2038 .opts = {
2039 .sample_time = true,
2040 .mmap_pages = UINT_MAX,
2041 .user_freq = UINT_MAX,
2042 .user_interval = ULLONG_MAX,
2043 .freq = 4000,
2044 .target = {
2045 .uses_mmap = true,
2046 .default_per_cpu = true,
2048 .mmap_flush = MMAP_FLUSH_DEFAULT,
2050 .tool = {
2051 .sample = process_sample_event,
2052 .fork = perf_event__process_fork,
2053 .exit = perf_event__process_exit,
2054 .comm = perf_event__process_comm,
2055 .namespaces = perf_event__process_namespaces,
2056 .mmap = perf_event__process_mmap,
2057 .mmap2 = perf_event__process_mmap2,
2058 .ordered_events = true,
2062 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
2063 "\n\t\t\t\tDefault: fp";
2065 static bool dry_run;
2068 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2069 * with it and switch to use the library functions in perf_evlist that came
2070 * from builtin-record.c, i.e. use record_opts,
2071 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
2072 * using pipes, etc.
2074 static struct option __record_options[] = {
2075 OPT_CALLBACK('e', "event", &record.evlist, "event",
2076 "event selector. use 'perf list' to list available events",
2077 parse_events_option),
2078 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2079 "event filter", parse_filter),
2080 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2081 NULL, "don't record events from perf itself",
2082 exclude_perf),
2083 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2084 "record events on existing process id"),
2085 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2086 "record events on existing thread id"),
2087 OPT_INTEGER('r', "realtime", &record.realtime_prio,
2088 "collect data with this RT SCHED_FIFO priority"),
2089 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2090 "collect data without buffering"),
2091 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2092 "collect raw sample records from all opened counters"),
2093 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2094 "system-wide collection from all CPUs"),
2095 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2096 "list of cpus to monitor"),
2097 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2098 OPT_STRING('o', "output", &record.data.path, "file",
2099 "output file name"),
2100 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2101 &record.opts.no_inherit_set,
2102 "child tasks do not inherit counters"),
2103 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2104 "synthesize non-sample events at the end of output"),
2105 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2106 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
2107 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2108 "Fail if the specified frequency can't be used"),
2109 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2110 "profile at this frequency",
2111 record__parse_freq),
2112 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2113 "number of mmap data pages and AUX area tracing mmap pages",
2114 record__parse_mmap_pages),
2115 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2116 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2117 record__mmap_flush_parse),
2118 OPT_BOOLEAN(0, "group", &record.opts.group,
2119 "put the counters into a counter group"),
2120 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2121 NULL, "enables call-graph recording" ,
2122 &record_callchain_opt),
2123 OPT_CALLBACK(0, "call-graph", &record.opts,
2124 "record_mode[,record_size]", record_callchain_help,
2125 &record_parse_callchain_opt),
2126 OPT_INCR('v', "verbose", &verbose,
2127 "be more verbose (show counter open errors, etc)"),
2128 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2129 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2130 "per thread counts"),
2131 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2132 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2133 "Record the sample physical addresses"),
2134 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2135 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2136 &record.opts.sample_time_set,
2137 "Record the sample timestamps"),
2138 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2139 "Record the sample period"),
2140 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2141 "don't sample"),
2142 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2143 &record.no_buildid_cache_set,
2144 "do not update the buildid cache"),
2145 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2146 &record.no_buildid_set,
2147 "do not collect buildids in perf.data"),
2148 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2149 "monitor event in cgroup name only",
2150 parse_cgroups),
2151 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2152 "ms to wait before starting measurement after program start"),
2153 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2154 "user to profile"),
2156 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2157 "branch any", "sample any taken branches",
2158 parse_branch_stack),
2160 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2161 "branch filter mask", "branch stack filter modes",
2162 parse_branch_stack),
2163 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2164 "sample by weight (on special events only)"),
2165 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2166 "sample transaction flags (special events only)"),
2167 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2168 "use per-thread mmaps"),
2169 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2170 "sample selected machine registers on interrupt,"
2171 " use '-I?' to list register names", parse_intr_regs),
2172 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2173 "sample selected machine registers on interrupt,"
2174 " use '--user-regs=?' to list register names", parse_user_regs),
2175 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2176 "Record running/enabled time of read (:S) events"),
2177 OPT_CALLBACK('k', "clockid", &record.opts,
2178 "clockid", "clockid to use for events, see clock_gettime()",
2179 parse_clockid),
2180 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2181 "opts", "AUX area tracing Snapshot Mode", ""),
2182 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2183 "per thread proc mmap processing timeout in ms"),
2184 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2185 "Record namespaces events"),
2186 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2187 "Record context switch events"),
2188 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2189 "Configure all used events to run in kernel space.",
2190 PARSE_OPT_EXCLUSIVE),
2191 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2192 "Configure all used events to run in user space.",
2193 PARSE_OPT_EXCLUSIVE),
2194 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2195 "clang binary to use for compiling BPF scriptlets"),
2196 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2197 "options passed to clang when compiling BPF scriptlets"),
2198 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2199 "file", "vmlinux pathname"),
2200 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2201 "Record build-id of all DSOs regardless of hits"),
2202 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2203 "append timestamp to output filename"),
2204 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2205 "Record timestamp boundary (time of first/last samples)"),
2206 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2207 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2208 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2209 "signal"),
2210 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2211 "Limit number of switch output generated files"),
2212 OPT_BOOLEAN(0, "dry-run", &dry_run,
2213 "Parse options then exit"),
2214 #ifdef HAVE_AIO_SUPPORT
2215 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2216 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2217 record__aio_parse),
2218 #endif
2219 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2220 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2221 record__parse_affinity),
2222 #ifdef HAVE_ZSTD_SUPPORT
2223 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2224 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2225 record__parse_comp_level),
2226 #endif
2227 OPT_END()
2230 struct option *record_options = __record_options;
2232 int cmd_record(int argc, const char **argv)
2234 int err;
2235 struct record *rec = &record;
2236 char errbuf[BUFSIZ];
2238 setlocale(LC_ALL, "");
2240 #ifndef HAVE_LIBBPF_SUPPORT
2241 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2242 set_nobuild('\0', "clang-path", true);
2243 set_nobuild('\0', "clang-opt", true);
2244 # undef set_nobuild
2245 #endif
2247 #ifndef HAVE_BPF_PROLOGUE
2248 # if !defined (HAVE_DWARF_SUPPORT)
2249 # define REASON "NO_DWARF=1"
2250 # elif !defined (HAVE_LIBBPF_SUPPORT)
2251 # define REASON "NO_LIBBPF=1"
2252 # else
2253 # define REASON "this architecture doesn't support BPF prologue"
2254 # endif
2255 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2256 set_nobuild('\0', "vmlinux", true);
2257 # undef set_nobuild
2258 # undef REASON
2259 #endif
2261 CPU_ZERO(&rec->affinity_mask);
2262 rec->opts.affinity = PERF_AFFINITY_SYS;
2264 rec->evlist = perf_evlist__new();
2265 if (rec->evlist == NULL)
2266 return -ENOMEM;
2268 err = perf_config(perf_record_config, rec);
2269 if (err)
2270 return err;
2272 argc = parse_options(argc, argv, record_options, record_usage,
2273 PARSE_OPT_STOP_AT_NON_OPTION);
2274 if (quiet)
2275 perf_quiet_option();
2277 /* Make system wide (-a) the default target. */
2278 if (!argc && target__none(&rec->opts.target))
2279 rec->opts.target.system_wide = true;
2281 if (nr_cgroups && !rec->opts.target.system_wide) {
2282 usage_with_options_msg(record_usage, record_options,
2283 "cgroup monitoring only available in system-wide mode");
2287 if (rec->opts.comp_level != 0) {
2288 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2289 rec->no_buildid = true;
2292 if (rec->opts.record_switch_events &&
2293 !perf_can_record_switch_events()) {
2294 ui__error("kernel does not support recording context switch events\n");
2295 parse_options_usage(record_usage, record_options, "switch-events", 0);
2296 return -EINVAL;
2299 if (switch_output_setup(rec)) {
2300 parse_options_usage(record_usage, record_options, "switch-output", 0);
2301 return -EINVAL;
2304 if (rec->switch_output.time) {
2305 signal(SIGALRM, alarm_sig_handler);
2306 alarm(rec->switch_output.time);
2309 if (rec->switch_output.num_files) {
2310 rec->switch_output.filenames = calloc(sizeof(char *),
2311 rec->switch_output.num_files);
2312 if (!rec->switch_output.filenames)
2313 return -EINVAL;
2317 * Allow aliases to facilitate the lookup of symbols for address
2318 * filters. Refer to auxtrace_parse_filters().
2320 symbol_conf.allow_aliases = true;
2322 symbol__init(NULL);
2324 err = record__auxtrace_init(rec);
2325 if (err)
2326 goto out;
2328 if (dry_run)
2329 goto out;
2331 err = bpf__setup_stdout(rec->evlist);
2332 if (err) {
2333 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2334 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2335 errbuf);
2336 goto out;
2339 err = -ENOMEM;
2341 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2342 pr_warning(
2343 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2344 "check /proc/sys/kernel/kptr_restrict.\n\n"
2345 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2346 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2347 "Samples in kernel modules won't be resolved at all.\n\n"
2348 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2349 "even with a suitable vmlinux or kallsyms file.\n\n");
2351 if (rec->no_buildid_cache || rec->no_buildid) {
2352 disable_buildid_cache();
2353 } else if (rec->switch_output.enabled) {
2355 * In 'perf record --switch-output', disable buildid
2356 * generation by default to reduce data file switching
2357 * overhead. Still generate buildid if they are required
2358 * explicitly using
2360 * perf record --switch-output --no-no-buildid \
2361 * --no-no-buildid-cache
2363 * Following code equals to:
2365 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2366 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2367 * disable_buildid_cache();
2369 bool disable = true;
2371 if (rec->no_buildid_set && !rec->no_buildid)
2372 disable = false;
2373 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2374 disable = false;
2375 if (disable) {
2376 rec->no_buildid = true;
2377 rec->no_buildid_cache = true;
2378 disable_buildid_cache();
2382 if (record.opts.overwrite)
2383 record.opts.tail_synthesize = true;
2385 if (rec->evlist->nr_entries == 0 &&
2386 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2387 pr_err("Not enough memory for event selector list\n");
2388 goto out;
2391 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2392 rec->opts.no_inherit = true;
2394 err = target__validate(&rec->opts.target);
2395 if (err) {
2396 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2397 ui__warning("%s\n", errbuf);
2400 err = target__parse_uid(&rec->opts.target);
2401 if (err) {
2402 int saved_errno = errno;
2404 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2405 ui__error("%s", errbuf);
2407 err = -saved_errno;
2408 goto out;
2411 /* Enable ignoring missing threads when -u/-p option is defined. */
2412 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2414 err = -ENOMEM;
2415 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2416 usage_with_options(record_usage, record_options);
2418 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2419 if (err)
2420 goto out;
2423 * We take all buildids when the file contains
2424 * AUX area tracing data because we do not decode the
2425 * trace because it would take too long.
2427 if (rec->opts.full_auxtrace)
2428 rec->buildid_all = true;
2430 if (record_opts__config(&rec->opts)) {
2431 err = -EINVAL;
2432 goto out;
2435 if (rec->opts.nr_cblocks > nr_cblocks_max)
2436 rec->opts.nr_cblocks = nr_cblocks_max;
2437 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2439 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2440 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2442 if (rec->opts.comp_level > comp_level_max)
2443 rec->opts.comp_level = comp_level_max;
2444 pr_debug("comp level: %d\n", rec->opts.comp_level);
2446 err = __cmd_record(&record, argc, argv);
2447 out:
2448 perf_evlist__delete(rec->evlist);
2449 symbol__exit();
2450 auxtrace_record__free(rec->itr);
2451 return err;
2454 static void snapshot_sig_handler(int sig __maybe_unused)
2456 struct record *rec = &record;
2458 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2459 trigger_hit(&auxtrace_snapshot_trigger);
2460 auxtrace_record__snapshot_started = 1;
2461 if (auxtrace_record__snapshot_start(record.itr))
2462 trigger_error(&auxtrace_snapshot_trigger);
2465 if (switch_output_signal(rec))
2466 trigger_hit(&switch_output_trigger);
2469 static void alarm_sig_handler(int sig __maybe_unused)
2471 struct record *rec = &record;
2473 if (switch_output_time(rec))
2474 trigger_hit(&switch_output_trigger);