// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"
#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include <sys/types.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
	char		 **filenames;
	int		 num_files;
	int		 cur_file;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
	struct mmap_cpu_mask	affinity_mask;
	unsigned long		output_max_size;	/* = 0: unlimited */
};
static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}
static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (rec->bytes_written >= rec->output_max_size);
}
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				rec->bytes_written >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size);
#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole chunk
		 * at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited on before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the
	 * remainder from the beginning of the kernel buffer till the end of
	 * the data chunk.
	 */
	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */
	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * the aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}
static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}
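
/*
 * --mmap-flush: minimal number of bytes that must have accumulated in a
 * ring buffer before it is extracted and written to perf.data. The value
 * is parsed with B/K/M/G suffixes and capped against the mmap size below.
 */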
#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
		bf   = map->data;
	}

	rec->samples++;
	return record__write(rec, map, bf, size);
}
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}
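
/*
 * AUX area tracing support: the helpers below read the auxtrace mmap
 * areas (either continuously or in snapshot mode) and append the data as
 * PERF_RECORD_AUXTRACE events, padded to an 8 byte boundary.
 */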
#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}
static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}
static int record__auxtrace_init(struct record *rec)
{
	int err;

	rec->itr = auxtrace_record__init(rec->evlist, &err);
	if (err)
		return err;

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}
#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif
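
/*
 * --kcore support: check that /proc/kcore of the target machine is
 * readable and, if so, copy it into the perf.data directory so that later
 * analysis can resolve symbols against the exact kernel code that ran.
 */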
static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked for by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		/* Disable tracking of mmaps on the lead event. */
		pos = evlist__first(evlist);
		pos->tracking = 0;
		/* Set up the dummy event to track mmaps. */
		pos = evlist__last(evlist);
		pos->tracking = 1;
		pos->core.attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid missing symbols when the first addr is
	 * in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
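
/*
 * --affinity=node|cpu: migrate the reading thread to the NUMA node or CPU
 * mask of the mmap buffer currently being processed, so the copies out of
 * the ring buffer stay local to the data.
 */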
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
			  rec->affinity_mask.nbits)) {
		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
			  map->affinity_mask.bits, rec->affinity_mask.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
				  (cpu_set_t *)rec->affinity_mask.bits);
		if (verbose == 2)
			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
	}
}
static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;

	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	session->bytes_transferred += src_size;
	session->bytes_compressed  += compressed;

	return compressed;
}
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct mmap *maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = &maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}
static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}
static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);
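
/*
 * --switch-output: finish the current perf.data, rotate to a new
 * timestamped file (optionally keeping only --switch-max-files of them)
 * and re-synthesize the tracking events needed by the new file.
 */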
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist. As a result, the newly created perf.data
		 * wouldn't contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
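
/*
 * Synthesize the non-sample events that consumers need to resolve the
 * recorded samples: attrs/features for pipe mode, tracing data, time
 * conversion, kernel and module mmaps, thread/cpu maps, bpf and cgroup
 * events, and the pre-existing threads of the target.
 */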
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->core.entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_warning("Couldn't synthesize cgroup events.\n");

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
					    process_synthesized_event, opts->sample_address,
					    1);
out:
	return err;
}
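
/*
 * The record session proper: set up the signal handlers and the perf
 * session, write the header, synthesize the initial events, start (or
 * attach to) the workload, then loop reading the ring buffers until done.
 */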
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	struct evlist *sb_evlist = NULL;
	int fd;
	float ratio = 0;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
		tool->cgroup_events = true;
#else
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}

	session->header.env.comp_type  = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the ids allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_child;
		}
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	if (!opts->no_bpf_event)
		bpf_event__add_sb_event(&sb_evlist, &session->header.env);

	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
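	/*
	 * Main read loop: keep draining the ring buffers until the workload
	 * exits, a signal sets 'done', or polling reports no more active fds.
	 */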
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possibly BKW_MMAP_EMPTY
		 * here: when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never convert
		 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from the
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));

		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	record__mmap_read_all(rec, true);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
	zstd_fini(&session->zstd_data);
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(sb_evlist);
	return status;
}
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}
#ifdef HAVE_AIO_SUPPORT
	if (!strcmp(var, "record.aio")) {
		rec->opts.nr_cblocks = strtol(value, NULL, 0);
		if (!rec->opts.nr_cblocks)
			rec->opts.nr_cblocks = nr_cblocks_default;
	}
#endif

	return 0;
}
struct clockid_map {
	const char	*name;
	int		clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }

/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset || !str)
		return 0;

	if (!strcasecmp(str, "node"))
		opts->affinity = PERF_AFFINITY_NODE;
	else if (!strcasecmp(str, "cpu"))
		opts->affinity = PERF_AFFINITY_CPU;

	return 0;
}
static int parse_output_max_size(const struct option *opt,
				 const char *str, int unset)
{
	unsigned long *s = (unsigned long *)opt->value;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (unset) {
		*s = 0;
		return 0;
	}

	val = parse_tag_value(str, tags_size);
	if (val != (unsigned long) -1) {
		*s = val;
		return 0;
	}

	return -1;
}
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}
static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s); "
			   "expect bigger perf.data sizes\n", buf);
	}
}
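
/*
 * Parse the --switch-output argument: "signal" rotates on SIGUSR2, a
 * size[BKMG] rotates once that much data has been written, and a
 * time[smhd] rotates periodically via alarm().
 */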
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;
static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;

	return perf_event__process_mmap(tool, event, sample, machine);
}

static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;

	return perf_event__process_mmap2(tool, event, sample, machine);
}
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= build_id__process_mmap,
		.mmap2		= build_id__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";
static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
			    "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
),
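
/*
 * Illustrative invocations only (not part of the original source), showing a
 * few of the options declared above; workloads and sizes are placeholders:
 *
 *   perf record -F max -g -- ./myworkload              # max sampling rate + callchains
 *   perf record --compression-level=3 --max-size=512M -a sleep 10
 *                                                      # Zstd-compressed, size-capped output
 *   perf record --aio --affinity=node -a               # async writes, NUMA-aware reader affinity
 */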

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->opts.kcore)
		rec->data.is_dir = true;

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}
	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}
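
	/*
	 * Illustrative use of the switch-output machinery set up above
	 * (examples are not from the original source): "--switch-output=100M"
	 * rotates to a new output file once the size threshold is crossed,
	 * "--switch-output=30s" relies on the SIGALRM armed here, and plain
	 * "--switch-output" rotates on SIGUSR2.
	 */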
	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames)
			return -EINVAL;
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;
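
	/*
	 * Example of the address filters this alias lookup serves
	 * (illustrative, Intel PT filter syntax assumed; see
	 * auxtrace_parse_filters()):
	 *
	 *   perf record -e intel_pt// --filter 'filter main @ /usr/bin/ls' -- ls
	 */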
	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
		rec->affinity_mask.nbits = cpu__max_cpu();
		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
		if (!rec->affinity_mask.bits) {
			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
			return -ENOMEM;
		}
		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
	}

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;
	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;
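
	/*
	 * Rationale (roughly): in overwrite mode the ring buffers are only
	 * drained at the end of the session, so side-band events are also
	 * synthesized at the tail to stay consistent with the samples that
	 * survive in the overwrite buffers.
	 */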
	if (rec->evlist->core.nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
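
	/*
	 * With a uid or pid target the thread list comes from a /proc scan,
	 * and threads may exit before perf_event_open() runs; a vanished
	 * thread is therefore tolerated rather than failing the session.
	 */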
	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace, which would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	bitmap_free(rec->affinity_mask.bits);
	evlist__delete(rec->evlist);
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}