Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / tools / perf / util / bpf-filter.c
bloba4fdf6911ec1c32e799983c0951f39b16f77b7b7
/* SPDX-License-Identifier: GPL-2.0 */
/**
 * Generic event filter for sampling events in BPF.
 *
 * The BPF program is fixed: it just reads the filter expressions in the
 * 'filters' map and compares them against the sample data in order to reject
 * samples that don't match.  Each filter expression contains a sample flag
 * (term) to compare, an operation (==, >=, and so on) and a value.
 *
 * Note that each entry has an array of filter expressions and it only succeeds
 * when all of the expressions are satisfied.  But it supports the logical OR
 * using a GROUP operation which is satisfied when any of its member expressions
 * evaluates to true.  But it doesn't allow nested GROUP operations for now.
 *
 * To support non-root users, the filters map can be loaded and pinned in the BPF
 * filesystem by root (perf record --setup-filter pin).  Then each user will get
 * a new entry in the shared filters map to fill the filter expressions.  And the
 * BPF program will find the filter using (task-id, event-id) as a key.
 *
 * The pinned BPF object (shared for regular users) has:
 *
 *                  event_hash                   |
 *                  |        |                   |
 *   event->id ---> |   id   | ---+   idx_hash   |     filters
 *                  |        |    |   |      |   |    |       |
 *                  |  ....  |    +-> |  idx | --+--> | exprs | --->  perf_bpf_filter_entry[]
 *                                |   |      |   |    |       |               .op
 *   task id (tgid) --------------+   | .... |   |    |  ...  |               .term (+ part)
 *                                               |                            .value
 *                                               |
 *   ======= (root would skip this part) ========                     (compares it in a loop)
 *
 * This is used for per-task use cases while system-wide profiling (normally from
 * root user) uses a separate copy of the program and the maps for its own so that
 * it can proceed even if a lot of non-root users are using the filters at the
 * same time.  In this case the filters map has a single entry and no need to use
 * the hash maps to get the index (key) of the filters map (IOW it's always 0).
 *
 * The BPF program returns 1 to accept the sample or 0 to drop it.
 * The 'dropped' map is to keep how many samples it dropped by the filter and
 * it will be reported as lost samples.
 */
43 #include <stdlib.h>
44 #include <fcntl.h>
45 #include <sys/ioctl.h>
46 #include <sys/stat.h>
48 #include <bpf/bpf.h>
49 #include <linux/err.h>
50 #include <linux/list.h>
51 #include <api/fs/fs.h>
52 #include <internal/xyarray.h>
53 #include <perf/threadmap.h>
55 #include "util/debug.h"
56 #include "util/evsel.h"
57 #include "util/target.h"
59 #include "util/bpf-filter.h"
60 #include <util/bpf-filter-flex.h>
61 #include <util/bpf-filter-bison.h>
63 #include "bpf_skel/sample-filter.h"
64 #include "bpf_skel/sample_filter.skel.h"
/*
 * Read the int stored at position (x, y) of the evsel's core.fd xyarray.
 * Every argument is parenthesized so that expressions such as `evsel + 1`
 * expand correctly.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->core.fd, (x), (y)))

/* Build one sample_table row: { term, stringified flag name, option hint }. */
#define __PERF_SAMPLE_TYPE(tt, st, opt)	{ tt, #st, opt }
/* Expand _st into PBF_TERM_<_st> and the string "PERF_SAMPLE_<_st>". */
#define PERF_SAMPLE_TYPE(_st, opt)	__PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt)
71 /* Index in the pinned 'filters' map. Should be released after use. */
72 struct pinned_filter_idx {
73 struct list_head list;
74 struct evsel *evsel;
75 u64 event_id;
76 int hash_idx;
79 static LIST_HEAD(pinned_filters);
81 static const struct perf_sample_info {
82 enum perf_bpf_filter_term type;
83 const char *name;
84 const char *option;
85 } sample_table[] = {
86 /* default sample flags */
87 PERF_SAMPLE_TYPE(IP, NULL),
88 PERF_SAMPLE_TYPE(TID, NULL),
89 PERF_SAMPLE_TYPE(PERIOD, NULL),
90 /* flags mostly set by default, but still have options */
91 PERF_SAMPLE_TYPE(ID, "--sample-identifier"),
92 PERF_SAMPLE_TYPE(CPU, "--sample-cpu"),
93 PERF_SAMPLE_TYPE(TIME, "-T"),
94 /* optional sample flags */
95 PERF_SAMPLE_TYPE(ADDR, "-d"),
96 PERF_SAMPLE_TYPE(DATA_SRC, "-d"),
97 PERF_SAMPLE_TYPE(PHYS_ADDR, "--phys-data"),
98 PERF_SAMPLE_TYPE(WEIGHT, "-W"),
99 PERF_SAMPLE_TYPE(WEIGHT_STRUCT, "-W"),
100 PERF_SAMPLE_TYPE(TRANSACTION, "--transaction"),
101 PERF_SAMPLE_TYPE(CODE_PAGE_SIZE, "--code-page-size"),
102 PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"),
103 PERF_SAMPLE_TYPE(CGROUP, "--all-cgroups"),
106 static int get_pinned_fd(const char *name);
108 static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type)
110 size_t i;
112 for (i = 0; i < ARRAY_SIZE(sample_table); i++) {
113 if (sample_table[i].type == type)
114 return &sample_table[i];
116 return NULL;
119 static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *expr)
121 const struct perf_sample_info *info;
123 if (expr->term >= PBF_TERM_SAMPLE_START && expr->term <= PBF_TERM_SAMPLE_END &&
124 (evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START))))
125 return 0;
127 if (expr->term == PBF_TERM_UID || expr->term == PBF_TERM_GID) {
128 /* Not dependent on the sample_type as computed from a BPF helper. */
129 return 0;
132 if (expr->op == PBF_OP_GROUP_BEGIN) {
133 struct perf_bpf_filter_expr *group;
135 list_for_each_entry(group, &expr->groups, list) {
136 if (check_sample_flags(evsel, group) < 0)
137 return -1;
139 return 0;
142 info = get_sample_info(expr->term);
143 if (info == NULL) {
144 pr_err("Error: %s event does not have sample flags %d\n",
145 evsel__name(evsel), expr->term);
146 return -1;
149 pr_err("Error: %s event does not have %s\n", evsel__name(evsel), info->name);
150 if (info->option)
151 pr_err(" Hint: please add %s option to perf record\n", info->option);
152 return -1;
155 static int get_filter_entries(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
157 int i = 0;
158 struct perf_bpf_filter_expr *expr;
160 list_for_each_entry(expr, &evsel->bpf_filters, list) {
161 if (check_sample_flags(evsel, expr) < 0)
162 return -EINVAL;
164 if (i == MAX_FILTERS)
165 return -E2BIG;
167 entry[i].op = expr->op;
168 entry[i].part = expr->part;
169 entry[i].term = expr->term;
170 entry[i].value = expr->val;
171 i++;
173 if (expr->op == PBF_OP_GROUP_BEGIN) {
174 struct perf_bpf_filter_expr *group;
176 list_for_each_entry(group, &expr->groups, list) {
177 if (i == MAX_FILTERS)
178 return -E2BIG;
180 entry[i].op = group->op;
181 entry[i].part = group->part;
182 entry[i].term = group->term;
183 entry[i].value = group->val;
184 i++;
187 if (i == MAX_FILTERS)
188 return -E2BIG;
190 entry[i].op = PBF_OP_GROUP_END;
191 i++;
195 if (i < MAX_FILTERS) {
196 /* to terminate the loop early */
197 entry[i].op = PBF_OP_DONE;
198 i++;
200 return 0;
/*
 * Translate a thread id to its thread group id by reading the "Tgid:" line of
 * the procfs file "<tid>/status".
 *
 * Returns the tgid, or -1 if the file cannot be read, has no "Tgid:" line, or
 * the number is not immediately followed by a newline.
 */
static int convert_to_tgid(int tid)
{
	char path[128];
	char *buf, *p, *q;
	int tgid;
	int well_formed;
	size_t len;

	scnprintf(path, sizeof(path), "%d/status", tid);
	if (procfs__read_str(path, &buf, &len) < 0)
		return -1;

	p = strstr(buf, "Tgid:");
	if (p == NULL) {
		free(buf);
		return -1;
	}

	/* skip "Tgid:" and the separator that follows it */
	tgid = strtol(p + 6, &q, 0);

	/* q points into buf, so it has to be examined before buf is freed */
	well_formed = (*q == '\n');
	free(buf);

	return well_formed ? tgid : -1;
}
229 * The event might be closed already so we cannot get the list of ids using FD
230 * like in create_event_hash() below, let's iterate the event_hash map and
231 * delete all entries that have the event id as a key.
233 static void destroy_event_hash(u64 event_id)
235 int fd;
236 u64 key, *prev_key = NULL;
237 int num = 0, alloced = 32;
238 u64 *ids = calloc(alloced, sizeof(*ids));
240 if (ids == NULL)
241 return;
243 fd = get_pinned_fd("event_hash");
244 if (fd < 0) {
245 pr_debug("cannot get fd for 'event_hash' map\n");
246 free(ids);
247 return;
250 /* Iterate the whole map to collect keys for the event id. */
251 while (!bpf_map_get_next_key(fd, prev_key, &key)) {
252 u64 id;
254 if (bpf_map_lookup_elem(fd, &key, &id) == 0 && id == event_id) {
255 if (num == alloced) {
256 void *tmp;
258 alloced *= 2;
259 tmp = realloc(ids, alloced * sizeof(*ids));
260 if (tmp == NULL)
261 break;
263 ids = tmp;
265 ids[num++] = key;
268 prev_key = &key;
271 for (int i = 0; i < num; i++)
272 bpf_map_delete_elem(fd, &ids[i]);
274 free(ids);
275 close(fd);
279 * Return a representative id if ok, or 0 for failures.
281 * The perf_event->id is good for this, but an evsel would have multiple
282 * instances for CPUs and tasks. So pick up the first id and setup a hash
283 * from id of each instance to the representative id (the first one).
285 static u64 create_event_hash(struct evsel *evsel)
287 int x, y, fd;
288 u64 the_id = 0, id;
290 fd = get_pinned_fd("event_hash");
291 if (fd < 0) {
292 pr_err("cannot get fd for 'event_hash' map\n");
293 return 0;
296 for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
297 for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
298 int ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_ID, &id);
300 if (ret < 0) {
301 pr_err("Failed to get the event id\n");
302 if (the_id)
303 destroy_event_hash(the_id);
304 return 0;
307 if (the_id == 0)
308 the_id = id;
310 bpf_map_update_elem(fd, &id, &the_id, BPF_ANY);
314 close(fd);
315 return the_id;
318 static void destroy_idx_hash(struct pinned_filter_idx *pfi)
320 int fd, nr;
321 struct perf_thread_map *threads;
323 fd = get_pinned_fd("filters");
324 bpf_map_delete_elem(fd, &pfi->hash_idx);
325 close(fd);
327 if (pfi->event_id)
328 destroy_event_hash(pfi->event_id);
330 threads = perf_evsel__threads(&pfi->evsel->core);
331 if (threads == NULL)
332 return;
334 fd = get_pinned_fd("idx_hash");
335 nr = perf_thread_map__nr(threads);
336 for (int i = 0; i < nr; i++) {
337 /* The target task might be dead already, just try the pid */
338 struct idx_hash_key key = {
339 .evt_id = pfi->event_id,
340 .tgid = perf_thread_map__pid(threads, i),
343 bpf_map_delete_elem(fd, &key);
345 close(fd);
348 /* Maintain a hashmap from (tgid, event-id) to filter index */
349 static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
351 int filter_idx;
352 int fd, nr, last;
353 u64 event_id = 0;
354 struct pinned_filter_idx *pfi = NULL;
355 struct perf_thread_map *threads;
357 fd = get_pinned_fd("filters");
358 if (fd < 0) {
359 pr_err("cannot get fd for 'filters' map\n");
360 return fd;
363 /* Find the first available entry in the filters map */
364 for (filter_idx = 0; filter_idx < MAX_FILTERS; filter_idx++) {
365 if (bpf_map_update_elem(fd, &filter_idx, entry, BPF_NOEXIST) == 0)
366 break;
368 close(fd);
370 if (filter_idx == MAX_FILTERS) {
371 pr_err("Too many users for the filter map\n");
372 return -EBUSY;
375 pfi = zalloc(sizeof(*pfi));
376 if (pfi == NULL) {
377 pr_err("Cannot save pinned filter index\n");
378 return -ENOMEM;
381 pfi->evsel = evsel;
382 pfi->hash_idx = filter_idx;
384 event_id = create_event_hash(evsel);
385 if (event_id == 0) {
386 pr_err("Cannot update the event hash\n");
387 goto err;
390 pfi->event_id = event_id;
392 threads = perf_evsel__threads(&evsel->core);
393 if (threads == NULL) {
394 pr_err("Cannot get the thread list of the event\n");
395 goto err;
398 /* save the index to a hash map */
399 fd = get_pinned_fd("idx_hash");
400 if (fd < 0) {
401 pr_err("cannot get fd for 'idx_hash' map\n");
402 goto err;
405 last = -1;
406 nr = perf_thread_map__nr(threads);
407 for (int i = 0; i < nr; i++) {
408 int pid = perf_thread_map__pid(threads, i);
409 int tgid;
410 struct idx_hash_key key = {
411 .evt_id = event_id,
414 /* it actually needs tgid, let's get tgid from /proc. */
415 tgid = convert_to_tgid(pid);
416 if (tgid < 0) {
417 /* the thread may be dead, ignore. */
418 continue;
421 if (tgid == last)
422 continue;
423 last = tgid;
424 key.tgid = tgid;
426 if (bpf_map_update_elem(fd, &key, &filter_idx, BPF_ANY) < 0) {
427 pr_err("Failed to update the idx_hash\n");
428 close(fd);
429 goto err;
431 pr_debug("bpf-filter: idx_hash (task=%d,%s) -> %d\n",
432 tgid, evsel__name(evsel), filter_idx);
435 list_add(&pfi->list, &pinned_filters);
436 close(fd);
437 return filter_idx;
439 err:
440 destroy_idx_hash(pfi);
441 free(pfi);
442 return -1;
445 int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target)
447 int i, x, y, fd, ret;
448 struct sample_filter_bpf *skel = NULL;
449 struct bpf_program *prog;
450 struct bpf_link *link;
451 struct perf_bpf_filter_entry *entry;
452 bool needs_idx_hash = !target__has_cpu(target) && !target->uid_str;
454 entry = calloc(MAX_FILTERS, sizeof(*entry));
455 if (entry == NULL)
456 return -1;
458 ret = get_filter_entries(evsel, entry);
459 if (ret < 0) {
460 pr_err("Failed to process filter entries\n");
461 goto err;
464 if (needs_idx_hash && geteuid() != 0) {
465 int zero = 0;
467 /* The filters map is shared among other processes */
468 ret = create_idx_hash(evsel, entry);
469 if (ret < 0)
470 goto err;
472 fd = get_pinned_fd("dropped");
473 if (fd < 0) {
474 ret = fd;
475 goto err;
478 /* Reset the lost count */
479 bpf_map_update_elem(fd, &ret, &zero, BPF_ANY);
480 close(fd);
482 fd = get_pinned_fd("perf_sample_filter");
483 if (fd < 0) {
484 ret = fd;
485 goto err;
488 for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
489 for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
490 ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_SET_BPF, fd);
491 if (ret < 0) {
492 pr_err("Failed to attach perf sample-filter\n");
493 close(fd);
494 goto err;
499 close(fd);
500 free(entry);
501 return 0;
504 skel = sample_filter_bpf__open_and_load();
505 if (!skel) {
506 ret = -errno;
507 pr_err("Failed to load perf sample-filter BPF skeleton\n");
508 goto err;
511 i = 0;
512 fd = bpf_map__fd(skel->maps.filters);
514 /* The filters map has only one entry in this case */
515 if (bpf_map_update_elem(fd, &i, entry, BPF_ANY) < 0) {
516 ret = -errno;
517 pr_err("Failed to update the filter map\n");
518 goto err;
521 prog = skel->progs.perf_sample_filter;
522 for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
523 for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
524 link = bpf_program__attach_perf_event(prog, FD(evsel, x, y));
525 if (IS_ERR(link)) {
526 pr_err("Failed to attach perf sample-filter program\n");
527 ret = PTR_ERR(link);
528 goto err;
532 free(entry);
533 evsel->bpf_skel = skel;
534 return 0;
536 err:
537 free(entry);
538 if (!list_empty(&pinned_filters)) {
539 struct pinned_filter_idx *pfi, *tmp;
541 list_for_each_entry_safe(pfi, tmp, &pinned_filters, list) {
542 destroy_idx_hash(pfi);
543 list_del(&pfi->list);
544 free(pfi);
547 sample_filter_bpf__destroy(skel);
548 return ret;
551 int perf_bpf_filter__destroy(struct evsel *evsel)
553 struct perf_bpf_filter_expr *expr, *tmp;
554 struct pinned_filter_idx *pfi, *pos;
556 list_for_each_entry_safe(expr, tmp, &evsel->bpf_filters, list) {
557 list_del(&expr->list);
558 free(expr);
560 sample_filter_bpf__destroy(evsel->bpf_skel);
562 list_for_each_entry_safe(pfi, pos, &pinned_filters, list) {
563 destroy_idx_hash(pfi);
564 list_del(&pfi->list);
565 free(pfi);
567 return 0;
570 u64 perf_bpf_filter__lost_count(struct evsel *evsel)
572 int count = 0;
574 if (list_empty(&evsel->bpf_filters))
575 return 0;
577 if (!list_empty(&pinned_filters)) {
578 int fd = get_pinned_fd("dropped");
579 struct pinned_filter_idx *pfi;
581 if (fd < 0)
582 return 0;
584 list_for_each_entry(pfi, &pinned_filters, list) {
585 if (pfi->evsel != evsel)
586 continue;
588 bpf_map_lookup_elem(fd, &pfi->hash_idx, &count);
589 break;
591 close(fd);
592 } else if (evsel->bpf_skel) {
593 struct sample_filter_bpf *skel = evsel->bpf_skel;
594 int fd = bpf_map__fd(skel->maps.dropped);
595 int idx = 0;
597 bpf_map_lookup_elem(fd, &idx, &count);
600 return count;
603 struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
604 int part,
605 enum perf_bpf_filter_op op,
606 unsigned long val)
608 struct perf_bpf_filter_expr *expr;
610 expr = malloc(sizeof(*expr));
611 if (expr != NULL) {
612 expr->term = term;
613 expr->part = part;
614 expr->op = op;
615 expr->val = val;
616 INIT_LIST_HEAD(&expr->groups);
618 return expr;
621 int perf_bpf_filter__parse(struct list_head *expr_head, const char *str)
623 YY_BUFFER_STATE buffer;
624 int ret;
626 buffer = perf_bpf_filter__scan_string(str);
628 ret = perf_bpf_filter_parse(expr_head);
630 perf_bpf_filter__flush_buffer(buffer);
631 perf_bpf_filter__delete_buffer(buffer);
632 perf_bpf_filter_lex_destroy();
634 return ret;
637 int perf_bpf_filter__pin(void)
639 struct sample_filter_bpf *skel;
640 char *path = NULL;
641 int dir_fd, ret = -1;
643 skel = sample_filter_bpf__open();
644 if (!skel) {
645 ret = -errno;
646 pr_err("Failed to open perf sample-filter BPF skeleton\n");
647 goto err;
650 /* pinned program will use pid-hash */
651 bpf_map__set_max_entries(skel->maps.filters, MAX_FILTERS);
652 bpf_map__set_max_entries(skel->maps.event_hash, MAX_EVT_HASH);
653 bpf_map__set_max_entries(skel->maps.idx_hash, MAX_IDX_HASH);
654 bpf_map__set_max_entries(skel->maps.dropped, MAX_FILTERS);
655 skel->rodata->use_idx_hash = 1;
657 if (sample_filter_bpf__load(skel) < 0) {
658 ret = -errno;
659 pr_err("Failed to load perf sample-filter BPF skeleton\n");
660 goto err;
663 if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
664 PERF_BPF_FILTER_PIN_PATH) < 0) {
665 ret = -errno;
666 pr_err("Failed to allocate pathname in the BPF-fs\n");
667 goto err;
670 ret = bpf_object__pin(skel->obj, path);
671 if (ret < 0) {
672 pr_err("Failed to pin BPF filter objects\n");
673 goto err;
676 /* setup access permissions for the pinned objects */
677 dir_fd = open(path, O_PATH);
678 if (dir_fd < 0) {
679 bpf_object__unpin(skel->obj, path);
680 ret = dir_fd;
681 goto err;
684 /* BPF-fs root has the sticky bit */
685 if (fchmodat(dir_fd, "..", 01755, 0) < 0) {
686 pr_debug("chmod for BPF-fs failed\n");
687 ret = -errno;
688 goto err_close;
691 /* perf_filter directory */
692 if (fchmodat(dir_fd, ".", 0755, 0) < 0) {
693 pr_debug("chmod for perf_filter directory failed?\n");
694 ret = -errno;
695 goto err_close;
698 /* programs need write permission for some reason */
699 if (fchmodat(dir_fd, "perf_sample_filter", 0777, 0) < 0) {
700 pr_debug("chmod for perf_sample_filter failed\n");
701 ret = -errno;
703 /* maps */
704 if (fchmodat(dir_fd, "filters", 0666, 0) < 0) {
705 pr_debug("chmod for filters failed\n");
706 ret = -errno;
708 if (fchmodat(dir_fd, "event_hash", 0666, 0) < 0) {
709 pr_debug("chmod for event_hash failed\n");
710 ret = -errno;
712 if (fchmodat(dir_fd, "idx_hash", 0666, 0) < 0) {
713 pr_debug("chmod for idx_hash failed\n");
714 ret = -errno;
716 if (fchmodat(dir_fd, "dropped", 0666, 0) < 0) {
717 pr_debug("chmod for dropped failed\n");
718 ret = -errno;
721 err_close:
722 close(dir_fd);
724 err:
725 free(path);
726 sample_filter_bpf__destroy(skel);
727 return ret;
730 int perf_bpf_filter__unpin(void)
732 struct sample_filter_bpf *skel;
733 char *path = NULL;
734 int ret = -1;
736 skel = sample_filter_bpf__open_and_load();
737 if (!skel) {
738 ret = -errno;
739 pr_err("Failed to open perf sample-filter BPF skeleton\n");
740 goto err;
743 if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
744 PERF_BPF_FILTER_PIN_PATH) < 0) {
745 ret = -errno;
746 pr_err("Failed to allocate pathname in the BPF-fs\n");
747 goto err;
750 ret = bpf_object__unpin(skel->obj, path);
752 err:
753 free(path);
754 sample_filter_bpf__destroy(skel);
755 return ret;
758 static int get_pinned_fd(const char *name)
760 char *path = NULL;
761 int fd;
763 if (asprintf(&path, "%s/fs/bpf/%s/%s", sysfs__mountpoint(),
764 PERF_BPF_FILTER_PIN_PATH, name) < 0)
765 return -1;
767 fd = bpf_obj_get(path);
769 free(path);
770 return fd;