1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2020 Facebook */
5 #include <linux/anon_inodes.h>
6 #include <linux/filter.h>
/* Bookkeeping for one iterator target: 'list' is the linkage used with the
 * file-scope 'targets' list, 'reg_info' points at the registration data
 * handed to bpf_iter_reg_target(), and 'btf_id' is written by
 * cache_btf_id() from prog->aux->attach_btf_id.
 * NOTE(review): the end of this definition is not visible in this listing.
 */
9 struct bpf_iter_target_info
{
10 struct list_head list
;
11 const struct bpf_iter_reg
*reg_info
;
12 u32 btf_id
; /* cached value */
/* Iterator flavour of a link: 'aux' is the per-link auxiliary info passed to
 * the target callbacks, 'tinfo' is the target whose reg_info callbacks are
 * used for this link.
 * NOTE(review): other code in this file does
 * container_of(link, struct bpf_iter_link, link), so a 'link' member is
 * expected here, but it is not visible in this listing.
 */
15 struct bpf_iter_link
{
17 struct bpf_iter_aux_info aux
;
18 struct bpf_iter_target_info
*tinfo
;
/* Per-open-file iterator state that sits in front of the target's own
 * private area. prepare_seq_file() points seq->private at 'target_private',
 * and the helpers in this file get back to this structure with
 * container_of().
 * NOTE(review): init_seq_meta() also assigns session_id, seq_num and
 * done_stop; those members are not visible in this listing.
 */
21 struct bpf_iter_priv_data
{
22 struct bpf_iter_target_info
*tinfo
;
23 const struct bpf_iter_seq_info
*seq_info
;
24 struct bpf_prog
*prog
;
/* flexible trailing area, 8-byte aligned; sized by seq_info->seq_priv_size */
28 u8 target_private
[] __aligned(8);
/* list of bpf_iter_target_info entries, linked through their 'list' member */
31 static struct list_head targets
= LIST_HEAD_INIT(targets
);
/* held in this file whenever 'targets' is walked or modified */
32 static DEFINE_MUTEX(targets_mutex
);
34 /* protect bpf_iter_link changes */
35 static DEFINE_MUTEX(link_mutex
);
37 /* incremented on every opened seq_file */
38 static atomic64_t session_id
;
/* forward declaration; called from iter_open() and bpf_iter_new_fd() */
40 static int prepare_seq_file(struct file
*file
, struct bpf_iter_link
*link
,
41 const struct bpf_iter_seq_info
*seq_info
);
/* Recover the bpf_iter_priv_data that encloses seq->private.
 * bpf_seq_read() calls this after ->next() produced a valid object.
 * NOTE(review): the member argument of container_of() and the statement that
 * updates the counter are not visible in this listing; by the name this
 * presumably increments seq_num -- confirm.
 */
43 static void bpf_iter_inc_seq_num(struct seq_file
*seq
)
45 struct bpf_iter_priv_data
*iter_priv
;
47 iter_priv
= container_of(seq
->private, struct bpf_iter_priv_data
,
/* Recover the bpf_iter_priv_data that encloses seq->private.
 * bpf_seq_read() calls this right after a ->show() call.
 * NOTE(review): the member argument of container_of() and the statement that
 * updates the counter are not visible in this listing; by the name this
 * presumably decrements seq_num -- confirm.
 */
52 static void bpf_iter_dec_seq_num(struct seq_file
*seq
)
54 struct bpf_iter_priv_data
*iter_priv
;
56 iter_priv
= container_of(seq
->private, struct bpf_iter_priv_data
,
/* Set done_stop on the iterator private data that encloses seq->private.
 * bpf_iter_get_info() tests this flag together with its in_stop argument.
 * NOTE(review): the member argument of container_of() is not visible in this
 * listing.
 */
61 static void bpf_iter_done_stop(struct seq_file
*seq
)
63 struct bpf_iter_priv_data
*iter_priv
;
65 iter_priv
= container_of(seq
->private, struct bpf_iter_priv_data
,
67 iter_priv
->done_stop
= true;
/* Report whether the target behind this seq_file has BPF_ITER_RESCHED set in
 * its reg_info->feature bits.
 * NOTE(review): the member argument of container_of() is not visible in this
 * listing.
 */
70 static bool bpf_iter_support_resched(struct seq_file
*seq
)
72 struct bpf_iter_priv_data
*iter_priv
;
74 iter_priv
= container_of(seq
->private, struct bpf_iter_priv_data
,
/* non-zero mask result converts to true for the bool return type */
76 return iter_priv
->tinfo
->reg_info
->feature
& BPF_ITER_RESCHED
;
79 /* maximum visited objects before bailing out; bpf_seq_read() compares its num_objs counter against this */
80 #define MAX_ITER_OBJECTS 1000000
82 /* bpf_seq_read, a customized and simpler version of seq_read() for the bpf iterator.
83 * no_llseek is assumed for this file.
84 * The following are differences from seq_read():
85 * . fixed buffer size (PAGE_SIZE << 3)
86 * . assuming no_llseek
87 * . stop() may call bpf program, handling potential overflow there
89 static ssize_t
bpf_seq_read(struct file
*file
, char __user
*buf
, size_t size
,
/* The seq_file is taken from file->private_data. seq->lock is taken below
 * and released at the end of the visible body.
 * NOTE(review): many statements (labels, gotos, returns, the declarations of
 * p and can_resched) are not visible in this listing; the notes added here
 * describe the visible calls only.
 */
92 struct seq_file
*seq
= file
->private_data
;
93 size_t n
, offs
, copied
= 0;
94 int err
= 0, num_objs
= 0;
98 mutex_lock(&seq
->lock
);
/* buffer is PAGE_SIZE << 3 bytes, obtained with kvmalloc() */
101 seq
->size
= PAGE_SIZE
<< 3;
102 seq
->buf
= kvmalloc(seq
->size
, GFP_KERNEL
);
/* copy out no more than 'size' bytes, starting at offset seq->from */
110 n
= min(seq
->count
, size
);
111 err
= copy_to_user(buf
, seq
->buf
+ seq
->from
, n
);
/* start iterating at the position stored in seq->index */
123 p
= seq
->op
->start(seq
, &seq
->index
);
128 seq
->op
->stop(seq
, p
);
133 err
= seq
->op
->show(seq
, p
);
135 /* object is skipped, decrease seq_num, so next
136 * valid object can reuse the same seq_num.
138 bpf_iter_dec_seq_num(seq
);
140 } else if (err
< 0 || seq_has_overflowed(seq
)) {
143 seq
->op
->stop(seq
, p
);
148 can_resched
= bpf_iter_support_resched(seq
);
150 loff_t pos
= seq
->index
;
154 p
= seq
->op
->next(seq
, p
, &seq
->index
);
155 if (pos
== seq
->index
) {
156 pr_info_ratelimited("buggy seq_file .next function %ps "
157 "did not updated position index\n",
162 if (IS_ERR_OR_NULL(p
))
165 /* got a valid next object, increase seq_num */
166 bpf_iter_inc_seq_num(seq
);
168 if (seq
->count
>= size
)
171 if (num_objs
>= MAX_ITER_OBJECTS
) {
174 seq
->op
->stop(seq
, p
);
180 err
= seq
->op
->show(seq
, p
);
182 bpf_iter_dec_seq_num(seq
);
184 } else if (err
< 0 || seq_has_overflowed(seq
)) {
189 seq
->op
->stop(seq
, p
);
200 /* bpf program called if !p */
201 seq
->op
->stop(seq
, p
);
/* done_stop is recorded only when that stop() did not overflow the buffer */
203 if (!seq_has_overflowed(seq
)) {
204 bpf_iter_done_stop(seq
);
/* copy out no more than 'size' bytes from the start of the buffer */
214 n
= min(seq
->count
, size
);
215 err
= copy_to_user(buf
, seq
->buf
, n
);
228 mutex_unlock(&seq
->lock
);
/* Pick the seq_info for @link: either the iter_seq_info of the map ops
 * reachable through link->aux.map, or the target's reg_info->seq_info.
 * NOTE(review): the condition that selects between the two, and what is done
 * with the local seq_info afterwards, are not visible in this listing.
 */
232 static const struct bpf_iter_seq_info
*
233 __get_seq_info(struct bpf_iter_link
*link
)
235 const struct bpf_iter_seq_info
*seq_info
;
238 seq_info
= link
->aux
.map
->ops
->iter_seq_info
;
243 return link
->tinfo
->reg_info
->seq_info
;
/* Open handler: takes the iterator link from inode->i_private and sets up
 * the seq_file through prepare_seq_file(), whose result is returned.
 */
246 static int iter_open(struct inode
*inode
, struct file
*file
)
248 struct bpf_iter_link
*link
= inode
->i_private
;
250 return prepare_seq_file(file
, link
, __get_seq_info(link
));
/* Release handler: runs the optional fini_seq_private() callback on the
 * target's private area, drops the program reference, and releases the
 * seq_file with seq_release_private().
 * NOTE(review): statements between the visible lines (e.g. any check on seq)
 * are not visible in this listing.
 */
253 static int iter_release(struct inode
*inode
, struct file
*file
)
255 struct bpf_iter_priv_data
*iter_priv
;
256 struct seq_file
*seq
;
258 seq
= file
->private_data
;
262 iter_priv
= container_of(seq
->private, struct bpf_iter_priv_data
,
265 if (iter_priv
->seq_info
->fini_seq_private
)
266 iter_priv
->seq_info
->fini_seq_private(seq
->private);
268 bpf_prog_put(iter_priv
->prog
);
/* point seq->private back at the enclosing structure before releasing;
 * presumably so that seq_release_private() frees the pointer that was
 * originally allocated -- confirm
 */
269 seq
->private = iter_priv
;
271 return seq_release_private(inode
, file
);
/* File operations of iterator files. bpf_iter_get_info() compares a file's
 * f_op against this table.
 * NOTE(review): only .read and .release are visible in this listing.
 */
274 const struct file_operations bpf_iter_fops
= {
277 .read
= bpf_seq_read
,
278 .release
= iter_release
,
281 /* The argument reg_info will be cached in bpf_iter_target_info.
282 * The common practice is to declare target reg_info as
283 * a const static variable and pass it as an argument to
284 * bpf_iter_reg_target().
 *
 * The tracking entry is zero-allocated so that its btf_id member starts
 * out as 0: bpf_iter_prog_supported() and bpf_iter_link_attach() read
 * tinfo->btf_id, and the only visible writer is cache_btf_id().
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing and are left as they are.
 */
286 int bpf_iter_reg_target(const struct bpf_iter_reg
*reg_info
)
288 struct bpf_iter_target_info
*tinfo
;
/* kzalloc, not kmalloc: btf_id must not hold indeterminate bytes */
290 tinfo
= kzalloc(sizeof(*tinfo
), GFP_KERNEL
);
294 tinfo
->reg_info
= reg_info
;
295 INIT_LIST_HEAD(&tinfo
->list
);
/* publish the entry on the target list under targets_mutex */
297 mutex_lock(&targets_mutex
);
298 list_add(&tinfo
->list
, &targets
);
299 mutex_unlock(&targets_mutex
);
/* Remove the entry whose reg_info pointer equals @reg_info from the target
 * list, under targets_mutex, and warn if 'found' is still false afterwards.
 * NOTE(review): the declaration and update of 'found', and whatever is done
 * with the unlinked entry, are not visible in this listing.
 */
304 void bpf_iter_unreg_target(const struct bpf_iter_reg
*reg_info
)
306 struct bpf_iter_target_info
*tinfo
;
309 mutex_lock(&targets_mutex
);
310 list_for_each_entry(tinfo
, &targets
, list
) {
/* entries are matched by pointer identity of reg_info */
311 if (reg_info
== tinfo
->reg_info
) {
312 list_del(&tinfo
->list
);
318 mutex_unlock(&targets_mutex
);
320 WARN_ON(found
== false);
/* Store the program's attach_btf_id in the target entry's btf_id field. */
323 static void cache_btf_id(struct bpf_iter_target_info
*tinfo
,
324 struct bpf_prog
*prog
)
326 tinfo
->btf_id
= prog
->aux
->attach_btf_id
;
/* Check @prog against the registered iterator targets.
 * The attach function name is compared with BPF_ITER_FUNC_PREFIX. Under
 * targets_mutex, list entries are matched either by a non-zero cached btf_id
 * equal to the program's attach_btf_id, or by comparing the part of the name
 * after the prefix with reg_info->target, in which case cache_btf_id() is
 * called. The target's ctx_arg_info and ctx_arg_info_size are copied into
 * prog->aux.
 * NOTE(review): the statements that set and return 'supported', the early
 * exit after the prefix test, and the guard around the ctx_arg_info copy are
 * not visible in this listing.
 */
329 bool bpf_iter_prog_supported(struct bpf_prog
*prog
)
331 const char *attach_fname
= prog
->aux
->attach_func_name
;
332 u32 prog_btf_id
= prog
->aux
->attach_btf_id
;
333 const char *prefix
= BPF_ITER_FUNC_PREFIX
;
334 struct bpf_iter_target_info
*tinfo
;
335 int prefix_len
= strlen(prefix
);
336 bool supported
= false;
/* strncmp() is non-zero when the name does not start with the prefix */
338 if (strncmp(attach_fname
, prefix
, prefix_len
))
341 mutex_lock(&targets_mutex
);
342 list_for_each_entry(tinfo
, &targets
, list
) {
/* a btf_id of 0 is never treated as a match here */
343 if (tinfo
->btf_id
&& tinfo
->btf_id
== prog_btf_id
) {
347 if (!strcmp(attach_fname
+ prefix_len
, tinfo
->reg_info
->target
)) {
348 cache_btf_id(tinfo
, prog
);
353 mutex_unlock(&targets_mutex
);
356 prog
->aux
->ctx_arg_info_size
= tinfo
->reg_info
->ctx_arg_info_size
;
357 prog
->aux
->ctx_arg_info
= tinfo
->reg_info
->ctx_arg_info
;
/* Link release callback: if the target provides detach_target(), call it
 * with this link's aux info.
 */
363 static void bpf_iter_link_release(struct bpf_link
*link
)
365 struct bpf_iter_link
*iter_link
=
366 container_of(link
, struct bpf_iter_link
, link
);
368 if (iter_link
->tinfo
->reg_info
->detach_target
)
369 iter_link
->tinfo
->reg_info
->detach_target(&iter_link
->aux
);
/* Link dealloc callback: converts @link to its enclosing bpf_iter_link.
 * NOTE(review): the statement that uses iter_link (presumably freeing it) is
 * not visible in this listing -- confirm.
 */
372 static void bpf_iter_link_dealloc(struct bpf_link
*link
)
374 struct bpf_iter_link
*iter_link
=
375 container_of(link
, struct bpf_iter_link
, link
);
/* update_prog callback: swap the program attached to @link for @new_prog
 * while holding link_mutex.
 * Visible checks: when @old_prog is given, it is compared with link->prog;
 * the current and new programs are compared on type, expected_attach_type
 * and attach_btf_id.
 * NOTE(review): the error codes, the jumps taken when a check fails and the
 * return statement are not visible in this listing.
 */
380 static int bpf_iter_link_replace(struct bpf_link
*link
,
381 struct bpf_prog
*new_prog
,
382 struct bpf_prog
*old_prog
)
386 mutex_lock(&link_mutex
);
387 if (old_prog
&& link
->prog
!= old_prog
) {
392 if (link
->prog
->type
!= new_prog
->type
||
393 link
->prog
->expected_attach_type
!= new_prog
->expected_attach_type
||
394 link
->prog
->aux
->attach_btf_id
!= new_prog
->aux
->attach_btf_id
) {
/* install new_prog and drop the reference to the program it replaced */
399 old_prog
= xchg(&link
->prog
, new_prog
);
400 bpf_prog_put(old_prog
);
403 mutex_unlock(&link_mutex
);
/* show_fdinfo callback: emits a "target_name:" line with the target's name,
 * then hands over to the target's own show_fdinfo() callback with this
 * link's aux info.
 * NOTE(review): the call that takes the format string and any NULL check on
 * show_fdinfo are not visible in this listing.
 */
407 static void bpf_iter_link_show_fdinfo(const struct bpf_link
*link
,
408 struct seq_file
*seq
)
410 struct bpf_iter_link
*iter_link
=
411 container_of(link
, struct bpf_iter_link
, link
);
412 bpf_iter_show_fdinfo_t show_fdinfo
;
415 "target_name:\t%s\n",
416 iter_link
->tinfo
->reg_info
->target
);
418 show_fdinfo
= iter_link
->tinfo
->reg_info
->show_fdinfo
;
420 show_fdinfo(&iter_link
->aux
, seq
);
/* fill_link_info callback: reports the target name to user space and then
 * defers to the target's fill_link_info() callback.
 * info->iter.target_name_len is set to strlen(target) + 1. When the
 * user-supplied length (ulen) is at least that, the whole name including
 * its NUL is copied; the other visible path copies ulen - 1 bytes and writes
 * 'zero' at ubuf[ulen - 1].
 * NOTE(review): the declarations of target_len and zero, the error returns
 * and the guards around the copies and the final callback are not visible in
 * this listing.
 */
423 static int bpf_iter_link_fill_link_info(const struct bpf_link
*link
,
424 struct bpf_link_info
*info
)
426 struct bpf_iter_link
*iter_link
=
427 container_of(link
, struct bpf_iter_link
, link
);
428 char __user
*ubuf
= u64_to_user_ptr(info
->iter
.target_name
);
429 bpf_iter_fill_link_info_t fill_link_info
;
/* ulen is read before target_name_len is overwritten below */
430 u32 ulen
= info
->iter
.target_name_len
;
431 const char *target_name
;
437 target_name
= iter_link
->tinfo
->reg_info
->target
;
438 target_len
= strlen(target_name
);
439 info
->iter
.target_name_len
= target_len
+ 1;
442 if (ulen
>= target_len
+ 1) {
443 if (copy_to_user(ubuf
, target_name
, target_len
+ 1))
448 if (copy_to_user(ubuf
, target_name
, ulen
- 1))
450 if (put_user(zero
, ubuf
+ ulen
- 1))
456 fill_link_info
= iter_link
->tinfo
->reg_info
->fill_link_info
;
458 return fill_link_info(&iter_link
->aux
, info
);
/* Link operations for iterator links. bpf_link_is_iter() and
 * bpf_iter_new_fd() identify iterator links by comparing link->ops with the
 * address of this table.
 */
463 static const struct bpf_link_ops bpf_iter_link_lops
= {
464 .release
= bpf_iter_link_release
,
465 .dealloc
= bpf_iter_link_dealloc
,
466 .update_prog
= bpf_iter_link_replace
,
467 .show_fdinfo
= bpf_iter_link_show_fdinfo
,
468 .fill_link_info
= bpf_iter_link_fill_link_info
,
/* Return true when @link uses the iterator link operations table. */
471 bool bpf_link_is_iter(struct bpf_link
*link
)
473 return link
->ops
== &bpf_iter_link_lops
;
/* Create an iterator link for @prog from the link_create attributes.
 * Visible steps: test target_fd and flags; zero linfo and copy at most
 * sizeof(linfo) bytes of the user-supplied iter_info into it (with
 * bpf_check_uarg_tail_zero() applied to the user buffer); under
 * targets_mutex look for a target whose btf_id equals the program's
 * attach_btf_id; allocate and initialise the link, prime it, call the
 * target's attach_target() callback if there is one, and return the result
 * of bpf_link_settle().
 * NOTE(review): the error returns, the use of 'existed', the declaration of
 * err and the conditions around bpf_link_cleanup() are not visible in this
 * listing.
 */
476 int bpf_iter_link_attach(const union bpf_attr
*attr
, struct bpf_prog
*prog
)
478 union bpf_iter_link_info __user
*ulinfo
;
479 struct bpf_link_primer link_primer
;
480 struct bpf_iter_target_info
*tinfo
;
481 union bpf_iter_link_info linfo
;
482 struct bpf_iter_link
*link
;
483 u32 prog_btf_id
, linfo_len
;
484 bool existed
= false;
487 if (attr
->link_create
.target_fd
|| attr
->link_create
.flags
)
490 memset(&linfo
, 0, sizeof(union bpf_iter_link_info
));
492 ulinfo
= u64_to_user_ptr(attr
->link_create
.iter_info
);
493 linfo_len
= attr
->link_create
.iter_info_len
;
/* true when exactly one of the pointer and the length is zero */
494 if (!ulinfo
^ !linfo_len
)
498 err
= bpf_check_uarg_tail_zero(ulinfo
, sizeof(linfo
),
/* never copy more than the kernel-side union can hold */
502 linfo_len
= min_t(u32
, linfo_len
, sizeof(linfo
));
503 if (copy_from_user(&linfo
, ulinfo
, linfo_len
))
507 prog_btf_id
= prog
->aux
->attach_btf_id
;
508 mutex_lock(&targets_mutex
);
509 list_for_each_entry(tinfo
, &targets
, list
) {
510 if (tinfo
->btf_id
== prog_btf_id
) {
515 mutex_unlock(&targets_mutex
);
519 link
= kzalloc(sizeof(*link
), GFP_USER
| __GFP_NOWARN
);
523 bpf_link_init(&link
->link
, BPF_LINK_TYPE_ITER
, &bpf_iter_link_lops
, prog
);
526 err
= bpf_link_prime(&link
->link
, &link_primer
);
532 if (tinfo
->reg_info
->attach_target
) {
533 err
= tinfo
->reg_info
->attach_target(prog
, &linfo
, &link
->aux
);
535 bpf_link_cleanup(&link_primer
);
540 return bpf_link_settle(&link_primer
);
/* Fill in the bookkeeping fields of @priv_data: the target, seq_info and
 * program pointers, a fresh session id taken from the global counter, a
 * zero seq_num and a cleared done_stop flag.
 */
543 static void init_seq_meta(struct bpf_iter_priv_data
*priv_data
,
544 struct bpf_iter_target_info
*tinfo
,
545 const struct bpf_iter_seq_info
*seq_info
,
546 struct bpf_prog
*prog
)
548 priv_data
->tinfo
= tinfo
;
549 priv_data
->seq_info
= seq_info
;
550 priv_data
->prog
= prog
;
/* atomic64_inc_return() yields the value after the increment */
551 priv_data
->session_id
= atomic64_inc_return(&session_id
);
552 priv_data
->seq_num
= 0;
553 priv_data
->done_stop
= false;
/* Set up @file as a seq_file for the iterator @link.
 * Visible steps: read link->link.prog under link_mutex; open the seq_file
 * with a private area of offsetof(struct bpf_iter_priv_data, target_private)
 * plus seq_info->seq_priv_size bytes; run the optional init_seq_private()
 * callback on the target's part of that area; fill in the metadata with
 * init_seq_meta(); point seq->private at the target's private area.
 * The visible cleanup path calls seq_release_private() and clears
 * file->private_data.
 * NOTE(review): the assignment of tinfo, any reference taken on prog, the
 * error checks, labels and return statements are not visible in this
 * listing.
 */
556 static int prepare_seq_file(struct file
*file
, struct bpf_iter_link
*link
,
557 const struct bpf_iter_seq_info
*seq_info
)
559 struct bpf_iter_priv_data
*priv_data
;
560 struct bpf_iter_target_info
*tinfo
;
561 struct bpf_prog
*prog
;
562 u32 total_priv_dsize
;
563 struct seq_file
*seq
;
566 mutex_lock(&link_mutex
);
567 prog
= link
->link
.prog
;
569 mutex_unlock(&link_mutex
);
/* header part of the private data plus the target-specific tail */
572 total_priv_dsize
= offsetof(struct bpf_iter_priv_data
, target_private
) +
573 seq_info
->seq_priv_size
;
574 priv_data
= __seq_open_private(file
, seq_info
->seq_ops
,
581 if (seq_info
->init_seq_private
) {
582 err
= seq_info
->init_seq_private(priv_data
->target_private
, &link
->aux
);
584 goto release_seq_file
;
587 init_seq_meta(priv_data
, tinfo
, seq_info
, prog
);
588 seq
= file
->private_data
;
/* seq_ops callbacks see only the target's area through seq->private */
589 seq
->private = priv_data
->target_private
;
594 seq_release_private(file
->f_inode
, file
);
595 file
->private_data
= NULL
;
/* Create a new iterator file descriptor for @link.
 * Visible steps: check that @link uses the iterator link ops; reserve an fd
 * with O_RDONLY | O_CLOEXEC; create an anonymous "bpf_iter" file backed by
 * bpf_iter_fops; set up its seq_file with prepare_seq_file(); install the
 * file on the fd.
 * NOTE(review): the declarations of flags, fd, file and err, the error
 * handling and the return statements are not visible in this listing.
 */
601 int bpf_iter_new_fd(struct bpf_link
*link
)
603 struct bpf_iter_link
*iter_link
;
608 if (link
->ops
!= &bpf_iter_link_lops
)
611 flags
= O_RDONLY
| O_CLOEXEC
;
612 fd
= get_unused_fd_flags(flags
);
616 file
= anon_inode_getfile("bpf_iter", &bpf_iter_fops
, NULL
, flags
);
622 iter_link
= container_of(link
, struct bpf_iter_link
, link
);
623 err
= prepare_seq_file(file
, iter_link
, __get_seq_info(iter_link
));
627 fd_install(fd
, file
);
/* Look up the program for the seq_file behind @meta, copy the session id
 * and current seq_num into @meta, and return the program.
 * Visible checks: the file's f_op is compared with bpf_iter_fops, and
 * @in_stop is tested together with the done_stop flag.
 * NOTE(review): how 'seq' is obtained, the declaration of seq_priv, the
 * member argument of container_of() and what is returned when either check
 * fires are not visible in this listing.
 */
637 struct bpf_prog
*bpf_iter_get_info(struct bpf_iter_meta
*meta
, bool in_stop
)
639 struct bpf_iter_priv_data
*iter_priv
;
640 struct seq_file
*seq
;
644 if (seq
->file
->f_op
!= &bpf_iter_fops
)
647 seq_priv
= seq
->private;
648 iter_priv
= container_of(seq_priv
, struct bpf_iter_priv_data
,
651 if (in_stop
&& iter_priv
->done_stop
)
654 meta
->session_id
= iter_priv
->session_id
;
655 meta
->seq_num
= iter_priv
->seq_num
;
657 return iter_priv
->prog
;
/* Run @prog on @ctx and translate its result: a result of 0 is returned as
 * 0, anything else as -EAGAIN.
 * NOTE(review): the declaration of ret and any statements around the
 * BPF_PROG_RUN() call are not visible in this listing.
 */
660 int bpf_iter_run_prog(struct bpf_prog
*prog
, void *ctx
)
666 ret
= BPF_PROG_RUN(prog
, ctx
);
670 /* bpf program can only return 0 or 1:
672 * 1 : retry the same object
673 * The bpf_iter_run_prog() return value
674 * will be seq_ops->show() return value.
676 return ret
== 0 ? 0 : -EAGAIN
;