// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pid_namespace.h>
#include <linux/filter.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/btf_ids.h>
#include <linux/mm_types.h>
#include "mmap_unlock_work.h"

static const char * const iter_task_type_names[] = {

struct bpf_iter_seq_task_common {
        struct pid_namespace *ns;
        enum bpf_iter_task_type type;

struct bpf_iter_seq_task_info {
        /* The first field must be struct bpf_iter_seq_task_common.
         * this is assumed by {init, fini}_seq_pidns() callback functions.
         */
        struct bpf_iter_seq_task_common common;

static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
                                                   u32 *tid,
                                                   bool skip_if_dup_files)
{
        struct task_struct *task;

        /* The first time, the iterator calls this function. */
        pid = find_pid_ns(common->pid, common->ns);
        task = get_pid_task(pid, PIDTYPE_TGID);

        common->pid_visiting = common->pid;

        /* If the control returns to user space and comes back to the
         * kernel again, *tid and common->pid_visiting should be the
         * same for task_seq_start() to pick up the correct task.
         */
        if (*tid == common->pid_visiting) {
                pid = find_pid_ns(common->pid_visiting, common->ns);
                task = get_pid_task(pid, PIDTYPE_PID);

        task = find_task_by_pid_ns(common->pid_visiting, common->ns);

        task = __next_thread(task);

        next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns);

        if (skip_if_dup_files && task->files == task->group_leader->files)

        *tid = common->pid_visiting = next_tid;
        get_task_struct(task);

static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
                                             u32 *tid,
                                             bool skip_if_dup_files)
{
        struct task_struct *task = NULL;

        if (common->type == BPF_TASK_ITER_TID) {
                if (*tid && *tid != common->pid)

                pid = find_pid_ns(common->pid, common->ns);
                task = get_pid_task(pid, PIDTYPE_PID);

        if (common->type == BPF_TASK_ITER_TGID) {
                task = task_group_seq_get_next(common, tid, skip_if_dup_files);

        pid = find_ge_pid(*tid, common->ns);
        *tid = pid_nr_ns(pid, common->ns);
        task = get_pid_task(pid, PIDTYPE_PID);

        } else if (skip_if_dup_files && !thread_group_leader(task) &&
                   task->files == task->group_leader->files) {
                put_task_struct(task);

static void *task_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_iter_seq_task_info *info = seq->private;
        struct task_struct *task;

        task = task_seq_get_next(&info->common, &info->tid, false);

static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct bpf_iter_seq_task_info *info = seq->private;
        struct task_struct *task;

        put_task_struct((struct task_struct *)v);
        task = task_seq_get_next(&info->common, &info->tid, false);

struct bpf_iter__task {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct task_struct *, task);

DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
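
/*
 * Example (illustration only, not part of this file): a minimal BPF-side
 * "iter/task" program that consumes the bpf_iter__task context declared
 * above. This is a sketch; it assumes the usual libbpf build setup
 * (vmlinux.h, bpf_helpers.h, and bpf_tracing.h for BPF_SEQ_PRINTF) and is
 * compiled as a BPF object, not as kernel code.
 *
 *      #include "vmlinux.h"
 *      #include <bpf/bpf_helpers.h>
 *      #include <bpf/bpf_tracing.h>
 *
 *      char _license[] SEC("license") = "GPL";
 *
 *      SEC("iter/task")
 *      int dump_task(struct bpf_iter__task *ctx)
 *      {
 *              struct seq_file *seq = ctx->meta->seq;
 *              struct task_struct *task = ctx->task;
 *
 *              if (!task)      // NULL task marks the end of the iteration
 *                      return 0;
 *              BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 *              return 0;
 *      }
 *
 * Whatever the program emits through the seq_file is what user space sees
 * when it read()s the fd obtained with bpf_iter_create() on the iter link.
 */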

static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
                           bool in_stop)
{
        struct bpf_iter_meta meta;
        struct bpf_iter__task ctx;
        struct bpf_prog *prog;

        prog = bpf_iter_get_info(&meta, in_stop);

        return bpf_iter_run_prog(prog, &ctx);

static int task_seq_show(struct seq_file *seq, void *v)
{
        return __task_seq_show(seq, v, false);

static void task_seq_stop(struct seq_file *seq, void *v)
{
        (void)__task_seq_show(seq, v, true);
        put_task_struct((struct task_struct *)v);

static int bpf_iter_attach_task(struct bpf_prog *prog,
                                union bpf_iter_link_info *linfo,
                                struct bpf_iter_aux_info *aux)
{
        if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)

        aux->task.type = BPF_TASK_ITER_ALL;
        if (linfo->task.tid != 0) {
                aux->task.type = BPF_TASK_ITER_TID;
                aux->task.pid = linfo->task.tid;

        if (linfo->task.pid != 0) {
                aux->task.type = BPF_TASK_ITER_TGID;
                aux->task.pid = linfo->task.pid;

        if (linfo->task.pid_fd != 0) {
                aux->task.type = BPF_TASK_ITER_TGID;

                pid = pidfd_get_pid(linfo->task.pid_fd, &flags);

                tgid = pid_nr_ns(pid, task_active_pid_ns(current));
                aux->task.pid = tgid;
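
/*
 * Example (illustration only): user-space attachment matching the parameter
 * checks above. Exactly one of tid, pid, or pid_fd may be set in
 * bpf_iter_link_info; leaving all three zero iterates every task.
 * skel->progs.dump_task is a hypothetical handle from an assumed libbpf
 * skeleton.
 *
 *      #include <unistd.h>
 *      #include <bpf/libbpf.h>
 *
 *      union bpf_iter_link_info linfo = {};
 *      DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 *      struct bpf_link *link;
 *
 *      linfo.task.pid = getpid();      // whole thread group of this process
 *      opts.link_info = &linfo;
 *      opts.link_info_len = sizeof(linfo);
 *      link = bpf_program__attach_iter(skel->progs.dump_task, &opts);
 */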

static const struct seq_operations task_seq_ops = {
        .start  = task_seq_start,
        .next   = task_seq_next,
        .stop   = task_seq_stop,
        .show   = task_seq_show,
};

struct bpf_iter_seq_task_file_info {
        /* The first field must be struct bpf_iter_seq_task_common.
         * this is assumed by {init, fini}_seq_pidns() callback functions.
         */
        struct bpf_iter_seq_task_common common;
        struct task_struct *task;

static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
        u32 saved_tid = info->tid;
        struct task_struct *curr_task;
        unsigned int curr_fd = info->fd;

        /* If this function returns a non-NULL file object,
         * it holds a reference to the task and the file.
         * Otherwise, it does not hold any reference.
         */
        curr_task = info->task;

        curr_task = task_seq_get_next(&info->common, &info->tid, true);

        info->task = curr_task;
        if (saved_tid == info->tid)

        f = fget_task_next(curr_task, &curr_fd);

        /* the current task is done, go to the next task */
        put_task_struct(curr_task);

        if (info->common.type == BPF_TASK_ITER_TID) {

        saved_tid = ++(info->tid);

static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;

        file = task_file_seq_get_next(info);
        if (file && *pos == 0)

static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;

        fput((struct file *)v);
        return task_file_seq_get_next(info);

struct bpf_iter__task_file {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct task_struct *, task);
        __bpf_md_ptr(struct file *, file);

DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
                     struct task_struct *task, u32 fd,
                     struct file *file)
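
/*
 * Example (illustration only): a BPF-side "iter/task_file" program using the
 * bpf_iter__task_file context above; a sketch that assumes the same libbpf
 * build setup as the earlier iter/task example.
 *
 *      SEC("iter/task_file")
 *      int dump_task_file(struct bpf_iter__task_file *ctx)
 *      {
 *              struct seq_file *seq = ctx->meta->seq;
 *              struct task_struct *task = ctx->task;
 *              struct file *file = ctx->file;
 *
 *              if (!task || !file)     // both are NULL at the end of the walk
 *                      return 0;
 *              BPF_SEQ_PRINTF(seq, "%8d %8d %lx\n", task->tgid, ctx->fd,
 *                             (long)file->f_op);
 *              return 0;
 *      }
 */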

static int __task_file_seq_show(struct seq_file *seq, struct file *file,
                                bool in_stop)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;
        struct bpf_iter__task_file ctx;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;

        prog = bpf_iter_get_info(&meta, in_stop);

        ctx.task = info->task;

        return bpf_iter_run_prog(prog, &ctx);

static int task_file_seq_show(struct seq_file *seq, void *v)
{
        return __task_file_seq_show(seq, v, false);

static void task_file_seq_stop(struct seq_file *seq, void *v)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;

        (void)__task_file_seq_show(seq, v, true);

        fput((struct file *)v);
        put_task_struct(info->task);

static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
        struct bpf_iter_seq_task_common *common = priv_data;

        common->ns = get_pid_ns(task_active_pid_ns(current));
        common->type = aux->task.type;
        common->pid = aux->task.pid;

static void fini_seq_pidns(void *priv_data)
{
        struct bpf_iter_seq_task_common *common = priv_data;

        put_pid_ns(common->ns);

static const struct seq_operations task_file_seq_ops = {
        .start  = task_file_seq_start,
        .next   = task_file_seq_next,
        .stop   = task_file_seq_stop,
        .show   = task_file_seq_show,
};

struct bpf_iter_seq_task_vma_info {
        /* The first field must be struct bpf_iter_seq_task_common.
         * this is assumed by {init, fini}_seq_pidns() callback functions.
         */
        struct bpf_iter_seq_task_common common;
        struct task_struct *task;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        unsigned long prev_vm_start;
        unsigned long prev_vm_end;

enum bpf_task_vma_iter_find_op {
        task_vma_iter_first_vma,        /* use find_vma() with addr 0 */
        task_vma_iter_next_vma,         /* use vma_next() with curr_vma */
        task_vma_iter_find_vma,         /* use find_vma() to find next vma */
};

static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
        enum bpf_task_vma_iter_find_op op;
        struct vm_area_struct *curr_vma;
        struct task_struct *curr_task;
        struct mm_struct *curr_mm;
        u32 saved_tid = info->tid;

        /* If this function returns a non-NULL vma, it holds a reference to
         * the task_struct, holds a refcount on mm->mm_users, and holds
         * read lock on vma->mm->mmap_lock.
         * If this function returns NULL, it does not hold any reference or
         * lock.
         */

        curr_task = info->task;
        curr_vma = info->vma;

        /* In case of lock contention, drop mmap_lock to unblock
         * the writer.
         *
         * After relock, call find_vma(mm, prev_vm_end - 1) to find
         * new vma to process.
         *
         *   +------+------+-----------+
         *   | VMA1 | VMA2 | VMA3      |
         *   +------+------+-----------+
         *
         * For example, curr_vma == VMA2. Before unlock, we set
         * prev_vm_start and prev_vm_end to VMA2's range.
         *
         * There are a few cases:
         *
         * 1) VMA2 is freed, but VMA3 exists.
         *
         *    find_vma() will return VMA3, just process VMA3.
         *
         * 2) VMA2 still exists.
         *
         *    find_vma() will return VMA2, process VMA2->next.
         *
         * 3) no more vma in this mm.
         *
         *    Process the next task.
         *
         * 4) find_vma() returns a different vma, VMA2'.
         *
         *    4.1) If VMA2 covers the same range as VMA2', skip VMA2',
         *         because we already covered the range;
         *    4.2) VMA2 and VMA2' cover different ranges; process VMA2'.
         */

        if (mmap_lock_is_contended(curr_mm)) {
                info->prev_vm_start = curr_vma->vm_start;
                info->prev_vm_end = curr_vma->vm_end;
                op = task_vma_iter_find_vma;
                mmap_read_unlock(curr_mm);
                if (mmap_read_lock_killable(curr_mm)) {

        op = task_vma_iter_next_vma;

        curr_task = task_seq_get_next(&info->common, &info->tid, true);

        if (saved_tid != info->tid) {
                /* new task, process the first vma */
                op = task_vma_iter_first_vma;
        } else {
                /* Found the same tid, which means user space finished the
                 * data in the previous buffer and read more.
                 * We dropped mmap_lock before returning to user
                 * space, so it is necessary to use find_vma() to
                 * find the next vma to process.
                 */
                op = task_vma_iter_find_vma;
        }

        curr_mm = get_task_mm(curr_task);

        if (mmap_read_lock_killable(curr_mm)) {

        case task_vma_iter_first_vma:
                curr_vma = find_vma(curr_mm, 0);
        case task_vma_iter_next_vma:
                curr_vma = find_vma(curr_mm, curr_vma->vm_end);
        case task_vma_iter_find_vma:
                /* We dropped mmap_lock so it is necessary to use find_vma
                 * to find the next vma. This is similar to the mechanism
                 * in show_smaps_rollup().
                 */
                curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
                /* case 1) and 4.2) above just use curr_vma */

                /* check for case 2) or case 4.1) above */
                if (curr_vma &&
                    curr_vma->vm_start == info->prev_vm_start &&
                    curr_vma->vm_end == info->prev_vm_end)
                        curr_vma = find_vma(curr_mm, curr_vma->vm_end);

        /* case 3) above, or case 2) 4.1) with vma->next == NULL */
        mmap_read_unlock(curr_mm);

        info->task = curr_task;
        info->vma = curr_vma;

        if (info->common.type == BPF_TASK_ITER_TID)

        put_task_struct(curr_task);

        put_task_struct(curr_task);

static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;
        struct vm_area_struct *vma;

        vma = task_vma_seq_get_next(info);
        if (vma && *pos == 0)

static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;

        return task_vma_seq_get_next(info);

struct bpf_iter__task_vma {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct task_struct *, task);
        __bpf_md_ptr(struct vm_area_struct *, vma);

DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
                     struct task_struct *task, struct vm_area_struct *vma)
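
/*
 * Example (illustration only): a BPF-side "iter/task_vma" program using the
 * bpf_iter__task_vma context above; a sketch assuming the same libbpf build
 * setup as the earlier examples.
 *
 *      SEC("iter/task_vma")
 *      int dump_vma(struct bpf_iter__task_vma *ctx)
 *      {
 *              struct seq_file *seq = ctx->meta->seq;
 *              struct task_struct *task = ctx->task;
 *              struct vm_area_struct *vma = ctx->vma;
 *
 *              if (!task || !vma)
 *                      return 0;
 *              BPF_SEQ_PRINTF(seq, "%8d %lx-%lx\n", task->tgid,
 *                             vma->vm_start, vma->vm_end);
 *              return 0;
 *      }
 */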

static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;
        struct bpf_iter__task_vma ctx;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;

        prog = bpf_iter_get_info(&meta, in_stop);

        ctx.task = info->task;

        return bpf_iter_run_prog(prog, &ctx);

static int task_vma_seq_show(struct seq_file *seq, void *v)
{
        return __task_vma_seq_show(seq, false);

static void task_vma_seq_stop(struct seq_file *seq, void *v)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;

        (void)__task_vma_seq_show(seq, true);

        /* info->vma has not been seen by the BPF program. If the
         * user space reads more, task_vma_seq_get_next should
         * return this vma again. Set prev_vm_start to ~0UL,
         * so that we don't skip the vma returned by the next
         * find_vma() (case task_vma_iter_find_vma in
         * task_vma_seq_get_next()).
         */
        info->prev_vm_start = ~0UL;
        info->prev_vm_end = info->vma->vm_end;
        mmap_read_unlock(info->mm);

        put_task_struct(info->task);

static const struct seq_operations task_vma_seq_ops = {
        .start  = task_vma_seq_start,
        .next   = task_vma_seq_next,
        .stop   = task_vma_seq_stop,
        .show   = task_vma_seq_show,
};

static const struct bpf_iter_seq_info task_seq_info = {
        .seq_ops                = &task_seq_ops,
        .init_seq_private       = init_seq_pidns,
        .fini_seq_private       = fini_seq_pidns,
        .seq_priv_size          = sizeof(struct bpf_iter_seq_task_info),
};

static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
{
        switch (aux->task.type) {
        case BPF_TASK_ITER_TID:
                info->iter.task.tid = aux->task.pid;
                break;
        case BPF_TASK_ITER_TGID:
                info->iter.task.pid = aux->task.pid;
                break;

static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
{
        seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
        if (aux->task.type == BPF_TASK_ITER_TID)
                seq_printf(seq, "tid:\t%u\n", aux->task.pid);
        else if (aux->task.type == BPF_TASK_ITER_TGID)
                seq_printf(seq, "pid:\t%u\n", aux->task.pid);

static struct bpf_iter_reg task_reg_info = {
        .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__task, task),
                  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
        },
        .seq_info               = &task_seq_info,
        .fill_link_info         = bpf_iter_fill_link_info,
        .show_fdinfo            = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_file_seq_info = {
        .seq_ops                = &task_file_seq_ops,
        .init_seq_private       = init_seq_pidns,
        .fini_seq_private       = fini_seq_pidns,
        .seq_priv_size          = sizeof(struct bpf_iter_seq_task_file_info),
};

static struct bpf_iter_reg task_file_reg_info = {
        .target                 = "task_file",
        .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__task_file, task),
                  PTR_TO_BTF_ID_OR_NULL },
                { offsetof(struct bpf_iter__task_file, file),
                  PTR_TO_BTF_ID_OR_NULL },
        },
        .seq_info               = &task_file_seq_info,
        .fill_link_info         = bpf_iter_fill_link_info,
        .show_fdinfo            = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_vma_seq_info = {
        .seq_ops                = &task_vma_seq_ops,
        .init_seq_private       = init_seq_pidns,
        .fini_seq_private       = fini_seq_pidns,
        .seq_priv_size          = sizeof(struct bpf_iter_seq_task_vma_info),
};

static struct bpf_iter_reg task_vma_reg_info = {
        .target                 = "task_vma",
        .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__task_vma, task),
                  PTR_TO_BTF_ID_OR_NULL },
                { offsetof(struct bpf_iter__task_vma, vma),
                  PTR_TO_BTF_ID_OR_NULL },
        },
        .seq_info               = &task_vma_seq_info,
        .fill_link_info         = bpf_iter_fill_link_info,
        .show_fdinfo            = bpf_iter_task_show_fdinfo,
};

BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
           bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
        struct mmap_unlock_irq_work *work = NULL;
        struct vm_area_struct *vma;
        bool irq_work_busy = false;
        struct mm_struct *mm;

        irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);

        if (irq_work_busy || !mmap_read_trylock(mm))

        vma = find_vma(mm, start);

        if (vma && vma->vm_start <= start && vma->vm_end > start) {
                callback_fn((u64)(long)task, (u64)(long)vma,
                            (u64)(long)callback_ctx, 0, 0);

        bpf_mmap_unlock_mm(work, mm);

const struct bpf_func_proto bpf_find_vma_proto = {
        .func           = bpf_find_vma,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_BTF_ID,
        .arg1_btf_id    = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_PTR_TO_FUNC,
        .arg4_type      = ARG_PTR_TO_STACK_OR_NULL,
        .arg5_type      = ARG_ANYTHING,
};
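
/*
 * Example (illustration only): BPF-side use of the bpf_find_vma() helper
 * implemented above. The callback runs at most once, for the vma covering
 * the requested address, while the mmap_lock is held for read. This sketch
 * assumes a libbpf build with vmlinux.h; target_addr and handle_event are
 * hypothetical names.
 *
 *      struct callback_ctx {
 *              int found;
 *      };
 *
 *      static long check_vma(struct task_struct *task,
 *                            struct vm_area_struct *vma,
 *                            struct callback_ctx *data)
 *      {
 *              data->found = 1;
 *              return 0;
 *      }
 *
 *      const volatile __u64 target_addr;       // set by user space
 *
 *      SEC("perf_event")
 *      int handle_event(void *ctx)
 *      {
 *              struct task_struct *task = bpf_get_current_task_btf();
 *              struct callback_ctx data = { .found = 0 };
 *
 *              bpf_find_vma(task, target_addr, check_vma, &data, 0);
 *              return 0;
 *      }
 */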

struct bpf_iter_task_vma_kern_data {
        struct task_struct *task;
        struct mm_struct *mm;
        struct mmap_unlock_irq_work *work;
        struct vma_iterator vmi;
};

struct bpf_iter_task_vma {
        /* opaque iterator state; having __u64 here allows to preserve correct
         * alignment requirements in vmlinux.h, generated from BTF
         */
        __u64 __opaque[1];
} __attribute__((aligned(8)));

/* Non-opaque version of bpf_iter_task_vma */
struct bpf_iter_task_vma_kern {
        struct bpf_iter_task_vma_kern_data *data;
} __attribute__((aligned(8)));

__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
                                      struct task_struct *task, u64 addr)
{
        struct bpf_iter_task_vma_kern *kit = (void *)it;
        bool irq_work_busy = false;

        BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma));
        BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma));

        /* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized
         * before, so non-NULL kit->data doesn't point to previously
         * bpf_mem_alloc'd bpf_iter_task_vma_kern_data
         */
        kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data));

        kit->data->task = get_task_struct(task);
        kit->data->mm = task->mm;
        if (!kit->data->mm) {
                goto err_cleanup_iter;

        /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */
        irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work);
        if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) {
                goto err_cleanup_iter;

        vma_iter_init(&kit->data->vmi, kit->data->mm, addr);

err_cleanup_iter:
        put_task_struct(kit->data->task);
        bpf_mem_free(&bpf_global_ma, kit->data);
        /* NULL kit->data signals failed bpf_iter_task_vma initialization */

__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it)
{
        struct bpf_iter_task_vma_kern *kit = (void *)it;

        if (!kit->data) /* bpf_iter_task_vma_new failed */
                return NULL;
        return vma_next(&kit->data->vmi);

__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
{
        struct bpf_iter_task_vma_kern *kit = (void *)it;

        if (kit->data) {
                bpf_mmap_unlock_mm(kit->data->work, kit->data->mm);
                put_task_struct(kit->data->task);
                bpf_mem_free(&bpf_global_ma, kit->data);
        }

__bpf_kfunc_end_defs();
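
/*
 * Example (illustration only): open-coded use of the three task_vma kfuncs
 * above from a BPF tracing program. A sketch that assumes extern __ksym
 * declarations for the kfuncs, that they are available to the chosen program
 * type, and that count is a global owned by the program; in practice the
 * loop is usually bounded.
 *
 *      extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
 *                                       struct task_struct *task,
 *                                       __u64 addr) __ksym;
 *      extern struct vm_area_struct *
 *      bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
 *      extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
 *
 *      int count;
 *
 *      SEC("raw_tp/sys_enter")
 *      int count_vmas(void *ctx)
 *      {
 *              struct task_struct *task = bpf_get_current_task_btf();
 *              struct bpf_iter_task_vma vma_it;
 *              struct vm_area_struct *vma;
 *
 *              bpf_iter_task_vma_new(&vma_it, task, 0);
 *              while ((vma = bpf_iter_task_vma_next(&vma_it)))
 *                      count++;
 *              // destroy() is safe even if new() failed; it checks kit->data
 *              bpf_iter_task_vma_destroy(&vma_it);
 *              return 0;
 *      }
 */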

#ifdef CONFIG_CGROUPS

struct bpf_iter_css_task {
        __u64 __opaque[1];
} __attribute__((aligned(8)));

struct bpf_iter_css_task_kern {
        struct css_task_iter *css_it;
} __attribute__((aligned(8)));

__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
                struct cgroup_subsys_state *css, unsigned int flags)
{
        struct bpf_iter_css_task_kern *kit = (void *)it;

        BUILD_BUG_ON(sizeof(struct bpf_iter_css_task_kern) != sizeof(struct bpf_iter_css_task));
        BUILD_BUG_ON(__alignof__(struct bpf_iter_css_task_kern) !=
                     __alignof__(struct bpf_iter_css_task));

        case CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED:
        case CSS_TASK_ITER_PROCS:

        kit->css_it = bpf_mem_alloc(&bpf_global_ma, sizeof(struct css_task_iter));

        css_task_iter_start(css, flags, kit->css_it);

__bpf_kfunc struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it)
{
        struct bpf_iter_css_task_kern *kit = (void *)it;

        return css_task_iter_next(kit->css_it);

__bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it)
{
        struct bpf_iter_css_task_kern *kit = (void *)it;

        css_task_iter_end(kit->css_it);
        bpf_mem_free(&bpf_global_ma, kit->css_it);

__bpf_kfunc_end_defs();

#endif /* CONFIG_CGROUPS */
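
/*
 * Example (illustration only): walking the processes of a cgroup with the
 * css_task kfuncs above, here from an "iter/cgroup" program. This is a
 * sketch; it assumes extern __ksym declarations for the three kfuncs and
 * that the verifier permits them for this program type (their use is
 * restricted to an allowlist of program/attach types).
 *
 *      SEC("iter/cgroup")
 *      int count_cgroup_procs(struct bpf_iter__cgroup *ctx)
 *      {
 *              struct cgroup *cgrp = ctx->cgroup;
 *              struct bpf_iter_css_task css_it;
 *              struct task_struct *task;
 *              int nr = 0;
 *
 *              if (!cgrp)
 *                      return 0;
 *              bpf_iter_css_task_new(&css_it, &cgrp->self, CSS_TASK_ITER_PROCS);
 *              while ((task = bpf_iter_css_task_next(&css_it)))
 *                      nr++;
 *              bpf_iter_css_task_destroy(&css_it);
 *              return 0;
 *      }
 */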

struct bpf_iter_task {

} __attribute__((aligned(8)));

struct bpf_iter_task_kern {
        struct task_struct *task;
        struct task_struct *pos;

} __attribute__((aligned(8)));

enum {
        /* all processes in the system */
        BPF_TASK_ITER_ALL_PROCS,
        /* all threads in the system */
        BPF_TASK_ITER_ALL_THREADS,
        /* all threads of a specific process */
        BPF_TASK_ITER_PROC_THREADS
};

__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
                struct task_struct *task__nullable, unsigned int flags)
{
        struct bpf_iter_task_kern *kit = (void *)it;

        BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) > sizeof(struct bpf_iter_task));
        BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
                     __alignof__(struct bpf_iter_task));

        case BPF_TASK_ITER_ALL_THREADS:
        case BPF_TASK_ITER_ALL_PROCS:
        case BPF_TASK_ITER_PROC_THREADS:

        if (flags == BPF_TASK_ITER_PROC_THREADS)
                kit->task = task__nullable;
        else
                kit->task = &init_task;
        kit->pos = kit->task;

__bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it)
{
        struct bpf_iter_task_kern *kit = (void *)it;
        struct task_struct *pos;

        if (flags == BPF_TASK_ITER_ALL_PROCS)

        kit->pos = __next_thread(kit->pos);
        if (kit->pos || flags == BPF_TASK_ITER_PROC_THREADS)

        kit->task = next_task(kit->task);
        if (kit->task == &init_task)

        kit->pos = kit->task;

__bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it)

__bpf_kfunc_end_defs();
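
/*
 * Example (illustration only): open-coded use of the task iterator kfuncs
 * above, counting the threads of the current process. A sketch for a
 * sleepable tracing program; it assumes extern __ksym declarations for the
 * kfuncs, that the BPF_TASK_ITER_* flags come from vmlinux.h, and that
 * bpf_rcu_read_lock()/bpf_rcu_read_unlock() are needed around the walk in
 * this program type.
 *
 *      SEC("lsm.s/file_open")
 *      int count_threads(void *ctx)
 *      {
 *              struct task_struct *cur = bpf_get_current_task_btf();
 *              struct bpf_iter_task task_it;
 *              struct task_struct *t;
 *              int nr_threads = 0;
 *
 *              bpf_rcu_read_lock();
 *              bpf_iter_task_new(&task_it, cur, BPF_TASK_ITER_PROC_THREADS);
 *              while ((t = bpf_iter_task_next(&task_it)))
 *                      nr_threads++;
 *              bpf_iter_task_destroy(&task_it);
 *              bpf_rcu_read_unlock();
 *              return 0;
 *      }
 */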

DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

static void do_mmap_read_unlock(struct irq_work *entry)
{
        struct mmap_unlock_irq_work *work;

        if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))

        work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
        mmap_read_unlock_non_owner(work->mm);

static int __init task_iter_init(void)
{
        struct mmap_unlock_irq_work *work;

        for_each_possible_cpu(cpu) {
                work = per_cpu_ptr(&mmap_unlock_work, cpu);
                init_irq_work(&work->irq_work, do_mmap_read_unlock);
        }

        task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
        ret = bpf_iter_reg_target(&task_reg_info);

        task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
        task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
        ret = bpf_iter_reg_target(&task_file_reg_info);

        task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
        task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
        return bpf_iter_reg_target(&task_vma_reg_info);

late_initcall(task_iter_init);