// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>

#include "../../lib/kstrtox.h"

/* If a kernel subsystem allows eBPF programs to call this function,
 * its verifier_ops->get_func_proto() callback should return
 * bpf_map_lookup_elem_proto, so that the verifier can properly check the
 * arguments; see the sketch below bpf_map_lookup_elem_proto for an example.
 *
 * Map implementations rely on RCU in their lookup/update/delete methods,
 * therefore eBPF programs must run under an RCU read lock if they are
 * allowed to access maps, so rcu_read_lock_held() is checked in all three
 * functions.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

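/*
 * Illustrative sketch (not part of this file): a subsystem's
 * verifier_ops->get_func_proto() callback typically hands these protos back
 * to the verifier. The callback name below (my_prog_func_proto) is a
 * hypothetical placeholder, not an existing in-tree callback.
 *
 *	static const struct bpf_func_proto *
 *	my_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_map_lookup_elem:
 *			return &bpf_map_lookup_elem_proto;
 *		case BPF_FUNC_map_update_elem:
 *			return &bpf_map_update_elem_proto;
 *		default:
 *			// fall back to the generic helpers below
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 */
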
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
	return ktime_get_coarse_ns();
}

const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
	.func		= bpf_ktime_get_coarse_ns,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	strncpy(buf, task->comm, size);

	/* Verifier guarantees that size > 0. For task->comm exceeding
	 * size, guarantee that buf is %NUL-terminated. Unconditionally
	 * done here to save the size test.
	 */
	buf[size - 1] = 0;
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;

	arch_spin_unlock(l);
}

#else

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	atomic_set_release(l, 0);
}

#endif

static DEFINE_PER_CPU(unsigned long, irqsave_flags);

notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	local_irq_save(flags);
	__bpf_spin_lock(lock);
	__this_cpu_write(irqsave_flags, flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	flags = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

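/*
 * Illustrative sketch (not part of this file): from the BPF program side,
 * these helpers operate on a struct bpf_spin_lock embedded in a map value.
 * The value layout and map name below (struct val_t, "vals") are
 * hypothetical, shown only to document the expected usage pattern.
 *
 *	struct val_t {
 *		struct bpf_spin_lock lock;
 *		u64 counter;
 *	};
 *
 *	struct val_t *v = bpf_map_lookup_elem(&vals, &key);
 *	if (v) {
 *		bpf_spin_lock(&v->lock);
 *		v->counter++;
 *		bpf_spin_unlock(&v->lock);
 *	}
 */
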
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src)
{
	struct bpf_spin_lock *lock;

	if (lock_src)
		lock = src + map->spin_lock_off;
	else
		lock = dst + map->spin_lock_off;
	____bpf_spin_lock(lock);
	copy_map_value(map, dst, src);
	____bpf_spin_unlock(lock);
}

BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
	.func		= bpf_jiffies64,
	.ret_type	= RET_INTEGER,
};

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);

	return cgroup_id(cgrp);
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);
	struct cgroup *ancestor;

	ancestor = cgroup_ancestor(cgrp, ancestor_level);
	if (!ancestor)
		return 0;
	return cgroup_id(ancestor);
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
	.func		= bpf_get_current_ancestor_cgroup_id,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

#ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage *,
		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* The flags argument is not used now,
	 * but provides an ability to extend the API.
	 * The verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage;
	void *ptr;

	storage = this_cpu_read(bpf_cgroup_storage[stype]);

	if (stype == BPF_CGROUP_STORAGE_SHARED)
		ptr = &READ_ONCE(storage->buf)->data[0];
	else
		ptr = this_cpu_ptr(storage->percpu_buf);

	return (unsigned long)ptr;
}

const struct bpf_func_proto bpf_get_local_storage_proto = {
	.func		= bpf_get_local_storage,
	.ret_type	= RET_PTR_TO_MAP_VALUE,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
#endif /* CONFIG_CGROUP_BPF */
#endif /* CONFIG_CGROUPS */

#define BPF_STRTOX_BASE_MASK 0x1F

static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *cur_buf = buf;
	size_t cur_len = buf_len;
	unsigned int consumed;
	size_t val_len;
	char str[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	if (base != 0 && base != 8 && base != 10 && base != 16)
		return -EINVAL;

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	while (cur_buf < buf + buf_len && isspace(*cur_buf))
		++cur_buf;

	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
	if (*is_negative)
		++cur_buf;

	consumed = cur_buf - buf;
	cur_len -= consumed;
	if (!cur_len)
		return -EINVAL;

	cur_len = min(cur_len, sizeof(str) - 1);

	memcpy(str, cur_buf, cur_len);
	str[cur_len] = '\0';
	cur_buf = str;

	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
	val_len = _parse_integer(cur_buf, base, res);

	if (val_len & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (val_len == 0)
		return -EINVAL;

	cur_buf += val_len;
	consumed += cur_buf - str;

	return consumed;
}

static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative) {
		if ((long long)-_res > 0)
			return -ERANGE;
		*res = -_res;
	} else {
		if ((long long)_res < 0)
			return -ERANGE;
		*res = _res;
	}
	return err;
}

BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long _res;
	int err;

	err = __bpf_strtoll(buf, buf_len, flags, &_res);
	if (err < 0)
		return err;
	if (_res != (long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative)
		return -EINVAL;
	if (_res != (unsigned long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

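/*
 * Illustrative sketch (not part of this file): typical use of bpf_strtol()
 * from a BPF program, e.g. parsing a number out of user-supplied text. The
 * buffer contents below are hypothetical.
 *
 *	char buf[] = "  -42";
 *	long val;
 *	int ret;
 *
 *	// flags carries the base in BPF_STRTOX_BASE_MASK; 0 means auto-detect
 *	ret = bpf_strtol(buf, sizeof(buf) - 1, 0, &val);
 *	if (ret < 0)
 *		return ret;	// -EINVAL or -ERANGE
 *	// ret is the number of characters consumed, val == -42
 */
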
BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
	   struct bpf_pidns_info *, nsdata, u32, size)
{
	struct task_struct *task = current;
	struct pid_namespace *pidns;
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_pidns_info)))
		goto clear;

	if (unlikely((u64)(dev_t)dev != dev))
		goto clear;

	if (unlikely(!task))
		goto clear;

	pidns = task_active_pid_ns(task);
	if (unlikely(!pidns)) {
		err = -ENOENT;
		goto clear;
	}

	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
		goto clear;

	nsdata->pid = task_pid_nr_ns(task, pidns);
	nsdata->tgid = task_tgid_nr_ns(task, pidns);
	return 0;
clear:
	memset((void *)nsdata, 0, (size_t) size);
	return err;
}

const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
	.func		= bpf_get_ns_current_pid_tgid,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

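/*
 * Illustrative sketch (not part of this file): the dev/ino pair identifies a
 * pid namespace and is typically obtained by user space from stat() on
 * /proc/self/ns/pid, then passed to the program (e.g. via a map or constants
 * patched at load time). Variable names below are hypothetical.
 *
 *	struct stat st;
 *
 *	stat("/proc/self/ns/pid", &st);
 *	// st.st_dev and st.st_ino are handed to the BPF program, which calls:
 *	//	struct bpf_pidns_info nsdata;
 *	//	bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(nsdata));
 */
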
static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}

const struct bpf_func_proto bpf_event_output_data_proto = {
	.func		= bpf_event_output_data,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
	   const void __user *, user_ptr)
{
	int ret = copy_from_user(dst, user_ptr, size);

	if (unlikely(ret)) {
		memset(dst, 0, size);
		ret = -EFAULT;
	}

	return ret;
}

const struct bpf_func_proto bpf_copy_from_user_proto = {
	.func		= bpf_copy_from_user,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
	if (cpu >= nr_cpu_ids)
		return (unsigned long)NULL;

	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
}

const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
	.func		= bpf_per_cpu_ptr,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
}

const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
	.func		= bpf_this_cpu_ptr,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
};

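/*
 * Illustrative sketch (not part of this file): from a BPF program these two
 * helpers take a pointer to a per-CPU kernel variable declared as a typed
 * ksym. The symbol used below (bpf_prog_active) is only an example of a
 * per-CPU kernel variable with BTF; any percpu ksym works the same way.
 *
 *	extern const int bpf_prog_active __ksym;	// per-CPU kernel variable
 *
 *	const int *p = bpf_per_cpu_ptr(&bpf_prog_active, cpu);
 *	if (p)
 *		val_on_cpu = *p;	// value on the requested CPU
 *
 *	p = bpf_this_cpu_ptr(&bpf_prog_active);
 *	val_here = *p;			// always non-NULL: current CPU's value
 */
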
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;

const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_raw_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_ktime_get_coarse_ns:
		return &bpf_ktime_get_coarse_ns_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	default:
		break;
	}

	if (!bpf_capable())
		return NULL;

	switch (func_id) {
	case BPF_FUNC_spin_lock:
		return &bpf_spin_lock_proto;
	case BPF_FUNC_spin_unlock:
		return &bpf_spin_unlock_proto;
	case BPF_FUNC_trace_printk:
		if (!perfmon_capable())
			return NULL;
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_snprintf_btf:
		if (!perfmon_capable())
			return NULL;
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	default:
		break;
	}

	if (!perfmon_capable())
		return NULL;

	switch (func_id) {
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		return &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return &bpf_probe_read_kernel_str_proto;