// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>

#include "../../lib/kstrtox.h"

/* If a kernel subsystem allows eBPF programs to call this function, it should
 * return bpf_map_lookup_elem_proto from its own verifier_ops->get_func_proto()
 * callback, so that the verifier can properly check the arguments.
 *
 * Different map implementations will rely on rcu in map methods
 * lookup/update/delete, therefore eBPF programs must run under rcu lock
 * if program is allowed to access maps, so check rcu_read_lock_held in
 * all three functions.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};
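
/*
 * Illustrative sketch, not part of this file: how a subsystem's
 * get_func_proto() callback, referenced in the comment above, might expose
 * the map helpers to its program type and fall back to the common helpers.
 * The "example_*" names are made up for the example; the callback signature
 * follows struct bpf_verifier_ops::get_func_proto, and other mandatory
 * verifier_ops callbacks are omitted.
 *
 *	static const struct bpf_func_proto *
 *	example_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_map_lookup_elem:
 *			return &bpf_map_lookup_elem_proto;
 *		case BPF_FUNC_map_update_elem:
 *			return &bpf_map_update_elem_proto;
 *		default:
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 *
 *	const struct bpf_verifier_ops example_verifier_ops = {
 *		.get_func_proto	= example_func_proto,
 *	};
 */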

BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
};
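
/*
 * BPF-program-side sketch, not part of this file: the push/pop/peek helpers
 * above are backed by map->ops of queue/stack style maps such as
 * BPF_MAP_TYPE_QUEUE. A minimal producer using bpf_map_push_elem(), assuming
 * libbpf's bpf_helpers.h and BTF-defined map syntax; the map, section and
 * function names are made up.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_QUEUE);
 *		__uint(max_entries, 1024);
 *		__uint(value_size, sizeof(__u64));
 *	} events SEC(".maps");
 *
 *	SEC("tracepoint/syscalls/sys_enter_execve")
 *	int push_event(void *ctx)
 *	{
 *		__u64 id = bpf_get_current_pid_tgid();
 *
 *		// BPF_ANY fails when full; BPF_EXIST would drop the oldest element
 *		bpf_map_push_elem(&events, &id, BPF_ANY);
 *		return 0;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */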

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	strncpy(buf, task->comm, size);

	/* Verifier guarantees that size > 0. For task->comm exceeding
	 * size, guarantee that buf is %NUL-terminated. Unconditionally
	 * done here to save the size test.
	 */
	buf[size - 1] = 0;
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};
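
/*
 * BPF-program-side sketch, not part of this file: typical use of the
 * "current task" helpers above. Note the packing done by
 * bpf_get_current_pid_tgid(): tgid in the upper 32 bits, pid (the thread id)
 * in the lower 32 bits. Assumes libbpf's bpf_helpers.h; the section and
 * function names are arbitrary.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("tracepoint/syscalls/sys_enter_openat")
 *	int show_current(void *ctx)
 *	{
 *		char comm[16];	// TASK_COMM_LEN
 *		__u64 id = bpf_get_current_pid_tgid();
 *		__u32 tgid = id >> 32;	// userspace "PID"
 *		__u32 pid = (__u32)id;	// userspace "TID"
 *
 *		bpf_get_current_comm(comm, sizeof(comm));
 *		bpf_printk("%s tgid=%d pid=%d", comm, tgid, pid);
 *		return 0;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */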

#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;

	arch_spin_unlock(l);
}

#else

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	atomic_set_release(l, 0);
}

#endif

static DEFINE_PER_CPU(unsigned long, irqsave_flags);

notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	local_irq_save(flags);
	__bpf_spin_lock(lock);
	__this_cpu_write(irqsave_flags, flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	unsigned long flags;

	flags = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(flags);
	return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};
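
/*
 * BPF-program-side sketch, not part of this file: bpf_spin_lock() and
 * bpf_spin_unlock() operate on a struct bpf_spin_lock embedded in a map
 * value, and the verifier enforces that every lock is paired with an unlock
 * in the same program. Assumes libbpf's bpf_helpers.h; the map, struct and
 * section names are made up.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	struct val {
 *		struct bpf_spin_lock lock;
 *		__u64 counter;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, struct val);
 *	} counters SEC(".maps");
 *
 *	SEC("tc")
 *	int bump(struct __sk_buff *skb)
 *	{
 *		__u32 key = 0;
 *		struct val *v = bpf_map_lookup_elem(&counters, &key);
 *
 *		if (!v)
 *			return 0;
 *		bpf_spin_lock(&v->lock);
 *		v->counter++;
 *		bpf_spin_unlock(&v->lock);
 *		return 0;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */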

void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src)
{
	struct bpf_spin_lock *lock;

	if (lock_src)
		lock = src + map->spin_lock_off;
	else
		lock = dst + map->spin_lock_off;
	preempt_disable();
	____bpf_spin_lock(lock);
	copy_map_value(map, dst, src);
	____bpf_spin_unlock(lock);
	preempt_enable();
}

BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
	.func		= bpf_jiffies64,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);

	return cgroup_id(cgrp);
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);
	struct cgroup *ancestor;

	ancestor = cgroup_ancestor(cgrp, ancestor_level);
	if (!ancestor)
		return 0;
	return cgroup_id(ancestor);
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
	.func		= bpf_get_current_ancestor_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

#ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* The flags argument is not used yet, but provides an ability to
	 * extend the API. The verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage;
	void *ptr;

	storage = this_cpu_read(bpf_cgroup_storage[stype]);

	if (stype == BPF_CGROUP_STORAGE_SHARED)
		ptr = &READ_ONCE(storage->buf)->data[0];
	else
		ptr = this_cpu_ptr(storage->percpu_buf);

	return (unsigned long)ptr;
}

const struct bpf_func_proto bpf_get_local_storage_proto = {
	.func		= bpf_get_local_storage,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
#endif	/* CONFIG_CGROUP_BPF */

#define BPF_STRTOX_BASE_MASK 0x1F

static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *cur_buf = buf;
	size_t cur_len = buf_len;
	unsigned int consumed;
	size_t val_len;
	char str[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	if (base != 0 && base != 8 && base != 10 && base != 16)
		return -EINVAL;

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	while (cur_buf < buf + buf_len && isspace(*cur_buf))
		++cur_buf;

	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
	if (*is_negative)
		++cur_buf;

	consumed = cur_buf - buf;
	cur_len -= consumed;
	if (!cur_len)
		return -EINVAL;

	cur_len = min(cur_len, sizeof(str) - 1);

	/* Copy the bounded input into a NUL-terminated scratch buffer so the
	 * kstrtox parsers below can be reused on it.
	 */
	memcpy(str, cur_buf, cur_len);
	str[cur_len] = '\0';
	cur_buf = str;

	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
	val_len = _parse_integer(cur_buf, base, res);

	if (val_len & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (val_len == 0)
		return -EINVAL;

	cur_buf += val_len;
	consumed += cur_buf - str;

	return consumed;
}

static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative) {
		if ((long long)-_res > 0)
			return -ERANGE;
		*res = -_res;
	} else {
		if ((long long)_res < 0)
			return -ERANGE;
		*res = _res;
	}
	return err;
}

BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long _res;
	int err;

	err = __bpf_strtoll(buf, buf_len, flags, &_res);
	if (err < 0)
		return err;
	if (_res != (long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative)
		return -EINVAL;
	if (_res != (unsigned long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};
#endif	/* CONFIG_CGROUPS */
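
/*
 * BPF-program-side sketch, not part of this file: bpf_strtol() parses a
 * length-bounded, not necessarily NUL-terminated buffer. On success it
 * returns the number of bytes consumed and stores the value through the
 * result pointer; the low 5 bits of flags select the base, with 0 meaning
 * auto-detection as in __bpf_strtoull() above. Assumes libbpf's
 * bpf_helpers.h; the program below only allows non-negative sysctl writes
 * and is purely illustrative.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("cgroup/sysctl")
 *	int parse_value(struct bpf_sysctl *ctx)
 *	{
 *		char buf[16] = {};
 *		long val;
 *
 *		if (bpf_sysctl_get_new_value(ctx, buf, sizeof(buf)) < 0)
 *			return 1;	// not a write, allow
 *		if (bpf_strtol(buf, sizeof(buf), 0, &val) < 0)
 *			return 0;	// reject: could not parse
 *		return val >= 0;	// allow only non-negative values
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */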

BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
	   struct bpf_pidns_info *, nsdata, u32, size)
{
	struct task_struct *task = current;
	struct pid_namespace *pidns;
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_pidns_info)))
		goto clear;

	if (unlikely((u64)(dev_t)dev != dev))
		goto clear;

	if (unlikely(!task))
		goto clear;

	pidns = task_active_pid_ns(task);
	if (unlikely(!pidns)) {
		err = -ENOENT;
		goto clear;
	}

	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
		goto clear;

	nsdata->pid = task_pid_nr_ns(task, pidns);
	nsdata->tgid = task_tgid_nr_ns(task, pidns);
	return 0;
clear:
	memset((void *)nsdata, 0, (size_t) size);
	return err;
}

const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
	.func		= bpf_get_ns_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
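
/*
 * BPF-program-side sketch, not part of this file: the dev/ino pair passed to
 * bpf_get_ns_current_pid_tgid() identifies the pid namespace the caller
 * cares about; userspace typically obtains it by stat()ing /proc/self/ns/pid
 * and patches it into the program before load, shown here as read-only
 * globals. Assumes libbpf's bpf_helpers.h; all names are made up.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	const volatile __u64 pidns_dev;	// filled in by the loader
 *	const volatile __u64 pidns_ino;	// filled in by the loader
 *
 *	SEC("tracepoint/syscalls/sys_enter_write")
 *	int ns_pid(void *ctx)
 *	{
 *		struct bpf_pidns_info ns = {};
 *
 *		if (bpf_get_ns_current_pid_tgid(pidns_dev, pidns_ino,
 *						&ns, sizeof(ns)))
 *			return 0;	// wrong namespace or error
 *		bpf_printk("ns pid=%d tgid=%d", ns.pid, ns.tgid);
 *		return 0;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */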

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}

const struct bpf_func_proto bpf_event_output_data_proto = {
	.func		= bpf_event_output_data,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;

const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_raw_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	default:
		break;
	}

	/* All remaining helpers are restricted to bpf_capable() callers. */
	if (!bpf_capable())
		return NULL;

	switch (func_id) {
	case BPF_FUNC_spin_lock:
		return &bpf_spin_lock_proto;
	case BPF_FUNC_spin_unlock:
		return &bpf_spin_unlock_proto;
	case BPF_FUNC_trace_printk:
		if (!perfmon_capable())
			return NULL;
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	default:
		break;
	}

	/* The tracing helpers below additionally require perfmon_capable(). */
	if (!perfmon_capable())
		return NULL;

	switch (func_id) {
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	case BPF_FUNC_probe_read_kernel:
		return &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return &bpf_probe_read_kernel_str_proto;