// SPDX-License-Identifier: GPL-2.0-only
/* Unstable Conntrack Helpers for XDP and TC-BPF hook
 *
 * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
 * allowed to break compatibility for these functions since the interface they
 * are exposed through to BPF programs is explicitly unstable.
 */

#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
#include <net/xdp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
/* bpf_ct_opts - Options for CT lookup helpers
 *
 * Members:
 * @netns_id	- Specify the network namespace for lookup
 *		  Values:
 *		    BPF_F_CURRENT_NETNS (-1)
 *		      Use namespace associated with ctx (xdp_md, __sk_buff)
 *		    [0, S32_MAX]
 *		      Network Namespace ID
 * @error	- Out parameter, set for any errors encountered
 *		  Values:
 *		    -EINVAL - Passed NULL for bpf_tuple pointer
 *		    -EINVAL - opts->reserved is not 0
 *		    -EINVAL - netns_id is less than -1
 *		    -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16) or 12
 *		    -EINVAL - opts->ct_zone_id set when
 *			      opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
 *		    -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
 *		    -ENONET - No network namespace found for netns_id
 *		    -ENOENT - Conntrack lookup could not find entry for tuple
 *		    -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
 *				    or sizeof(tuple->ipv6)
 * @l4proto	- Layer 4 protocol
 *		  Values:
 *		    IPPROTO_TCP, IPPROTO_UDP
 * @dir		- Connection tracking tuple direction
 * @ct_zone_id	- Connection tracking zone id
 * @ct_zone_dir	- Connection tracking zone direction
 * @reserved	- Reserved member, will be reused for more options in future
 *		  Values:
 *		    0
 */
struct bpf_ct_opts {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 dir;
	u16 ct_zone_id;
	u8 ct_zone_dir;
	u8 reserved[3];
};

enum {
	NF_BPF_CT_OPTS_SZ = 16,
};
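/* Illustrative sketch (not compiled as part of this file): a BPF program
 * typically carries its own definition of this struct (via vmlinux.h or a
 * local copy) and fills it before calling the lookup/alloc kfuncs below.
 * Only the field names come from the struct above; everything else in the
 * snippet is a hypothetical caller:
 *
 *	struct bpf_ct_opts opts = {
 *		.netns_id = BPF_F_CURRENT_NETNS,	// use the ctx's netns
 *		.l4proto  = IPPROTO_TCP,		// TCP or UDP only
 *	};
 *
 *	// opts.error is written by the kfunc on failure,
 *	// opts.dir by a successful lookup.
 */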
static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
				 u32 tuple_len, u8 protonum, u8 dir,
				 struct nf_conntrack_tuple *tuple)
{
	union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
	union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
	union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
						  : &tuple->src.u;
	union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
						  : (void *)&tuple->dst.u;

	if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
		return -EPROTO;

	memset(tuple, 0, sizeof(*tuple));

	switch (tuple_len) {
	case sizeof(bpf_tuple->ipv4):
		tuple->src.l3num = AF_INET;
		src->ip = bpf_tuple->ipv4.saddr;
		sport->tcp.port = bpf_tuple->ipv4.sport;
		dst->ip = bpf_tuple->ipv4.daddr;
		dport->tcp.port = bpf_tuple->ipv4.dport;
		break;
	case sizeof(bpf_tuple->ipv6):
		tuple->src.l3num = AF_INET6;
		memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
		sport->tcp.port = bpf_tuple->ipv6.sport;
		memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
		dport->tcp.port = bpf_tuple->ipv6.dport;
		break;
	default:
		return -EAFNOSUPPORT;
	}

	tuple->dst.protonum = protonum;
	tuple->dst.dir = dir;

	return 0;
}
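/* Caller-side sketch of the tuple this helper accepts (assumed usage, not
 * part of this file): the BPF program passes sizeof(tuple.ipv4) or
 * sizeof(tuple.ipv6) as tuple_len, with addresses and ports in network
 * byte order. Header pointers below are hypothetical:
 *
 *	struct bpf_sock_tuple tup = {};
 *
 *	tup.ipv4.saddr = iph->saddr;	// parsed IPv4 header (illustrative)
 *	tup.ipv4.daddr = iph->daddr;
 *	tup.ipv4.sport = tcph->source;	// parsed TCP header (illustrative)
 *	tup.ipv4.dport = tcph->dest;
 *	// then e.g.: bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 */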
static struct nf_conn *
__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
			u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
			u32 timeout)
{
	struct nf_conntrack_tuple otuple, rtuple;
	struct nf_conntrack_zone ct_zone;
	struct nf_conn *ct;
	int err;

	if (!opts || !bpf_tuple)
		return ERR_PTR(-EINVAL);
	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12))
		return ERR_PTR(-EINVAL);
	if (opts_len == NF_BPF_CT_OPTS_SZ) {
		if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2])
			return ERR_PTR(-EINVAL);
	} else {
		if (opts->ct_zone_id)
			return ERR_PTR(-EINVAL);
	}

	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
		return ERR_PTR(-EINVAL);

	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
				    IP_CT_DIR_ORIGINAL, &otuple);
	if (err < 0)
		return ERR_PTR(err);

	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
				    IP_CT_DIR_REPLY, &rtuple);
	if (err < 0)
		return ERR_PTR(err);

	if (opts->netns_id >= 0) {
		net = get_net_ns_by_id(net, opts->netns_id);
		if (unlikely(!net))
			return ERR_PTR(-ENONET);
	}

	if (opts_len == NF_BPF_CT_OPTS_SZ) {
		if (opts->ct_zone_dir == 0)
			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
		nf_ct_zone_init(&ct_zone,
				opts->ct_zone_id, opts->ct_zone_dir, 0);
	} else {
		ct_zone = nf_ct_zone_dflt;
	}

	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
				GFP_ATOMIC);
	if (IS_ERR(ct))
		goto out;

	memset(&ct->proto, 0, sizeof(ct->proto));
	__nf_ct_set_timeout(ct, timeout * HZ);

out:
	if (opts->netns_id >= 0)
		put_net(net);

	return ct;
}
static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
					  struct bpf_sock_tuple *bpf_tuple,
					  u32 tuple_len, struct bpf_ct_opts *opts,
					  u32 opts_len)
{
	struct nf_conntrack_tuple_hash *hash;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_zone ct_zone;
	struct nf_conn *ct;
	int err;

	if (!opts || !bpf_tuple)
		return ERR_PTR(-EINVAL);
	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12))
		return ERR_PTR(-EINVAL);
	if (opts_len == NF_BPF_CT_OPTS_SZ) {
		if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2])
			return ERR_PTR(-EINVAL);
	} else {
		if (opts->ct_zone_id)
			return ERR_PTR(-EINVAL);
	}
	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
		return ERR_PTR(-EPROTO);
	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
		return ERR_PTR(-EINVAL);

	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
				    IP_CT_DIR_ORIGINAL, &tuple);
	if (err < 0)
		return ERR_PTR(err);

	if (opts->netns_id >= 0) {
		net = get_net_ns_by_id(net, opts->netns_id);
		if (unlikely(!net))
			return ERR_PTR(-ENONET);
	}

	if (opts_len == NF_BPF_CT_OPTS_SZ) {
		if (opts->ct_zone_dir == 0)
			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
		nf_ct_zone_init(&ct_zone,
				opts->ct_zone_id, opts->ct_zone_dir, 0);
	} else {
		ct_zone = nf_ct_zone_dflt;
	}

	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
	if (opts->netns_id >= 0)
		put_net(net);
	if (!hash)
		return ERR_PTR(-ENOENT);

	ct = nf_ct_tuplehash_to_ctrack(hash);
	opts->dir = NF_CT_DIRECTION(hash);

	return ct;
}
BTF_ID_LIST(btf_nf_conn_ids)
BTF_ID(struct, nf_conn)
BTF_ID(struct, nf_conn___init)

/* Check writes into `struct nf_conn` */
static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
					   const struct bpf_reg_state *reg,
					   int off, int size)
{
	const struct btf_type *ncit, *nct, *t;
	size_t end;

	ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
	nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
	t = btf_type_by_id(reg->btf, reg->btf_id);
	if (t != nct && t != ncit) {
		bpf_log(log, "only read is supported\n");
		return -EACCES;
	}

	/* `struct nf_conn` and `struct nf_conn___init` have the same layout
	 * so we are safe to simply merge offset checks here
	 */
	switch (off) {
#if defined(CONFIG_NF_CONNTRACK_MARK)
	case offsetof(struct nf_conn, mark):
		end = offsetofend(struct nf_conn, mark);
		break;
#endif
	default:
		bpf_log(log, "no write support to nf_conn at off %d\n", off);
		return -EACCES;
	}

	if (off + size > end) {
		bpf_log(log,
			"write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
			off, size, end);
		return -EACCES;
	}

	return 0;
}
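/* What this access check permits, roughly: with CONFIG_NF_CONNTRACK_MARK
 * enabled, a BPF program holding a trusted nf_conn pointer may write the
 * mark field directly; writes at any other offset are rejected. A hedged
 * sketch of the program side (identifiers are illustrative, not compiled
 * here):
 *
 *	ct = bpf_skb_ct_lookup(skb, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *	if (ct) {
 *		ct->mark = 42;		// allowed: falls inside nf_conn.mark
 *		bpf_ct_release(ct);
 *	}
 */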
__bpf_kfunc_start_defs();

/* bpf_xdp_ct_alloc - Allocate a new CT entry
 *
 * Parameters:
 * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
 *		    Cannot be NULL
 * @bpf_tuple	- Pointer to memory representing the tuple to look up
 *		    Cannot be NULL
 * @tuple__sz	- Length of the tuple structure
 *		    Must be one of sizeof(bpf_tuple->ipv4) or
 *		    sizeof(bpf_tuple->ipv6)
 * @opts	- Additional options for allocation (documented above)
 *		    Cannot be NULL
 * @opts__sz	- Length of the bpf_ct_opts structure
 *		    Must be NF_BPF_CT_OPTS_SZ (16) or 12
 */
__bpf_kfunc struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
	struct nf_conn *nfct;

	nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
				       opts, opts__sz, 10);
	if (IS_ERR(nfct)) {
		if (opts)
			opts->error = PTR_ERR(nfct);
		return NULL;
	}

	return (struct nf_conn___init *)nfct;
}
/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
 *		       reference to it
 *
 * Parameters:
 * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
 *		    Cannot be NULL
 * @bpf_tuple	- Pointer to memory representing the tuple to look up
 *		    Cannot be NULL
 * @tuple__sz	- Length of the tuple structure
 *		    Must be one of sizeof(bpf_tuple->ipv4) or
 *		    sizeof(bpf_tuple->ipv6)
 * @opts	- Additional options for lookup (documented above)
 *		    Cannot be NULL
 * @opts__sz	- Length of the bpf_ct_opts structure
 *		    Must be NF_BPF_CT_OPTS_SZ (16) or 12
 */
__bpf_kfunc struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
	struct net *caller_net;
	struct nf_conn *nfct;

	caller_net = dev_net(ctx->rxq->dev);
	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
	if (IS_ERR(nfct)) {
		if (opts)
			opts->error = PTR_ERR(nfct);
		return NULL;
	}

	return nfct;
}
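/* Usage sketch from an XDP program (hedged: assumes __ksym declarations of
 * the kfuncs and a bpf_ct_opts definition are available to the program, as
 * BPF programs typically arrange; tup/opts/ctx are illustrative, nothing
 * here is compiled as part of this file):
 *
 *	extern struct nf_conn *
 *	bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
 *			  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) __ksym;
 *	extern void bpf_ct_release(struct nf_conn *nfct) __ksym;
 *
 *	struct bpf_ct_opts opts = {
 *		.netns_id = BPF_F_CURRENT_NETNS,
 *		.l4proto  = IPPROTO_TCP,
 *	};
 *	struct nf_conn *ct;
 *
 *	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *	if (!ct)
 *		return XDP_PASS;	// opts.error holds the reason
 *	// ... inspect ct->status, opts.dir, etc. ...
 *	bpf_ct_release(ct);
 */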
/* bpf_skb_ct_alloc - Allocate a new CT entry
 *
 * Parameters:
 * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
 *		    Cannot be NULL
 * @bpf_tuple	- Pointer to memory representing the tuple to look up
 *		    Cannot be NULL
 * @tuple__sz	- Length of the tuple structure
 *		    Must be one of sizeof(bpf_tuple->ipv4) or
 *		    sizeof(bpf_tuple->ipv6)
 * @opts	- Additional options for allocation (documented above)
 *		    Cannot be NULL
 * @opts__sz	- Length of the bpf_ct_opts structure
 *		    Must be NF_BPF_CT_OPTS_SZ (16) or 12
 */
__bpf_kfunc struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
	struct nf_conn *nfct;
	struct net *net;

	net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
	nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
	if (IS_ERR(nfct)) {
		if (opts)
			opts->error = PTR_ERR(nfct);
		return NULL;
	}

	return (struct nf_conn___init *)nfct;
}
/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
 *		       reference to it
 *
 * Parameters:
 * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
 *		    Cannot be NULL
 * @bpf_tuple	- Pointer to memory representing the tuple to look up
 *		    Cannot be NULL
 * @tuple__sz	- Length of the tuple structure
 *		    Must be one of sizeof(bpf_tuple->ipv4) or
 *		    sizeof(bpf_tuple->ipv6)
 * @opts	- Additional options for lookup (documented above)
 *		    Cannot be NULL
 * @opts__sz	- Length of the bpf_ct_opts structure
 *		    Must be NF_BPF_CT_OPTS_SZ (16) or 12
 */
__bpf_kfunc struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
	struct net *caller_net;
	struct nf_conn *nfct;

	caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
	if (IS_ERR(nfct)) {
		if (opts)
			opts->error = PTR_ERR(nfct);
		return NULL;
	}

	return nfct;
}
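/* The TC-side flow mirrors the XDP sketch above; only the context type and
 * attach point differ (hedged sketch, identifiers illustrative, tup and
 * opts built as shown earlier):
 *
 *	SEC("tc")
 *	int handle(struct __sk_buff *skb)
 *	{
 *		struct nf_conn *ct;
 *
 *		ct = bpf_skb_ct_lookup(skb, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *		if (ct)
 *			bpf_ct_release(ct);
 *		return TC_ACT_OK;
 *	}
 */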
/* bpf_ct_insert_entry - Add the provided entry into a CT map
 *
 * This must be invoked for referenced PTR_TO_BTF_ID.
 *
 * @nfct	- Pointer to referenced nf_conn___init object, obtained
 *		  using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
 */
__bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
{
	struct nf_conn *nfct = (struct nf_conn *)nfct_i;
	int err;

	if (!nf_ct_is_confirmed(nfct))
		nfct->timeout += nfct_time_stamp;
	nfct->status |= IPS_CONFIRMED;
	err = nf_conntrack_hash_check_insert(nfct);
	if (err < 0) {
		nf_conntrack_free(nfct);
		return NULL;
	}

	return nfct;
}
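/* Typical allocation flow from a BPF program (hedged sketch; __ksym
 * declarations assumed as in the lookup sketch above, tup/opts/ctx are
 * illustrative). bpf_ct_set_timeout/bpf_ct_set_status may only be applied
 * to the nf_conn___init pointer before insertion; bpf_ct_insert_entry
 * consumes that reference even on failure:
 *
 *	struct nf_conn___init *ct_i;
 *	struct nf_conn *ct;
 *
 *	ct_i = bpf_xdp_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *	if (!ct_i)
 *		return XDP_PASS;
 *	bpf_ct_set_timeout(ct_i, 10000);	// 10 s, set before insertion
 *	ct = bpf_ct_insert_entry(ct_i);		// ct_i is released here
 *	if (ct)
 *		bpf_ct_release(ct);
 */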
/* bpf_ct_release - Release acquired nf_conn object
 *
 * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
 * the program if any references remain in the program in all of the explored
 * states.
 *
 * Parameters:
 * @nf_conn	- Pointer to referenced nf_conn object, obtained using
 *		  bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
 */
__bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
{
	nf_ct_put(nfct);
}

/* bpf_ct_set_timeout - Set timeout of allocated nf_conn
 *
 * Sets the default timeout of a newly allocated nf_conn before insertion.
 * This helper must be invoked for a refcounted pointer to nf_conn___init.
 *
 * Parameters:
 * @nfct	- Pointer to referenced nf_conn object, obtained using
 *		  bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
 * @timeout	- Timeout in msecs.
 */
__bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
{
	__nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
}

/* bpf_ct_change_timeout - Change timeout of inserted nf_conn
 *
 * Change the timeout associated with the inserted or looked-up nf_conn.
 * This helper must be invoked for a refcounted pointer to nf_conn.
 *
 * Parameters:
 * @nfct	- Pointer to referenced nf_conn object, obtained using
 *		  bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
 * @timeout	- New timeout in msecs.
 */
__bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
{
	return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
}
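/* Post-insertion mutation sketch (hedged; BPF-program side, identifiers
 * illustrative): the timeout/status changers take the trusted pointer
 * returned by bpf_ct_insert_entry or the lookup kfuncs, with timeouts in
 * milliseconds:
 *
 *	ct = bpf_skb_ct_lookup(skb, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *	if (ct) {
 *		bpf_ct_change_timeout(ct, 60000);	// extend to 60 s
 *		bpf_ct_release(ct);
 *	}
 */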
/* bpf_ct_set_status - Set status field of allocated nf_conn
 *
 * Set the status field of the newly allocated nf_conn before insertion.
 * This must be invoked for a referenced PTR_TO_BTF_ID to nf_conn___init.
 *
 * Parameters:
 * @nfct	- Pointer to referenced nf_conn object, obtained using
 *		  bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
 * @status	- New status value.
 */
__bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
{
	return nf_ct_change_status_common((struct nf_conn *)nfct, status);
}

/* bpf_ct_change_status - Change status of inserted nf_conn
 *
 * Change the status field of the provided connection tracking entry.
 * This must be invoked for a referenced PTR_TO_BTF_ID to nf_conn.
 *
 * Parameters:
 * @nfct	- Pointer to referenced nf_conn object, obtained using
 *		  bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
 * @status	- New status value.
 */
__bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
{
	return nf_ct_change_status_common(nfct, status);
}

__bpf_kfunc_end_defs();
BTF_KFUNCS_START(nf_ct_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(nf_ct_kfunc_set)

static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
	.owner	= THIS_MODULE,
	.set	= &nf_ct_kfunc_set,
};

int register_nf_conntrack_bpf(void)
{
	int ret;

	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
	if (!ret) {
		mutex_lock(&nf_conn_btf_access_lock);
		nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
		mutex_unlock(&nf_conn_btf_access_lock);
	}

	return ret;
}

void cleanup_nf_conntrack_bpf(void)
{
	mutex_lock(&nf_conn_btf_access_lock);
	nfct_btf_struct_access = NULL;
	mutex_unlock(&nf_conn_btf_access_lock);
}