perf tools: Don't clone maps from parent when synthesizing forks
[linux/fpc-iii.git] / net / netfilter / nft_ct.c
blob586627c361dfcf8026505d1bff3b5287b2e3e96f
/*
 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
25 #include <net/netfilter/nf_conntrack_timeout.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
28 struct nft_ct {
29 enum nft_ct_keys key:8;
30 enum ip_conntrack_dir dir:8;
31 union {
32 enum nft_registers dreg:8;
33 enum nft_registers sreg:8;
37 struct nft_ct_helper_obj {
38 struct nf_conntrack_helper *helper4;
39 struct nf_conntrack_helper *helper6;
40 u8 l4proto;
43 #ifdef CONFIG_NF_CONNTRACK_ZONES
44 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
45 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
46 #endif
48 static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
49 enum nft_ct_keys k,
50 enum ip_conntrack_dir d)
52 if (d < IP_CT_DIR_MAX)
53 return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
54 atomic64_read(&c[d].packets);
56 return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
57 nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
60 static void nft_ct_get_eval(const struct nft_expr *expr,
61 struct nft_regs *regs,
62 const struct nft_pktinfo *pkt)
64 const struct nft_ct *priv = nft_expr_priv(expr);
65 u32 *dest = &regs->data[priv->dreg];
66 enum ip_conntrack_info ctinfo;
67 const struct nf_conn *ct;
68 const struct nf_conn_help *help;
69 const struct nf_conntrack_tuple *tuple;
70 const struct nf_conntrack_helper *helper;
71 unsigned int state;
73 ct = nf_ct_get(pkt->skb, &ctinfo);
75 switch (priv->key) {
76 case NFT_CT_STATE:
77 if (ct)
78 state = NF_CT_STATE_BIT(ctinfo);
79 else if (ctinfo == IP_CT_UNTRACKED)
80 state = NF_CT_STATE_UNTRACKED_BIT;
81 else
82 state = NF_CT_STATE_INVALID_BIT;
83 *dest = state;
84 return;
85 default:
86 break;
89 if (ct == NULL)
90 goto err;
92 switch (priv->key) {
93 case NFT_CT_DIRECTION:
94 nft_reg_store8(dest, CTINFO2DIR(ctinfo));
95 return;
96 case NFT_CT_STATUS:
97 *dest = ct->status;
98 return;
99 #ifdef CONFIG_NF_CONNTRACK_MARK
100 case NFT_CT_MARK:
101 *dest = ct->mark;
102 return;
103 #endif
104 #ifdef CONFIG_NF_CONNTRACK_SECMARK
105 case NFT_CT_SECMARK:
106 *dest = ct->secmark;
107 return;
108 #endif
109 case NFT_CT_EXPIRATION:
110 *dest = jiffies_to_msecs(nf_ct_expires(ct));
111 return;
112 case NFT_CT_HELPER:
113 if (ct->master == NULL)
114 goto err;
115 help = nfct_help(ct->master);
116 if (help == NULL)
117 goto err;
118 helper = rcu_dereference(help->helper);
119 if (helper == NULL)
120 goto err;
121 strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
122 return;
123 #ifdef CONFIG_NF_CONNTRACK_LABELS
124 case NFT_CT_LABELS: {
125 struct nf_conn_labels *labels = nf_ct_labels_find(ct);
127 if (labels)
128 memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
129 else
130 memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
131 return;
133 #endif
134 case NFT_CT_BYTES: /* fallthrough */
135 case NFT_CT_PKTS: {
136 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
137 u64 count = 0;
139 if (acct)
140 count = nft_ct_get_eval_counter(acct->counter,
141 priv->key, priv->dir);
142 memcpy(dest, &count, sizeof(count));
143 return;
145 case NFT_CT_AVGPKT: {
146 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
147 u64 avgcnt = 0, bcnt = 0, pcnt = 0;
149 if (acct) {
150 pcnt = nft_ct_get_eval_counter(acct->counter,
151 NFT_CT_PKTS, priv->dir);
152 bcnt = nft_ct_get_eval_counter(acct->counter,
153 NFT_CT_BYTES, priv->dir);
154 if (pcnt != 0)
155 avgcnt = div64_u64(bcnt, pcnt);
158 memcpy(dest, &avgcnt, sizeof(avgcnt));
159 return;
161 case NFT_CT_L3PROTOCOL:
162 nft_reg_store8(dest, nf_ct_l3num(ct));
163 return;
164 case NFT_CT_PROTOCOL:
165 nft_reg_store8(dest, nf_ct_protonum(ct));
166 return;
167 #ifdef CONFIG_NF_CONNTRACK_ZONES
168 case NFT_CT_ZONE: {
169 const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
170 u16 zoneid;
172 if (priv->dir < IP_CT_DIR_MAX)
173 zoneid = nf_ct_zone_id(zone, priv->dir);
174 else
175 zoneid = zone->id;
177 nft_reg_store16(dest, zoneid);
178 return;
180 #endif
181 default:
182 break;
185 tuple = &ct->tuplehash[priv->dir].tuple;
186 switch (priv->key) {
187 case NFT_CT_SRC:
188 memcpy(dest, tuple->src.u3.all,
189 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
190 return;
191 case NFT_CT_DST:
192 memcpy(dest, tuple->dst.u3.all,
193 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
194 return;
195 case NFT_CT_PROTO_SRC:
196 nft_reg_store16(dest, (__force u16)tuple->src.u.all);
197 return;
198 case NFT_CT_PROTO_DST:
199 nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
200 return;
201 case NFT_CT_SRC_IP:
202 if (nf_ct_l3num(ct) != NFPROTO_IPV4)
203 goto err;
204 *dest = tuple->src.u3.ip;
205 return;
206 case NFT_CT_DST_IP:
207 if (nf_ct_l3num(ct) != NFPROTO_IPV4)
208 goto err;
209 *dest = tuple->dst.u3.ip;
210 return;
211 case NFT_CT_SRC_IP6:
212 if (nf_ct_l3num(ct) != NFPROTO_IPV6)
213 goto err;
214 memcpy(dest, tuple->src.u3.ip6, sizeof(struct in6_addr));
215 return;
216 case NFT_CT_DST_IP6:
217 if (nf_ct_l3num(ct) != NFPROTO_IPV6)
218 goto err;
219 memcpy(dest, tuple->dst.u3.ip6, sizeof(struct in6_addr));
220 return;
221 default:
222 break;
224 return;
225 err:
226 regs->verdict.code = NFT_BREAK;
229 #ifdef CONFIG_NF_CONNTRACK_ZONES
230 static void nft_ct_set_zone_eval(const struct nft_expr *expr,
231 struct nft_regs *regs,
232 const struct nft_pktinfo *pkt)
234 struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
235 const struct nft_ct *priv = nft_expr_priv(expr);
236 struct sk_buff *skb = pkt->skb;
237 enum ip_conntrack_info ctinfo;
238 u16 value = nft_reg_load16(&regs->data[priv->sreg]);
239 struct nf_conn *ct;
241 ct = nf_ct_get(skb, &ctinfo);
242 if (ct) /* already tracked */
243 return;
245 zone.id = value;
247 switch (priv->dir) {
248 case IP_CT_DIR_ORIGINAL:
249 zone.dir = NF_CT_ZONE_DIR_ORIG;
250 break;
251 case IP_CT_DIR_REPLY:
252 zone.dir = NF_CT_ZONE_DIR_REPL;
253 break;
254 default:
255 break;
258 ct = this_cpu_read(nft_ct_pcpu_template);
260 if (likely(atomic_read(&ct->ct_general.use) == 1)) {
261 nf_ct_zone_add(ct, &zone);
262 } else {
263 /* previous skb got queued to userspace */
264 ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
265 if (!ct) {
266 regs->verdict.code = NF_DROP;
267 return;
271 atomic_inc(&ct->ct_general.use);
272 nf_ct_set(skb, ct, IP_CT_NEW);
274 #endif
276 static void nft_ct_set_eval(const struct nft_expr *expr,
277 struct nft_regs *regs,
278 const struct nft_pktinfo *pkt)
280 const struct nft_ct *priv = nft_expr_priv(expr);
281 struct sk_buff *skb = pkt->skb;
282 #if defined(CONFIG_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_SECMARK)
283 u32 value = regs->data[priv->sreg];
284 #endif
285 enum ip_conntrack_info ctinfo;
286 struct nf_conn *ct;
288 ct = nf_ct_get(skb, &ctinfo);
289 if (ct == NULL || nf_ct_is_template(ct))
290 return;
292 switch (priv->key) {
293 #ifdef CONFIG_NF_CONNTRACK_MARK
294 case NFT_CT_MARK:
295 if (ct->mark != value) {
296 ct->mark = value;
297 nf_conntrack_event_cache(IPCT_MARK, ct);
299 break;
300 #endif
301 #ifdef CONFIG_NF_CONNTRACK_SECMARK
302 case NFT_CT_SECMARK:
303 if (ct->secmark != value) {
304 ct->secmark = value;
305 nf_conntrack_event_cache(IPCT_SECMARK, ct);
307 break;
308 #endif
309 #ifdef CONFIG_NF_CONNTRACK_LABELS
310 case NFT_CT_LABELS:
311 nf_connlabels_replace(ct,
312 &regs->data[priv->sreg],
313 &regs->data[priv->sreg],
314 NF_CT_LABELS_MAX_SIZE / sizeof(u32));
315 break;
316 #endif
317 #ifdef CONFIG_NF_CONNTRACK_EVENTS
318 case NFT_CT_EVENTMASK: {
319 struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct);
320 u32 ctmask = regs->data[priv->sreg];
322 if (e) {
323 if (e->ctmask != ctmask)
324 e->ctmask = ctmask;
325 break;
328 if (ctmask && !nf_ct_is_confirmed(ct))
329 nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC);
330 break;
332 #endif
333 default:
334 break;
338 static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
339 [NFTA_CT_DREG] = { .type = NLA_U32 },
340 [NFTA_CT_KEY] = { .type = NLA_U32 },
341 [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
342 [NFTA_CT_SREG] = { .type = NLA_U32 },
345 #ifdef CONFIG_NF_CONNTRACK_ZONES
346 static void nft_ct_tmpl_put_pcpu(void)
348 struct nf_conn *ct;
349 int cpu;
351 for_each_possible_cpu(cpu) {
352 ct = per_cpu(nft_ct_pcpu_template, cpu);
353 if (!ct)
354 break;
355 nf_ct_put(ct);
356 per_cpu(nft_ct_pcpu_template, cpu) = NULL;
360 static bool nft_ct_tmpl_alloc_pcpu(void)
362 struct nf_conntrack_zone zone = { .id = 0 };
363 struct nf_conn *tmp;
364 int cpu;
366 if (nft_ct_pcpu_template_refcnt)
367 return true;
369 for_each_possible_cpu(cpu) {
370 tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
371 if (!tmp) {
372 nft_ct_tmpl_put_pcpu();
373 return false;
376 atomic_set(&tmp->ct_general.use, 1);
377 per_cpu(nft_ct_pcpu_template, cpu) = tmp;
380 return true;
382 #endif
384 static int nft_ct_get_init(const struct nft_ctx *ctx,
385 const struct nft_expr *expr,
386 const struct nlattr * const tb[])
388 struct nft_ct *priv = nft_expr_priv(expr);
389 unsigned int len;
390 int err;
392 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
393 priv->dir = IP_CT_DIR_MAX;
394 switch (priv->key) {
395 case NFT_CT_DIRECTION:
396 if (tb[NFTA_CT_DIRECTION] != NULL)
397 return -EINVAL;
398 len = sizeof(u8);
399 break;
400 case NFT_CT_STATE:
401 case NFT_CT_STATUS:
402 #ifdef CONFIG_NF_CONNTRACK_MARK
403 case NFT_CT_MARK:
404 #endif
405 #ifdef CONFIG_NF_CONNTRACK_SECMARK
406 case NFT_CT_SECMARK:
407 #endif
408 case NFT_CT_EXPIRATION:
409 if (tb[NFTA_CT_DIRECTION] != NULL)
410 return -EINVAL;
411 len = sizeof(u32);
412 break;
413 #ifdef CONFIG_NF_CONNTRACK_LABELS
414 case NFT_CT_LABELS:
415 if (tb[NFTA_CT_DIRECTION] != NULL)
416 return -EINVAL;
417 len = NF_CT_LABELS_MAX_SIZE;
418 break;
419 #endif
420 case NFT_CT_HELPER:
421 if (tb[NFTA_CT_DIRECTION] != NULL)
422 return -EINVAL;
423 len = NF_CT_HELPER_NAME_LEN;
424 break;
426 case NFT_CT_L3PROTOCOL:
427 case NFT_CT_PROTOCOL:
428 /* For compatibility, do not report error if NFTA_CT_DIRECTION
429 * attribute is specified.
431 len = sizeof(u8);
432 break;
433 case NFT_CT_SRC:
434 case NFT_CT_DST:
435 if (tb[NFTA_CT_DIRECTION] == NULL)
436 return -EINVAL;
438 switch (ctx->family) {
439 case NFPROTO_IPV4:
440 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
441 src.u3.ip);
442 break;
443 case NFPROTO_IPV6:
444 case NFPROTO_INET:
445 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
446 src.u3.ip6);
447 break;
448 default:
449 return -EAFNOSUPPORT;
451 break;
452 case NFT_CT_SRC_IP:
453 case NFT_CT_DST_IP:
454 if (tb[NFTA_CT_DIRECTION] == NULL)
455 return -EINVAL;
457 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip);
458 break;
459 case NFT_CT_SRC_IP6:
460 case NFT_CT_DST_IP6:
461 if (tb[NFTA_CT_DIRECTION] == NULL)
462 return -EINVAL;
464 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6);
465 break;
466 case NFT_CT_PROTO_SRC:
467 case NFT_CT_PROTO_DST:
468 if (tb[NFTA_CT_DIRECTION] == NULL)
469 return -EINVAL;
470 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
471 break;
472 case NFT_CT_BYTES:
473 case NFT_CT_PKTS:
474 case NFT_CT_AVGPKT:
475 len = sizeof(u64);
476 break;
477 #ifdef CONFIG_NF_CONNTRACK_ZONES
478 case NFT_CT_ZONE:
479 len = sizeof(u16);
480 break;
481 #endif
482 default:
483 return -EOPNOTSUPP;
486 if (tb[NFTA_CT_DIRECTION] != NULL) {
487 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
488 switch (priv->dir) {
489 case IP_CT_DIR_ORIGINAL:
490 case IP_CT_DIR_REPLY:
491 break;
492 default:
493 return -EINVAL;
497 priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
498 err = nft_validate_register_store(ctx, priv->dreg, NULL,
499 NFT_DATA_VALUE, len);
500 if (err < 0)
501 return err;
503 err = nf_ct_netns_get(ctx->net, ctx->family);
504 if (err < 0)
505 return err;
507 if (priv->key == NFT_CT_BYTES ||
508 priv->key == NFT_CT_PKTS ||
509 priv->key == NFT_CT_AVGPKT)
510 nf_ct_set_acct(ctx->net, true);
512 return 0;
515 static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
517 switch (priv->key) {
518 #ifdef CONFIG_NF_CONNTRACK_LABELS
519 case NFT_CT_LABELS:
520 nf_connlabels_put(ctx->net);
521 break;
522 #endif
523 #ifdef CONFIG_NF_CONNTRACK_ZONES
524 case NFT_CT_ZONE:
525 if (--nft_ct_pcpu_template_refcnt == 0)
526 nft_ct_tmpl_put_pcpu();
527 #endif
528 default:
529 break;
533 static int nft_ct_set_init(const struct nft_ctx *ctx,
534 const struct nft_expr *expr,
535 const struct nlattr * const tb[])
537 struct nft_ct *priv = nft_expr_priv(expr);
538 unsigned int len;
539 int err;
541 priv->dir = IP_CT_DIR_MAX;
542 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
543 switch (priv->key) {
544 #ifdef CONFIG_NF_CONNTRACK_MARK
545 case NFT_CT_MARK:
546 if (tb[NFTA_CT_DIRECTION])
547 return -EINVAL;
548 len = FIELD_SIZEOF(struct nf_conn, mark);
549 break;
550 #endif
551 #ifdef CONFIG_NF_CONNTRACK_LABELS
552 case NFT_CT_LABELS:
553 if (tb[NFTA_CT_DIRECTION])
554 return -EINVAL;
555 len = NF_CT_LABELS_MAX_SIZE;
556 err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
557 if (err)
558 return err;
559 break;
560 #endif
561 #ifdef CONFIG_NF_CONNTRACK_ZONES
562 case NFT_CT_ZONE:
563 if (!nft_ct_tmpl_alloc_pcpu())
564 return -ENOMEM;
565 nft_ct_pcpu_template_refcnt++;
566 len = sizeof(u16);
567 break;
568 #endif
569 #ifdef CONFIG_NF_CONNTRACK_EVENTS
570 case NFT_CT_EVENTMASK:
571 if (tb[NFTA_CT_DIRECTION])
572 return -EINVAL;
573 len = sizeof(u32);
574 break;
575 #endif
576 #ifdef CONFIG_NF_CONNTRACK_SECMARK
577 case NFT_CT_SECMARK:
578 if (tb[NFTA_CT_DIRECTION])
579 return -EINVAL;
580 len = sizeof(u32);
581 break;
582 #endif
583 default:
584 return -EOPNOTSUPP;
587 if (tb[NFTA_CT_DIRECTION]) {
588 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
589 switch (priv->dir) {
590 case IP_CT_DIR_ORIGINAL:
591 case IP_CT_DIR_REPLY:
592 break;
593 default:
594 err = -EINVAL;
595 goto err1;
599 priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
600 err = nft_validate_register_load(priv->sreg, len);
601 if (err < 0)
602 goto err1;
604 err = nf_ct_netns_get(ctx->net, ctx->family);
605 if (err < 0)
606 goto err1;
608 return 0;
610 err1:
611 __nft_ct_set_destroy(ctx, priv);
612 return err;
615 static void nft_ct_get_destroy(const struct nft_ctx *ctx,
616 const struct nft_expr *expr)
618 nf_ct_netns_put(ctx->net, ctx->family);
621 static void nft_ct_set_destroy(const struct nft_ctx *ctx,
622 const struct nft_expr *expr)
624 struct nft_ct *priv = nft_expr_priv(expr);
626 __nft_ct_set_destroy(ctx, priv);
627 nf_ct_netns_put(ctx->net, ctx->family);
630 static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
632 const struct nft_ct *priv = nft_expr_priv(expr);
634 if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
635 goto nla_put_failure;
636 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
637 goto nla_put_failure;
639 switch (priv->key) {
640 case NFT_CT_SRC:
641 case NFT_CT_DST:
642 case NFT_CT_SRC_IP:
643 case NFT_CT_DST_IP:
644 case NFT_CT_SRC_IP6:
645 case NFT_CT_DST_IP6:
646 case NFT_CT_PROTO_SRC:
647 case NFT_CT_PROTO_DST:
648 if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
649 goto nla_put_failure;
650 break;
651 case NFT_CT_BYTES:
652 case NFT_CT_PKTS:
653 case NFT_CT_AVGPKT:
654 case NFT_CT_ZONE:
655 if (priv->dir < IP_CT_DIR_MAX &&
656 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
657 goto nla_put_failure;
658 break;
659 default:
660 break;
663 return 0;
665 nla_put_failure:
666 return -1;
669 static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
671 const struct nft_ct *priv = nft_expr_priv(expr);
673 if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
674 goto nla_put_failure;
675 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
676 goto nla_put_failure;
678 switch (priv->key) {
679 case NFT_CT_ZONE:
680 if (priv->dir < IP_CT_DIR_MAX &&
681 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
682 goto nla_put_failure;
683 break;
684 default:
685 break;
688 return 0;
690 nla_put_failure:
691 return -1;
694 static struct nft_expr_type nft_ct_type;
695 static const struct nft_expr_ops nft_ct_get_ops = {
696 .type = &nft_ct_type,
697 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
698 .eval = nft_ct_get_eval,
699 .init = nft_ct_get_init,
700 .destroy = nft_ct_get_destroy,
701 .dump = nft_ct_get_dump,
704 static const struct nft_expr_ops nft_ct_set_ops = {
705 .type = &nft_ct_type,
706 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
707 .eval = nft_ct_set_eval,
708 .init = nft_ct_set_init,
709 .destroy = nft_ct_set_destroy,
710 .dump = nft_ct_set_dump,
713 #ifdef CONFIG_NF_CONNTRACK_ZONES
714 static const struct nft_expr_ops nft_ct_set_zone_ops = {
715 .type = &nft_ct_type,
716 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
717 .eval = nft_ct_set_zone_eval,
718 .init = nft_ct_set_init,
719 .destroy = nft_ct_set_destroy,
720 .dump = nft_ct_set_dump,
722 #endif
724 static const struct nft_expr_ops *
725 nft_ct_select_ops(const struct nft_ctx *ctx,
726 const struct nlattr * const tb[])
728 if (tb[NFTA_CT_KEY] == NULL)
729 return ERR_PTR(-EINVAL);
731 if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
732 return ERR_PTR(-EINVAL);
734 if (tb[NFTA_CT_DREG])
735 return &nft_ct_get_ops;
737 if (tb[NFTA_CT_SREG]) {
738 #ifdef CONFIG_NF_CONNTRACK_ZONES
739 if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
740 return &nft_ct_set_zone_ops;
741 #endif
742 return &nft_ct_set_ops;
745 return ERR_PTR(-EINVAL);
748 static struct nft_expr_type nft_ct_type __read_mostly = {
749 .name = "ct",
750 .select_ops = nft_ct_select_ops,
751 .policy = nft_ct_policy,
752 .maxattr = NFTA_CT_MAX,
753 .owner = THIS_MODULE,
756 static void nft_notrack_eval(const struct nft_expr *expr,
757 struct nft_regs *regs,
758 const struct nft_pktinfo *pkt)
760 struct sk_buff *skb = pkt->skb;
761 enum ip_conntrack_info ctinfo;
762 struct nf_conn *ct;
764 ct = nf_ct_get(pkt->skb, &ctinfo);
765 /* Previously seen (loopback or untracked)? Ignore. */
766 if (ct || ctinfo == IP_CT_UNTRACKED)
767 return;
769 nf_ct_set(skb, ct, IP_CT_UNTRACKED);
772 static struct nft_expr_type nft_notrack_type;
773 static const struct nft_expr_ops nft_notrack_ops = {
774 .type = &nft_notrack_type,
775 .size = NFT_EXPR_SIZE(0),
776 .eval = nft_notrack_eval,
779 static struct nft_expr_type nft_notrack_type __read_mostly = {
780 .name = "notrack",
781 .ops = &nft_notrack_ops,
782 .owner = THIS_MODULE,
785 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
786 static int
787 nft_ct_timeout_parse_policy(void *timeouts,
788 const struct nf_conntrack_l4proto *l4proto,
789 struct net *net, const struct nlattr *attr)
791 struct nlattr **tb;
792 int ret = 0;
794 tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
795 GFP_KERNEL);
797 if (!tb)
798 return -ENOMEM;
800 ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
801 attr, l4proto->ctnl_timeout.nla_policy,
802 NULL);
803 if (ret < 0)
804 goto err;
806 ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
808 err:
809 kfree(tb);
810 return ret;
813 struct nft_ct_timeout_obj {
814 struct nf_ct_timeout *timeout;
815 u8 l4proto;
818 static void nft_ct_timeout_obj_eval(struct nft_object *obj,
819 struct nft_regs *regs,
820 const struct nft_pktinfo *pkt)
822 const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
823 struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
824 struct nf_conn_timeout *timeout;
825 const unsigned int *values;
827 if (priv->l4proto != pkt->tprot)
828 return;
830 if (!ct || nf_ct_is_template(ct) || nf_ct_is_confirmed(ct))
831 return;
833 timeout = nf_ct_timeout_find(ct);
834 if (!timeout) {
835 timeout = nf_ct_timeout_ext_add(ct, priv->timeout, GFP_ATOMIC);
836 if (!timeout) {
837 regs->verdict.code = NF_DROP;
838 return;
842 rcu_assign_pointer(timeout->timeout, priv->timeout);
844 /* adjust the timeout as per 'new' state. ct is unconfirmed,
845 * so the current timestamp must not be added.
847 values = nf_ct_timeout_data(timeout);
848 if (values)
849 nf_ct_refresh(ct, pkt->skb, values[0]);
852 static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
853 const struct nlattr * const tb[],
854 struct nft_object *obj)
856 struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
857 const struct nf_conntrack_l4proto *l4proto;
858 struct nf_ct_timeout *timeout;
859 int l3num = ctx->family;
860 __u8 l4num;
861 int ret;
863 if (!tb[NFTA_CT_TIMEOUT_L4PROTO] ||
864 !tb[NFTA_CT_TIMEOUT_DATA])
865 return -EINVAL;
867 if (tb[NFTA_CT_TIMEOUT_L3PROTO])
868 l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
870 l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
871 priv->l4proto = l4num;
873 l4proto = nf_ct_l4proto_find_get(l4num);
875 if (l4proto->l4proto != l4num) {
876 ret = -EOPNOTSUPP;
877 goto err_proto_put;
880 timeout = kzalloc(sizeof(struct nf_ct_timeout) +
881 l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
882 if (timeout == NULL) {
883 ret = -ENOMEM;
884 goto err_proto_put;
887 ret = nft_ct_timeout_parse_policy(&timeout->data, l4proto, ctx->net,
888 tb[NFTA_CT_TIMEOUT_DATA]);
889 if (ret < 0)
890 goto err_free_timeout;
892 timeout->l3num = l3num;
893 timeout->l4proto = l4proto;
895 ret = nf_ct_netns_get(ctx->net, ctx->family);
896 if (ret < 0)
897 goto err_free_timeout;
899 priv->timeout = timeout;
900 return 0;
902 err_free_timeout:
903 kfree(timeout);
904 err_proto_put:
905 nf_ct_l4proto_put(l4proto);
906 return ret;
909 static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
910 struct nft_object *obj)
912 struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
913 struct nf_ct_timeout *timeout = priv->timeout;
915 nf_ct_untimeout(ctx->net, timeout);
916 nf_ct_l4proto_put(timeout->l4proto);
917 nf_ct_netns_put(ctx->net, ctx->family);
918 kfree(priv->timeout);
921 static int nft_ct_timeout_obj_dump(struct sk_buff *skb,
922 struct nft_object *obj, bool reset)
924 const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
925 const struct nf_ct_timeout *timeout = priv->timeout;
926 struct nlattr *nest_params;
927 int ret;
929 if (nla_put_u8(skb, NFTA_CT_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
930 nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num)))
931 return -1;
933 nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED);
934 if (!nest_params)
935 return -1;
937 ret = timeout->l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
938 if (ret < 0)
939 return -1;
940 nla_nest_end(skb, nest_params);
941 return 0;
944 static const struct nla_policy nft_ct_timeout_policy[NFTA_CT_TIMEOUT_MAX + 1] = {
945 [NFTA_CT_TIMEOUT_L3PROTO] = {.type = NLA_U16 },
946 [NFTA_CT_TIMEOUT_L4PROTO] = {.type = NLA_U8 },
947 [NFTA_CT_TIMEOUT_DATA] = {.type = NLA_NESTED },
950 static struct nft_object_type nft_ct_timeout_obj_type;
952 static const struct nft_object_ops nft_ct_timeout_obj_ops = {
953 .type = &nft_ct_timeout_obj_type,
954 .size = sizeof(struct nft_ct_timeout_obj),
955 .eval = nft_ct_timeout_obj_eval,
956 .init = nft_ct_timeout_obj_init,
957 .destroy = nft_ct_timeout_obj_destroy,
958 .dump = nft_ct_timeout_obj_dump,
961 static struct nft_object_type nft_ct_timeout_obj_type __read_mostly = {
962 .type = NFT_OBJECT_CT_TIMEOUT,
963 .ops = &nft_ct_timeout_obj_ops,
964 .maxattr = NFTA_CT_TIMEOUT_MAX,
965 .policy = nft_ct_timeout_policy,
966 .owner = THIS_MODULE,
968 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
970 static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
971 const struct nlattr * const tb[],
972 struct nft_object *obj)
974 struct nft_ct_helper_obj *priv = nft_obj_data(obj);
975 struct nf_conntrack_helper *help4, *help6;
976 char name[NF_CT_HELPER_NAME_LEN];
977 int family = ctx->family;
978 int err;
980 if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
981 return -EINVAL;
983 priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
984 if (!priv->l4proto)
985 return -ENOENT;
987 nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
989 if (tb[NFTA_CT_HELPER_L3PROTO])
990 family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
992 help4 = NULL;
993 help6 = NULL;
995 switch (family) {
996 case NFPROTO_IPV4:
997 if (ctx->family == NFPROTO_IPV6)
998 return -EINVAL;
1000 help4 = nf_conntrack_helper_try_module_get(name, family,
1001 priv->l4proto);
1002 break;
1003 case NFPROTO_IPV6:
1004 if (ctx->family == NFPROTO_IPV4)
1005 return -EINVAL;
1007 help6 = nf_conntrack_helper_try_module_get(name, family,
1008 priv->l4proto);
1009 break;
1010 case NFPROTO_NETDEV: /* fallthrough */
1011 case NFPROTO_BRIDGE: /* same */
1012 case NFPROTO_INET:
1013 help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
1014 priv->l4proto);
1015 help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
1016 priv->l4proto);
1017 break;
1018 default:
1019 return -EAFNOSUPPORT;
1022 /* && is intentional; only error if INET found neither ipv4 or ipv6 */
1023 if (!help4 && !help6)
1024 return -ENOENT;
1026 priv->helper4 = help4;
1027 priv->helper6 = help6;
1029 err = nf_ct_netns_get(ctx->net, ctx->family);
1030 if (err < 0)
1031 goto err_put_helper;
1033 return 0;
1035 err_put_helper:
1036 if (priv->helper4)
1037 nf_conntrack_helper_put(priv->helper4);
1038 if (priv->helper6)
1039 nf_conntrack_helper_put(priv->helper6);
1040 return err;
1043 static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx,
1044 struct nft_object *obj)
1046 struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1048 if (priv->helper4)
1049 nf_conntrack_helper_put(priv->helper4);
1050 if (priv->helper6)
1051 nf_conntrack_helper_put(priv->helper6);
1053 nf_ct_netns_put(ctx->net, ctx->family);
1056 static void nft_ct_helper_obj_eval(struct nft_object *obj,
1057 struct nft_regs *regs,
1058 const struct nft_pktinfo *pkt)
1060 const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1061 struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
1062 struct nf_conntrack_helper *to_assign = NULL;
1063 struct nf_conn_help *help;
1065 if (!ct ||
1066 nf_ct_is_confirmed(ct) ||
1067 nf_ct_is_template(ct) ||
1068 priv->l4proto != nf_ct_protonum(ct))
1069 return;
1071 switch (nf_ct_l3num(ct)) {
1072 case NFPROTO_IPV4:
1073 to_assign = priv->helper4;
1074 break;
1075 case NFPROTO_IPV6:
1076 to_assign = priv->helper6;
1077 break;
1078 default:
1079 WARN_ON_ONCE(1);
1080 return;
1083 if (!to_assign)
1084 return;
1086 if (test_bit(IPS_HELPER_BIT, &ct->status))
1087 return;
1089 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
1090 if (help) {
1091 rcu_assign_pointer(help->helper, to_assign);
1092 set_bit(IPS_HELPER_BIT, &ct->status);
1096 static int nft_ct_helper_obj_dump(struct sk_buff *skb,
1097 struct nft_object *obj, bool reset)
1099 const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1100 const struct nf_conntrack_helper *helper;
1101 u16 family;
1103 if (priv->helper4 && priv->helper6) {
1104 family = NFPROTO_INET;
1105 helper = priv->helper4;
1106 } else if (priv->helper6) {
1107 family = NFPROTO_IPV6;
1108 helper = priv->helper6;
1109 } else {
1110 family = NFPROTO_IPV4;
1111 helper = priv->helper4;
1114 if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
1115 return -1;
1117 if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
1118 return -1;
1120 if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
1121 return -1;
1123 return 0;
1126 static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
1127 [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
1128 .len = NF_CT_HELPER_NAME_LEN - 1 },
1129 [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
1130 [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
1133 static struct nft_object_type nft_ct_helper_obj_type;
1134 static const struct nft_object_ops nft_ct_helper_obj_ops = {
1135 .type = &nft_ct_helper_obj_type,
1136 .size = sizeof(struct nft_ct_helper_obj),
1137 .eval = nft_ct_helper_obj_eval,
1138 .init = nft_ct_helper_obj_init,
1139 .destroy = nft_ct_helper_obj_destroy,
1140 .dump = nft_ct_helper_obj_dump,
1143 static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
1144 .type = NFT_OBJECT_CT_HELPER,
1145 .ops = &nft_ct_helper_obj_ops,
1146 .maxattr = NFTA_CT_HELPER_MAX,
1147 .policy = nft_ct_helper_policy,
1148 .owner = THIS_MODULE,
1151 static int __init nft_ct_module_init(void)
1153 int err;
1155 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
1157 err = nft_register_expr(&nft_ct_type);
1158 if (err < 0)
1159 return err;
1161 err = nft_register_expr(&nft_notrack_type);
1162 if (err < 0)
1163 goto err1;
1165 err = nft_register_obj(&nft_ct_helper_obj_type);
1166 if (err < 0)
1167 goto err2;
1168 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1169 err = nft_register_obj(&nft_ct_timeout_obj_type);
1170 if (err < 0)
1171 goto err3;
1172 #endif
1173 return 0;
1175 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1176 err3:
1177 nft_unregister_obj(&nft_ct_helper_obj_type);
1178 #endif
1179 err2:
1180 nft_unregister_expr(&nft_notrack_type);
1181 err1:
1182 nft_unregister_expr(&nft_ct_type);
1183 return err;
1186 static void __exit nft_ct_module_exit(void)
1188 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1189 nft_unregister_obj(&nft_ct_timeout_obj_type);
1190 #endif
1191 nft_unregister_obj(&nft_ct_helper_obj_type);
1192 nft_unregister_expr(&nft_notrack_type);
1193 nft_unregister_expr(&nft_ct_type);
1196 module_init(nft_ct_module_init);
1197 module_exit(nft_ct_module_exit);
1199 MODULE_LICENSE("GPL");
1200 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
1201 MODULE_ALIAS_NFT_EXPR("ct");
1202 MODULE_ALIAS_NFT_EXPR("notrack");
1203 MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
1204 MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);