2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
27 enum nft_ct_keys key
:8;
28 enum ip_conntrack_dir dir
:8;
30 enum nft_registers dreg
:8;
31 enum nft_registers sreg
:8;
35 #ifdef CONFIG_NF_CONNTRACK_ZONES
36 static DEFINE_PER_CPU(struct nf_conn
*, nft_ct_pcpu_template
);
37 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly
;
40 static u64
nft_ct_get_eval_counter(const struct nf_conn_counter
*c
,
42 enum ip_conntrack_dir d
)
44 if (d
< IP_CT_DIR_MAX
)
45 return k
== NFT_CT_BYTES
? atomic64_read(&c
[d
].bytes
) :
46 atomic64_read(&c
[d
].packets
);
48 return nft_ct_get_eval_counter(c
, k
, IP_CT_DIR_ORIGINAL
) +
49 nft_ct_get_eval_counter(c
, k
, IP_CT_DIR_REPLY
);
52 static void nft_ct_get_eval(const struct nft_expr
*expr
,
53 struct nft_regs
*regs
,
54 const struct nft_pktinfo
*pkt
)
56 const struct nft_ct
*priv
= nft_expr_priv(expr
);
57 u32
*dest
= ®s
->data
[priv
->dreg
];
58 enum ip_conntrack_info ctinfo
;
59 const struct nf_conn
*ct
;
60 const struct nf_conn_help
*help
;
61 const struct nf_conntrack_tuple
*tuple
;
62 const struct nf_conntrack_helper
*helper
;
65 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
70 state
= NF_CT_STATE_INVALID_BIT
;
71 else if (nf_ct_is_untracked(ct
))
72 state
= NF_CT_STATE_UNTRACKED_BIT
;
74 state
= NF_CT_STATE_BIT(ctinfo
);
85 case NFT_CT_DIRECTION
:
86 *dest
= CTINFO2DIR(ctinfo
);
91 #ifdef CONFIG_NF_CONNTRACK_MARK
96 #ifdef CONFIG_NF_CONNTRACK_SECMARK
101 case NFT_CT_EXPIRATION
:
102 *dest
= jiffies_to_msecs(nf_ct_expires(ct
));
105 if (ct
->master
== NULL
)
107 help
= nfct_help(ct
->master
);
110 helper
= rcu_dereference(help
->helper
);
113 strncpy((char *)dest
, helper
->name
, NF_CT_HELPER_NAME_LEN
);
115 #ifdef CONFIG_NF_CONNTRACK_LABELS
116 case NFT_CT_LABELS
: {
117 struct nf_conn_labels
*labels
= nf_ct_labels_find(ct
);
120 memcpy(dest
, labels
->bits
, NF_CT_LABELS_MAX_SIZE
);
122 memset(dest
, 0, NF_CT_LABELS_MAX_SIZE
);
126 case NFT_CT_BYTES
: /* fallthrough */
128 const struct nf_conn_acct
*acct
= nf_conn_acct_find(ct
);
132 count
= nft_ct_get_eval_counter(acct
->counter
,
133 priv
->key
, priv
->dir
);
134 memcpy(dest
, &count
, sizeof(count
));
137 case NFT_CT_AVGPKT
: {
138 const struct nf_conn_acct
*acct
= nf_conn_acct_find(ct
);
139 u64 avgcnt
= 0, bcnt
= 0, pcnt
= 0;
142 pcnt
= nft_ct_get_eval_counter(acct
->counter
,
143 NFT_CT_PKTS
, priv
->dir
);
144 bcnt
= nft_ct_get_eval_counter(acct
->counter
,
145 NFT_CT_BYTES
, priv
->dir
);
147 avgcnt
= div64_u64(bcnt
, pcnt
);
150 memcpy(dest
, &avgcnt
, sizeof(avgcnt
));
153 case NFT_CT_L3PROTOCOL
:
154 *dest
= nf_ct_l3num(ct
);
156 case NFT_CT_PROTOCOL
:
157 *dest
= nf_ct_protonum(ct
);
159 #ifdef CONFIG_NF_CONNTRACK_ZONES
161 const struct nf_conntrack_zone
*zone
= nf_ct_zone(ct
);
163 if (priv
->dir
< IP_CT_DIR_MAX
)
164 *dest
= nf_ct_zone_id(zone
, priv
->dir
);
175 tuple
= &ct
->tuplehash
[priv
->dir
].tuple
;
178 memcpy(dest
, tuple
->src
.u3
.all
,
179 nf_ct_l3num(ct
) == NFPROTO_IPV4
? 4 : 16);
182 memcpy(dest
, tuple
->dst
.u3
.all
,
183 nf_ct_l3num(ct
) == NFPROTO_IPV4
? 4 : 16);
185 case NFT_CT_PROTO_SRC
:
186 *dest
= (__force __u16
)tuple
->src
.u
.all
;
188 case NFT_CT_PROTO_DST
:
189 *dest
= (__force __u16
)tuple
->dst
.u
.all
;
196 regs
->verdict
.code
= NFT_BREAK
;
199 #ifdef CONFIG_NF_CONNTRACK_ZONES
200 static void nft_ct_set_zone_eval(const struct nft_expr
*expr
,
201 struct nft_regs
*regs
,
202 const struct nft_pktinfo
*pkt
)
204 struct nf_conntrack_zone zone
= { .dir
= NF_CT_DEFAULT_ZONE_DIR
};
205 const struct nft_ct
*priv
= nft_expr_priv(expr
);
206 struct sk_buff
*skb
= pkt
->skb
;
207 enum ip_conntrack_info ctinfo
;
208 u16 value
= regs
->data
[priv
->sreg
];
211 ct
= nf_ct_get(skb
, &ctinfo
);
212 if (ct
) /* already tracked */
218 case IP_CT_DIR_ORIGINAL
:
219 zone
.dir
= NF_CT_ZONE_DIR_ORIG
;
221 case IP_CT_DIR_REPLY
:
222 zone
.dir
= NF_CT_ZONE_DIR_REPL
;
228 ct
= this_cpu_read(nft_ct_pcpu_template
);
230 if (likely(atomic_read(&ct
->ct_general
.use
) == 1)) {
231 nf_ct_zone_add(ct
, &zone
);
233 /* previous skb got queued to userspace */
234 ct
= nf_ct_tmpl_alloc(nft_net(pkt
), &zone
, GFP_ATOMIC
);
236 regs
->verdict
.code
= NF_DROP
;
241 atomic_inc(&ct
->ct_general
.use
);
242 nf_ct_set(skb
, ct
, IP_CT_NEW
);
246 static void nft_ct_set_eval(const struct nft_expr
*expr
,
247 struct nft_regs
*regs
,
248 const struct nft_pktinfo
*pkt
)
250 const struct nft_ct
*priv
= nft_expr_priv(expr
);
251 struct sk_buff
*skb
= pkt
->skb
;
252 #ifdef CONFIG_NF_CONNTRACK_MARK
253 u32 value
= regs
->data
[priv
->sreg
];
255 enum ip_conntrack_info ctinfo
;
258 ct
= nf_ct_get(skb
, &ctinfo
);
263 #ifdef CONFIG_NF_CONNTRACK_MARK
265 if (ct
->mark
!= value
) {
267 nf_conntrack_event_cache(IPCT_MARK
, ct
);
271 #ifdef CONFIG_NF_CONNTRACK_LABELS
273 nf_connlabels_replace(ct
,
274 ®s
->data
[priv
->sreg
],
275 ®s
->data
[priv
->sreg
],
276 NF_CT_LABELS_MAX_SIZE
/ sizeof(u32
));
284 static const struct nla_policy nft_ct_policy
[NFTA_CT_MAX
+ 1] = {
285 [NFTA_CT_DREG
] = { .type
= NLA_U32
},
286 [NFTA_CT_KEY
] = { .type
= NLA_U32
},
287 [NFTA_CT_DIRECTION
] = { .type
= NLA_U8
},
288 [NFTA_CT_SREG
] = { .type
= NLA_U32
},
291 static int nft_ct_netns_get(struct net
*net
, uint8_t family
)
295 if (family
== NFPROTO_INET
) {
296 err
= nf_ct_netns_get(net
, NFPROTO_IPV4
);
299 err
= nf_ct_netns_get(net
, NFPROTO_IPV6
);
303 err
= nf_ct_netns_get(net
, family
);
310 nf_ct_netns_put(net
, NFPROTO_IPV4
);
315 static void nft_ct_netns_put(struct net
*net
, uint8_t family
)
317 if (family
== NFPROTO_INET
) {
318 nf_ct_netns_put(net
, NFPROTO_IPV4
);
319 nf_ct_netns_put(net
, NFPROTO_IPV6
);
321 nf_ct_netns_put(net
, family
);
324 #ifdef CONFIG_NF_CONNTRACK_ZONES
325 static void nft_ct_tmpl_put_pcpu(void)
330 for_each_possible_cpu(cpu
) {
331 ct
= per_cpu(nft_ct_pcpu_template
, cpu
);
335 per_cpu(nft_ct_pcpu_template
, cpu
) = NULL
;
339 static bool nft_ct_tmpl_alloc_pcpu(void)
341 struct nf_conntrack_zone zone
= { .id
= 0 };
345 if (nft_ct_pcpu_template_refcnt
)
348 for_each_possible_cpu(cpu
) {
349 tmp
= nf_ct_tmpl_alloc(&init_net
, &zone
, GFP_KERNEL
);
351 nft_ct_tmpl_put_pcpu();
355 atomic_set(&tmp
->ct_general
.use
, 1);
356 per_cpu(nft_ct_pcpu_template
, cpu
) = tmp
;
363 static int nft_ct_get_init(const struct nft_ctx
*ctx
,
364 const struct nft_expr
*expr
,
365 const struct nlattr
* const tb
[])
367 struct nft_ct
*priv
= nft_expr_priv(expr
);
371 priv
->key
= ntohl(nla_get_be32(tb
[NFTA_CT_KEY
]));
372 priv
->dir
= IP_CT_DIR_MAX
;
374 case NFT_CT_DIRECTION
:
375 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
381 #ifdef CONFIG_NF_CONNTRACK_MARK
384 #ifdef CONFIG_NF_CONNTRACK_SECMARK
387 case NFT_CT_EXPIRATION
:
388 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
392 #ifdef CONFIG_NF_CONNTRACK_LABELS
394 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
396 len
= NF_CT_LABELS_MAX_SIZE
;
400 if (tb
[NFTA_CT_DIRECTION
] != NULL
)
402 len
= NF_CT_HELPER_NAME_LEN
;
405 case NFT_CT_L3PROTOCOL
:
406 case NFT_CT_PROTOCOL
:
407 /* For compatibility, do not report error if NFTA_CT_DIRECTION
408 * attribute is specified.
414 if (tb
[NFTA_CT_DIRECTION
] == NULL
)
417 switch (ctx
->afi
->family
) {
419 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
,
424 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
,
428 return -EAFNOSUPPORT
;
431 case NFT_CT_PROTO_SRC
:
432 case NFT_CT_PROTO_DST
:
433 if (tb
[NFTA_CT_DIRECTION
] == NULL
)
435 len
= FIELD_SIZEOF(struct nf_conntrack_tuple
, src
.u
.all
);
442 #ifdef CONFIG_NF_CONNTRACK_ZONES
451 if (tb
[NFTA_CT_DIRECTION
] != NULL
) {
452 priv
->dir
= nla_get_u8(tb
[NFTA_CT_DIRECTION
]);
454 case IP_CT_DIR_ORIGINAL
:
455 case IP_CT_DIR_REPLY
:
462 priv
->dreg
= nft_parse_register(tb
[NFTA_CT_DREG
]);
463 err
= nft_validate_register_store(ctx
, priv
->dreg
, NULL
,
464 NFT_DATA_VALUE
, len
);
468 err
= nft_ct_netns_get(ctx
->net
, ctx
->afi
->family
);
472 if (priv
->key
== NFT_CT_BYTES
||
473 priv
->key
== NFT_CT_PKTS
||
474 priv
->key
== NFT_CT_AVGPKT
)
475 nf_ct_set_acct(ctx
->net
, true);
480 static void __nft_ct_set_destroy(const struct nft_ctx
*ctx
, struct nft_ct
*priv
)
483 #ifdef CONFIG_NF_CONNTRACK_LABELS
485 nf_connlabels_put(ctx
->net
);
488 #ifdef CONFIG_NF_CONNTRACK_ZONES
490 if (--nft_ct_pcpu_template_refcnt
== 0)
491 nft_ct_tmpl_put_pcpu();
498 static int nft_ct_set_init(const struct nft_ctx
*ctx
,
499 const struct nft_expr
*expr
,
500 const struct nlattr
* const tb
[])
502 struct nft_ct
*priv
= nft_expr_priv(expr
);
506 priv
->dir
= IP_CT_DIR_MAX
;
507 priv
->key
= ntohl(nla_get_be32(tb
[NFTA_CT_KEY
]));
509 #ifdef CONFIG_NF_CONNTRACK_MARK
511 if (tb
[NFTA_CT_DIRECTION
])
513 len
= FIELD_SIZEOF(struct nf_conn
, mark
);
516 #ifdef CONFIG_NF_CONNTRACK_LABELS
518 if (tb
[NFTA_CT_DIRECTION
])
520 len
= NF_CT_LABELS_MAX_SIZE
;
521 err
= nf_connlabels_get(ctx
->net
, (len
* BITS_PER_BYTE
) - 1);
526 #ifdef CONFIG_NF_CONNTRACK_ZONES
528 if (!nft_ct_tmpl_alloc_pcpu())
530 nft_ct_pcpu_template_refcnt
++;
538 if (tb
[NFTA_CT_DIRECTION
]) {
539 priv
->dir
= nla_get_u8(tb
[NFTA_CT_DIRECTION
]);
541 case IP_CT_DIR_ORIGINAL
:
542 case IP_CT_DIR_REPLY
:
549 priv
->sreg
= nft_parse_register(tb
[NFTA_CT_SREG
]);
550 err
= nft_validate_register_load(priv
->sreg
, len
);
554 err
= nft_ct_netns_get(ctx
->net
, ctx
->afi
->family
);
561 __nft_ct_set_destroy(ctx
, priv
);
565 static void nft_ct_get_destroy(const struct nft_ctx
*ctx
,
566 const struct nft_expr
*expr
)
568 nf_ct_netns_put(ctx
->net
, ctx
->afi
->family
);
571 static void nft_ct_set_destroy(const struct nft_ctx
*ctx
,
572 const struct nft_expr
*expr
)
574 struct nft_ct
*priv
= nft_expr_priv(expr
);
576 __nft_ct_set_destroy(ctx
, priv
);
577 nft_ct_netns_put(ctx
->net
, ctx
->afi
->family
);
580 static int nft_ct_get_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
582 const struct nft_ct
*priv
= nft_expr_priv(expr
);
584 if (nft_dump_register(skb
, NFTA_CT_DREG
, priv
->dreg
))
585 goto nla_put_failure
;
586 if (nla_put_be32(skb
, NFTA_CT_KEY
, htonl(priv
->key
)))
587 goto nla_put_failure
;
592 case NFT_CT_PROTO_SRC
:
593 case NFT_CT_PROTO_DST
:
594 if (nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
595 goto nla_put_failure
;
601 if (priv
->dir
< IP_CT_DIR_MAX
&&
602 nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
603 goto nla_put_failure
;
615 static int nft_ct_set_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
617 const struct nft_ct
*priv
= nft_expr_priv(expr
);
619 if (nft_dump_register(skb
, NFTA_CT_SREG
, priv
->sreg
))
620 goto nla_put_failure
;
621 if (nla_put_be32(skb
, NFTA_CT_KEY
, htonl(priv
->key
)))
622 goto nla_put_failure
;
626 if (priv
->dir
< IP_CT_DIR_MAX
&&
627 nla_put_u8(skb
, NFTA_CT_DIRECTION
, priv
->dir
))
628 goto nla_put_failure
;
640 static struct nft_expr_type nft_ct_type
;
641 static const struct nft_expr_ops nft_ct_get_ops
= {
642 .type
= &nft_ct_type
,
643 .size
= NFT_EXPR_SIZE(sizeof(struct nft_ct
)),
644 .eval
= nft_ct_get_eval
,
645 .init
= nft_ct_get_init
,
646 .destroy
= nft_ct_get_destroy
,
647 .dump
= nft_ct_get_dump
,
650 static const struct nft_expr_ops nft_ct_set_ops
= {
651 .type
= &nft_ct_type
,
652 .size
= NFT_EXPR_SIZE(sizeof(struct nft_ct
)),
653 .eval
= nft_ct_set_eval
,
654 .init
= nft_ct_set_init
,
655 .destroy
= nft_ct_set_destroy
,
656 .dump
= nft_ct_set_dump
,
659 #ifdef CONFIG_NF_CONNTRACK_ZONES
660 static const struct nft_expr_ops nft_ct_set_zone_ops
= {
661 .type
= &nft_ct_type
,
662 .size
= NFT_EXPR_SIZE(sizeof(struct nft_ct
)),
663 .eval
= nft_ct_set_zone_eval
,
664 .init
= nft_ct_set_init
,
665 .destroy
= nft_ct_set_destroy
,
666 .dump
= nft_ct_set_dump
,
670 static const struct nft_expr_ops
*
671 nft_ct_select_ops(const struct nft_ctx
*ctx
,
672 const struct nlattr
* const tb
[])
674 if (tb
[NFTA_CT_KEY
] == NULL
)
675 return ERR_PTR(-EINVAL
);
677 if (tb
[NFTA_CT_DREG
] && tb
[NFTA_CT_SREG
])
678 return ERR_PTR(-EINVAL
);
680 if (tb
[NFTA_CT_DREG
])
681 return &nft_ct_get_ops
;
683 if (tb
[NFTA_CT_SREG
]) {
684 #ifdef CONFIG_NF_CONNTRACK_ZONES
685 if (nla_get_be32(tb
[NFTA_CT_KEY
]) == htonl(NFT_CT_ZONE
))
686 return &nft_ct_set_zone_ops
;
688 return &nft_ct_set_ops
;
691 return ERR_PTR(-EINVAL
);
694 static struct nft_expr_type nft_ct_type __read_mostly
= {
696 .select_ops
= &nft_ct_select_ops
,
697 .policy
= nft_ct_policy
,
698 .maxattr
= NFTA_CT_MAX
,
699 .owner
= THIS_MODULE
,
702 static void nft_notrack_eval(const struct nft_expr
*expr
,
703 struct nft_regs
*regs
,
704 const struct nft_pktinfo
*pkt
)
706 struct sk_buff
*skb
= pkt
->skb
;
707 enum ip_conntrack_info ctinfo
;
710 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
711 /* Previously seen (loopback or untracked)? Ignore. */
715 ct
= nf_ct_untracked_get();
716 atomic_inc(&ct
->ct_general
.use
);
717 nf_ct_set(skb
, ct
, IP_CT_NEW
);
720 static struct nft_expr_type nft_notrack_type
;
721 static const struct nft_expr_ops nft_notrack_ops
= {
722 .type
= &nft_notrack_type
,
723 .size
= NFT_EXPR_SIZE(0),
724 .eval
= nft_notrack_eval
,
727 static struct nft_expr_type nft_notrack_type __read_mostly
= {
729 .ops
= &nft_notrack_ops
,
730 .owner
= THIS_MODULE
,
733 static int __init
nft_ct_module_init(void)
737 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE
> NFT_REG_SIZE
);
739 err
= nft_register_expr(&nft_ct_type
);
743 err
= nft_register_expr(&nft_notrack_type
);
749 nft_unregister_expr(&nft_ct_type
);
753 static void __exit
nft_ct_module_exit(void)
755 nft_unregister_expr(&nft_notrack_type
);
756 nft_unregister_expr(&nft_ct_type
);
759 module_init(nft_ct_module_init
);
760 module_exit(nft_ct_module_exit
);
762 MODULE_LICENSE("GPL");
763 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
764 MODULE_ALIAS_NFT_EXPR("ct");
765 MODULE_ALIAS_NFT_EXPR("notrack");