// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* -
 * net/sched/act_ct.c  Connection Tracking action
 *
 * Authors:   Paul Blakey <paulb@mellanox.com>
 *            Yossi Kuperman <yossiku@mellanox.com>
 *            Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
 */
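
/*
 * Illustrative usage from userspace (a sketch, assuming a reasonably recent
 * iproute2; exact option spelling can differ between versions):
 *
 *   tc filter add dev eth0 ingress protocol ip flower \
 *       action ct zone 1 commit nat src addr 192.0.2.1 pipe \
 *       action mirred egress redirect dev eth1
 *
 * The action runs packets through netfilter conntrack from the TC datapath
 * and can optionally commit connections, apply NAT and set the conntrack
 * mark, labels and zone.
 */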

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_cls.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/rhashtable.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/ip.h>
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>

#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <uapi/linux/netfilter/nf_nat.h>

static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_MUTEX(zones_mutex);

struct tcf_ct_flow_table {
	struct rhash_head node; /* In zones tables */

	struct rcu_work rwork;
	struct nf_flowtable nf_ft;
	refcount_t ref;
	u16 zone;
};

static const struct rhashtable_params zones_params = {
	.head_offset = offsetof(struct tcf_ct_flow_table, node),
	.key_offset = offsetof(struct tcf_ct_flow_table, zone),
	.key_len = sizeof_field(struct tcf_ct_flow_table, zone),
	.automatic_shrinking = true,
};
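
/*
 * Flow tables are shared per conntrack zone: zones_ht maps a zone id to a
 * refcounted tcf_ct_flow_table, so every ct action instance configured with
 * the same zone reuses one nf_flowtable (and one hardware offload table).
 */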

static struct flow_action_entry *
tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
{
	int i = flow_action->num_entries++;

	return &flow_action->entries[i];
}

static void tcf_ct_add_mangle_action(struct flow_action *action,
				     enum flow_action_mangle_base htype,
				     u32 offset,
				     u32 mask,
				     u32 val)
{
	struct flow_action_entry *entry;

	entry = tcf_ct_flow_table_flow_action_get_next(action);
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.mask = ~mask;
	entry->mangle.offset = offset;
	entry->mangle.val = val;
}
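
/*
 * Callers pass 'mask' as the bits to be rewritten (0xFFFF for a full port,
 * 0xFFFFFFFF for a full 32-bit word).  The entry stores the inverted mask,
 * i.e. the bits to preserve, matching the pedit-style convention used by
 * flow_action mangle entries.
 */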

/* The following NAT helper functions check whether the inverted reverse
 * tuple (target) differs from the current direction's tuple - meaning NAT
 * of the ports and/or IP addresses is needed - and add the relevant mangle
 * actions.
 */
static void
tcf_ct_flow_table_add_action_nat_ipv4(const struct nf_conntrack_tuple *tuple,
				      struct nf_conntrack_tuple target,
				      struct flow_action *action)
{
	if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3)))
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4,
					 offsetof(struct iphdr, saddr),
					 0xFFFFFFFF,
					 be32_to_cpu(target.src.u3.ip));
	if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3)))
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4,
					 offsetof(struct iphdr, daddr),
					 0xFFFFFFFF,
					 be32_to_cpu(target.dst.u3.ip));
}
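
/*
 * Example (illustrative): with source NAT translating 10.0.0.1 to 192.0.2.1,
 * the original-direction tuple carries src 10.0.0.1 while the inverted reply
 * tuple (target) carries src 192.0.2.1, so only a saddr mangle is emitted;
 * the destination address compares equal and no daddr mangle is added.
 */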

static void
tcf_ct_add_ipv6_addr_mangle_action(struct flow_action *action,
				   union nf_inet_addr *addr,
				   u32 offset)
{
	int i;

	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
					 i * sizeof(u32) + offset,
					 0xFFFFFFFF, be32_to_cpu(addr->ip6[i]));
}

static void
tcf_ct_flow_table_add_action_nat_ipv6(const struct nf_conntrack_tuple *tuple,
				      struct nf_conntrack_tuple target,
				      struct flow_action *action)
{
	if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3)))
		tcf_ct_add_ipv6_addr_mangle_action(action, &target.src.u3,
						   offsetof(struct ipv6hdr,
							    saddr));
	if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3)))
		tcf_ct_add_ipv6_addr_mangle_action(action, &target.dst.u3,
						   offsetof(struct ipv6hdr,
							    daddr));
}

static void
tcf_ct_flow_table_add_action_nat_tcp(const struct nf_conntrack_tuple *tuple,
				     struct nf_conntrack_tuple target,
				     struct flow_action *action)
{
	__be16 target_src = target.src.u.tcp.port;
	__be16 target_dst = target.dst.u.tcp.port;

	if (target_src != tuple->src.u.tcp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
					 offsetof(struct tcphdr, source),
					 0xFFFF, be16_to_cpu(target_src));
	if (target_dst != tuple->dst.u.tcp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
					 offsetof(struct tcphdr, dest),
					 0xFFFF, be16_to_cpu(target_dst));
}

static void
tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
				     struct nf_conntrack_tuple target,
				     struct flow_action *action)
{
	__be16 target_src = target.src.u.udp.port;
	__be16 target_dst = target.dst.u.udp.port;

	if (target_src != tuple->src.u.udp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
					 offsetof(struct udphdr, source),
					 0xFFFF, be16_to_cpu(target_src));
	if (target_dst != tuple->dst.u.udp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
					 offsetof(struct udphdr, dest),
					 0xFFFF, be16_to_cpu(target_dst));
}

static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
					      enum ip_conntrack_dir dir,
					      struct flow_action *action)
{
	struct nf_conn_labels *ct_labels;
	struct flow_action_entry *entry;
	enum ip_conntrack_info ctinfo;
	u32 *act_ct_labels;

	entry = tcf_ct_flow_table_flow_action_get_next(action);
	entry->id = FLOW_ACTION_CT_METADATA;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	entry->ct_metadata.mark = ct->mark;
#endif
	ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
					     IP_CT_ESTABLISHED_REPLY;
	/* aligns with the CT reference on the SKB nf_ct_set */
	entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;

	act_ct_labels = entry->ct_metadata.labels;
	ct_labels = nf_ct_labels_find(ct);
	if (ct_labels)
		memcpy(act_ct_labels, ct_labels->bits, NF_CT_LABELS_MAX_SIZE);
	else
		memset(act_ct_labels, 0, NF_CT_LABELS_MAX_SIZE);
}
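
/*
 * The CT_METADATA entry lets offloading drivers restore conntrack state on
 * packets that take the hardware fast path: mark and labels are copied as-is,
 * and the cookie packs the nf_conn pointer together with ctinfo exactly the
 * way nf_ct_set() stores them in skb->_nfct.
 */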

static int tcf_ct_flow_table_add_action_nat(struct net *net,
					     struct nf_conn *ct,
					     enum ip_conntrack_dir dir,
					     struct flow_action *action)
{
	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
	struct nf_conntrack_tuple target;

	if (!(ct->status & IPS_NAT_MASK))
		return 0;

	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);

	switch (tuple->src.l3num) {
	case NFPROTO_IPV4:
		tcf_ct_flow_table_add_action_nat_ipv4(tuple, target,
						      action);
		break;
	case NFPROTO_IPV6:
		tcf_ct_flow_table_add_action_nat_ipv6(tuple, target,
						      action);
		break;
	default:
		return -EOPNOTSUPP;
	}

	switch (nf_ct_protonum(ct)) {
	case IPPROTO_TCP:
		tcf_ct_flow_table_add_action_nat_tcp(tuple, target, action);
		break;
	case IPPROTO_UDP:
		tcf_ct_flow_table_add_action_nat_udp(tuple, target, action);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return 0;
}

static int tcf_ct_flow_table_fill_actions(struct net *net,
					  const struct flow_offload *flow,
					  enum flow_offload_tuple_dir tdir,
					  struct nf_flow_rule *flow_rule)
{
	struct flow_action *action = &flow_rule->rule->action;
	int num_entries = action->num_entries;
	struct nf_conn *ct = flow->ct;
	enum ip_conntrack_dir dir;
	int i, err;

	switch (tdir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		dir = IP_CT_DIR_ORIGINAL;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		dir = IP_CT_DIR_REPLY;
		break;
	default:
		return -EOPNOTSUPP;
	}

	err = tcf_ct_flow_table_add_action_nat(net, ct, dir, action);
	if (err)
		goto err_nat;

	tcf_ct_flow_table_add_action_meta(ct, dir, action);
	return 0;

err_nat:
	/* Clear filled actions */
	for (i = num_entries; i < action->num_entries; i++)
		memset(&action->entries[i], 0, sizeof(action->entries[i]));
	action->num_entries = num_entries;

	return err;
}
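
/*
 * This is the flowtable's ->action() callback: it translates an offloaded
 * flow_offload entry into flow_action entries (NAT mangles plus CT metadata).
 * If building the NAT part fails, the entries filled so far are cleared and
 * num_entries is rolled back so the rule is left untouched.
 */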

static struct nf_flowtable_type flowtable_ct = {
	.action		= tcf_ct_flow_table_fill_actions,
	.owner		= THIS_MODULE,
};

static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
{
	struct tcf_ct_flow_table *ct_ft;
	int err = -ENOMEM;

	mutex_lock(&zones_mutex);
	ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
	if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
		goto out_unlock;

	ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
	if (!ct_ft)
		goto err_alloc;
	refcount_set(&ct_ft->ref, 1);

	ct_ft->zone = params->zone;
	err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
	if (err)
		goto err_insert;

	ct_ft->nf_ft.type = &flowtable_ct;
	ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD |
			      NF_FLOWTABLE_COUNTER;
	err = nf_flow_table_init(&ct_ft->nf_ft);
	if (err)
		goto err_init;

	__module_get(THIS_MODULE);
out_unlock:
	params->ct_ft = ct_ft;
	params->nf_ft = &ct_ft->nf_ft;
	mutex_unlock(&zones_mutex);

	return 0;

err_init:
	rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
err_insert:
	kfree(ct_ft);
err_alloc:
	mutex_unlock(&zones_mutex);
	return err;
}
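
/*
 * The __module_get() above pairs with the module_put() in the cleanup work
 * below, keeping act_ct loaded for as long as any zone flow table exists.
 */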

static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
{
	struct tcf_ct_flow_table *ct_ft;

	ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
			     rwork);
	nf_flow_table_free(&ct_ft->nf_ft);
	kfree(ct_ft);

	module_put(THIS_MODULE);
}

static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
{
	struct tcf_ct_flow_table *ct_ft = params->ct_ft;

	if (refcount_dec_and_test(&params->ct_ft->ref)) {
		rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
		INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
		queue_rcu_work(act_ct_wq, &ct_ft->rwork);
	}
}
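
/*
 * Freeing is deferred through queue_rcu_work() so that concurrent datapath
 * users of the flow table (tcf_ct_flow_table_lookup() under RCU) have
 * finished before nf_flow_table_free() tears it down.
 */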

static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
				  struct nf_conn *ct,
				  bool tcp)
{
	struct flow_offload *entry;
	int err;

	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
		return;

	entry = flow_offload_alloc(ct);
	if (!entry) {
		WARN_ON_ONCE(1);
		goto err_alloc;
	}

	if (tcp) {
		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
	}

	err = flow_offload_add(&ct_ft->nf_ft, entry);
	if (err)
		goto err_add;

	return;

err_add:
	flow_offload_free(entry);
err_alloc:
	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
}

static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
					   struct nf_conn *ct,
					   enum ip_conntrack_info ctinfo)
{
	bool tcp = false;

	if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
		return;

	switch (nf_ct_protonum(ct)) {
	case IPPROTO_TCP:
		tcp = true;
		if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
			return;
		break;
	case IPPROTO_UDP:
		break;
	default:
		return;
	}

	if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
	    ct->status & IPS_SEQ_ADJUST)
		return;

	tcf_ct_flow_table_add(ct_ft, ct, tcp);
}
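
/*
 * Only established TCP and UDP connections without a conntrack helper or
 * sequence adjustment are pushed to the flow table.  TCP windows are marked
 * liberal in tcf_ct_flow_table_add() because packets taking the fast path
 * bypass the usual window tracking.
 */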

static bool
tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  struct tcphdr **tcph)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;

	if (!pskb_network_may_pull(skb, sizeof(*iph)))
		return false;

	iph = ip_hdr(skb);
	thoff = iph->ihl * 4;

	if (ip_is_fragment(iph) ||
	    unlikely(thoff != sizeof(struct iphdr)))
		return false;

	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return false;

	if (iph->ttl <= 1)
		return false;

	if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
				   thoff + sizeof(struct tcphdr) :
				   thoff + sizeof(*ports)))
		return false;

	iph = ip_hdr(skb);
	if (iph->protocol == IPPROTO_TCP)
		*tcph = (void *)(skb_network_header(skb) + thoff);

	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET;
	tuple->l4proto = iph->protocol;

	return true;
}

static bool
tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  struct tcphdr **tcph)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;

	if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
		return false;

	ip6h = ipv6_hdr(skb);

	if (ip6h->nexthdr != IPPROTO_TCP &&
	    ip6h->nexthdr != IPPROTO_UDP)
		return false;

	if (ip6h->hop_limit <= 1)
		return false;

	thoff = sizeof(*ip6h);
	if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
				   thoff + sizeof(struct tcphdr) :
				   thoff + sizeof(*ports)))
		return false;

	ip6h = ipv6_hdr(skb);
	if (ip6h->nexthdr == IPPROTO_TCP)
		*tcph = (void *)(skb_network_header(skb) + thoff);

	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = ip6h->nexthdr;

	return true;
}

static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
				     struct sk_buff *skb,
				     u8 family)
{
	struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
	struct flow_offload_tuple_rhash *tuplehash;
	struct flow_offload_tuple tuple = {};
	enum ip_conntrack_info ctinfo;
	struct tcphdr *tcph = NULL;
	struct flow_offload *flow;
	struct nf_conn *ct;
	u8 dir;

	/* Previously seen or loopback */
	ct = nf_ct_get(skb, &ctinfo);
	if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
		return false;

	switch (family) {
	case NFPROTO_IPV4:
		if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
			return false;
		break;
	case NFPROTO_IPV6:
		if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
			return false;
		break;
	default:
		return false;
	}

	tuplehash = flow_offload_lookup(nf_ft, &tuple);
	if (!tuplehash)
		return false;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	ct = flow->ct;

	if (tcph && (unlikely(tcph->fin || tcph->rst))) {
		flow_offload_teardown(flow);
		return false;
	}

	ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
						    IP_CT_ESTABLISHED_REPLY;

	flow_offload_refresh(nf_ft, flow);
	nf_conntrack_get(&ct->ct_general);
	nf_ct_set(skb, ct, ctinfo);
	if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(ct, dir, skb->len);

	return true;
}
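
/*
 * Software fast path: if the packet matches an offloaded flow, conntrack
 * state is attached directly from the flow table entry and nf_conntrack_in()
 * can be skipped by the caller.  FIN/RST packets tear the flow down so
 * closing connections fall back to the normal conntrack path.
 */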

static int tcf_ct_flow_tables_init(void)
{
	return rhashtable_init(&zones_ht, &zones_params);
}

static void tcf_ct_flow_tables_uninit(void)
{
	rhashtable_destroy(&zones_ht);
}

static struct tc_action_ops act_ct_ops;
static unsigned int ct_net_id;

struct tc_ct_action_net {
	struct tc_action_net tn; /* Must be first */
	bool labels;
};

/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
				   u16 zone_id, bool force)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return false;
	if (!net_eq(net, read_pnet(&ct->ct_net)))
		return false;
	if (nf_ct_zone(ct)->id != zone_id)
		return false;

	/* Force conntrack entry direction. */
	if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
		if (nf_ct_is_confirmed(ct))
			nf_ct_kill(ct);

		nf_conntrack_put(&ct->ct_general);
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);

		return false;
	}

	return true;
}

/* Trim the skb to the length specified by the IP/IPv6 header,
 * removing any trailing lower-layer padding. This prepares the skb
 * for higher-layer processing that assumes skb->len excludes padding
 * (such as nf_ip_checksum). The caller needs to pull the skb to the
 * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
 */
static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
{
	unsigned int len;
	int err;

	switch (family) {
	case NFPROTO_IPV4:
		len = ntohs(ip_hdr(skb)->tot_len);
		break;
	case NFPROTO_IPV6:
		len = sizeof(struct ipv6hdr)
			+ ntohs(ipv6_hdr(skb)->payload_len);
		break;
	default:
		len = skb->len;
	}

	err = pskb_trim_rcsum(skb, len);

	return err;
}

static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
	u8 family = NFPROTO_UNSPEC;

	switch (skb_protocol(skb, true)) {
	case htons(ETH_P_IP):
		family = NFPROTO_IPV4;
		break;
	case htons(ETH_P_IPV6):
		family = NFPROTO_IPV6;
		break;
	default:
		break;
	}

	return family;
}

static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag)
{
	unsigned int len;

	len = skb_network_offset(skb) + sizeof(struct iphdr);
	if (unlikely(skb->len < len))
		return -EINVAL;
	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	*frag = ip_is_fragment(ip_hdr(skb));
	return 0;
}

static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag)
{
	unsigned int flags = 0, len, payload_ofs = 0;
	unsigned short frag_off;
	int nexthdr;

	len = skb_network_offset(skb) + sizeof(struct ipv6hdr);
	if (unlikely(skb->len < len))
		return -EINVAL;
	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
	if (unlikely(nexthdr < 0))
		return -EPROTO;

	*frag = flags & IP6_FH_F_FRAG;
	return 0;
}

static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
				   u8 family, u16 zone, bool *defrag)
{
	enum ip_conntrack_info ctinfo;
	struct qdisc_skb_cb cb;
	struct nf_conn *ct;
	int err = 0;
	bool frag;

	/* Previously seen (loopback)? Ignore. */
	ct = nf_ct_get(skb, &ctinfo);
	if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
		return 0;

	if (family == NFPROTO_IPV4)
		err = tcf_ct_ipv4_is_fragment(skb, &frag);
	else
		err = tcf_ct_ipv6_is_fragment(skb, &frag);
	if (err || !frag)
		return err;

	skb_get(skb);
	cb = *qdisc_skb_cb(skb);

	if (family == NFPROTO_IPV4) {
		enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;

		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
		local_bh_disable();
		err = ip_defrag(net, skb, user);
		local_bh_enable();
		if (err && err != -EINPROGRESS)
			return err;

		if (!err) {
			*defrag = true;
			cb.mru = IPCB(skb)->frag_max_size;
		}
	} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
		enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;

		memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
		err = nf_ct_frag6_gather(net, skb, user);
		if (err && err != -EINPROGRESS)
			goto out_free;

		if (!err) {
			*defrag = true;
			cb.mru = IP6CB(skb)->frag_max_size;
		}
#else
		err = -EOPNOTSUPP;
		goto out_free;
#endif
	}

	*qdisc_skb_cb(skb) = cb;
	skb->ignore_df = 1;
	return err;

out_free:
	kfree_skb(skb);
	return err;
}
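
/*
 * The qdisc cb block is saved before defragmentation and restored afterwards
 * because IPCB()/IP6CB() alias the same skb->cb area that the qdisc layer
 * uses; defrag would otherwise clobber qdisc state such as the recorded
 * packet length.
 */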

static void tcf_ct_params_free(struct rcu_head *head)
{
	struct tcf_ct_params *params = container_of(head,
						    struct tcf_ct_params, rcu);

	tcf_ct_flow_table_put(params);

	if (params->tmpl)
		nf_conntrack_put(&params->tmpl->ct_general);
	kfree(params);
}

#if IS_ENABLED(CONFIG_NF_NAT)
/* Modelled after nf_nat_ipv[46]_fn().
 * range is only used for new, uninitialized NAT state.
 * Returns either NF_ACCEPT or NF_DROP.
 */
static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct nf_nat_range2 *range,
			  enum nf_nat_manip_type maniptype)
{
	__be16 proto = skb_protocol(skb, true);
	int hooknum, err = NF_ACCEPT;

	/* See HOOK2MANIP(). */
	if (maniptype == NF_NAT_MANIP_SRC)
		hooknum = NF_INET_LOCAL_IN; /* Source NAT */
	else
		hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		if (proto == htons(ETH_P_IP) &&
		    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   hooknum))
				err = NF_DROP;
			goto out;
		} else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
			__be16 frag_off;
			u8 nexthdr = ipv6_hdr(skb)->nexthdr;
			int hdrlen = ipv6_skip_exthdr(skb,
						      sizeof(struct ipv6hdr),
						      &nexthdr, &frag_off);

			if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
				if (!nf_nat_icmpv6_reply_translation(skb, ct,
								     ctinfo,
								     hooknum,
								     hdrlen))
					err = NF_DROP;
				goto out;
			}
		}
		/* Non-ICMP, fall thru to initialize if needed. */
		fallthrough;
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			/* Initialize according to the NAT action. */
			err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
				/* Action is set up to establish a new
				 * mapping.
				 */
				? nf_nat_setup_info(ct, range, maniptype)
				: nf_nat_alloc_null_binding(ct, hooknum);
			if (err != NF_ACCEPT)
				goto out;
		}
		break;

	case IP_CT_ESTABLISHED:
	case IP_CT_ESTABLISHED_REPLY:
		break;

	default:
		err = NF_DROP;
		goto out;
	}

	err = nf_nat_packet(ct, ctinfo, hooknum, skb);
out:
	return err;
}
#endif /* CONFIG_NF_NAT */

static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	u32 new_mark;

	if (!mask)
		return;

	new_mark = mark | (ct->mark & ~(mask));
	if (ct->mark != new_mark) {
		ct->mark = new_mark;
		if (nf_ct_is_confirmed(ct))
			nf_conntrack_event_cache(IPCT_MARK, ct);
	}
#endif
}

static void tcf_ct_act_set_labels(struct nf_conn *ct,
				  u32 *labels,
				  u32 *labels_m)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
	size_t labels_sz = sizeof_field(struct tcf_ct_params, labels);

	if (!memchr_inv(labels_m, 0, labels_sz))
		return;

	nf_connlabels_replace(ct, labels, labels_m, 4);
#endif
}

static int tcf_ct_act_nat(struct sk_buff *skb,
			  struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  int ct_action,
			  struct nf_nat_range2 *range,
			  bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
	enum nf_nat_manip_type maniptype;
	int err;

	if (!(ct_action & TCA_CT_ACT_NAT))
		return NF_ACCEPT;

	/* Add NAT extension if not confirmed yet. */
	if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
		return NF_DROP;   /* Can't NAT. */

	if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
	    (ctinfo != IP_CT_RELATED || commit)) {
		/* NAT an established or related connection like before. */
		if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
			/* This is the REPLY direction for a connection
			 * for which NAT was applied in the forward
			 * direction.  Do the reverse NAT.
			 */
			maniptype = ct->status & IPS_SRC_NAT
				? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
		else
			maniptype = ct->status & IPS_SRC_NAT
				? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
	} else if (ct_action & TCA_CT_ACT_NAT_SRC) {
		maniptype = NF_NAT_MANIP_SRC;
	} else if (ct_action & TCA_CT_ACT_NAT_DST) {
		maniptype = NF_NAT_MANIP_DST;
	} else {
		return NF_ACCEPT;
	}

	err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
	if (err == NF_ACCEPT &&
	    ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
		if (maniptype == NF_NAT_MANIP_SRC)
			maniptype = NF_NAT_MANIP_DST;
		else
			maniptype = NF_NAT_MANIP_SRC;

		err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
	}
	return err;
#else
	return NF_ACCEPT;
#endif
}

static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
		      struct tcf_result *res)
{
	struct net *net = dev_net(skb->dev);
	bool cached, commit, clear, force;
	enum ip_conntrack_info ctinfo;
	struct tcf_ct *c = to_ct(a);
	struct nf_conn *tmpl = NULL;
	struct nf_hook_state state;
	int nh_ofs, err, retval;
	struct tcf_ct_params *p;
	bool skip_add = false;
	bool defrag = false;
	struct nf_conn *ct;
	u8 family;

	p = rcu_dereference_bh(c->params);

	retval = READ_ONCE(c->tcf_action);
	commit = p->ct_action & TCA_CT_ACT_COMMIT;
	clear = p->ct_action & TCA_CT_ACT_CLEAR;
	force = p->ct_action & TCA_CT_ACT_FORCE;
	tmpl = p->tmpl;

	tcf_lastuse_update(&c->tcf_tm);

	if (clear) {
		ct = nf_ct_get(skb, &ctinfo);
		if (ct) {
			nf_conntrack_put(&ct->ct_general);
			nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		}

		goto out;
	}

	family = tcf_ct_skb_nf_family(skb);
	if (family == NFPROTO_UNSPEC)
		goto drop;

	/* The conntrack module expects to be working at L3.
	 * We also try to pull the IPv4/6 header to linear area
	 */
	nh_ofs = skb_network_offset(skb);
	skb_pull_rcsum(skb, nh_ofs);
	err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
	if (err == -EINPROGRESS) {
		retval = TC_ACT_STOLEN;
		goto out;
	}
	if (err)
		goto drop;

	err = tcf_ct_skb_network_trim(skb, family);
	if (err)
		goto drop;

	/* If we are recirculating packets to match on ct fields and
	 * committing with a separate ct action, then we don't need to
	 * actually run the packet through conntrack twice unless it's for a
	 * different zone.
	 */
	cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
	if (!cached) {
		if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
			skip_add = true;
			goto do_nat;
		}

		/* Associate skb with specified zone. */
		if (tmpl) {
			ct = nf_ct_get(skb, &ctinfo);
			if (skb_nfct(skb))
				nf_conntrack_put(skb_nfct(skb));
			nf_conntrack_get(&tmpl->ct_general);
			nf_ct_set(skb, tmpl, IP_CT_NEW);
		}

		state.hook = NF_INET_PRE_ROUTING;
		state.net = net;
		state.pf = family;
		err = nf_conntrack_in(skb, &state);
		if (err != NF_ACCEPT)
			goto out_push;
	}

do_nat:
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		goto out_push;
	nf_ct_deliver_cached_events(ct);

	err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
	if (err != NF_ACCEPT)
		goto drop;

	if (commit) {
		tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
		tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);

		/* This will take care of sending queued events
		 * even if the connection is already confirmed.
		 */
		nf_conntrack_confirm(skb);
	} else if (!skip_add) {
		tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
	}

out_push:
	skb_push_rcsum(skb, nh_ofs);

out:
	tcf_action_update_bstats(&c->common, skb);
	if (defrag)
		qdisc_skb_cb(skb)->pkt_len = skb->len;
	return retval;

drop:
	tcf_action_inc_drop_qstats(&c->common);
	return TC_ACT_SHOT;
}
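
/*
 * Datapath summary for the act() handler above: clear short-circuits and
 * drops any existing conntrack reference; otherwise the packet is pulled to
 * L3, defragmented and trimmed, then either served from the flow-table fast
 * path or run through nf_conntrack_in(), NAT is applied, and on commit the
 * mark/labels are set and the connection is confirmed.
 */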

static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
	[TCA_CT_ACTION] = { .type = NLA_U16 },
	[TCA_CT_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_ct)),
	[TCA_CT_ZONE] = { .type = NLA_U16 },
	[TCA_CT_MARK] = { .type = NLA_U32 },
	[TCA_CT_MARK_MASK] = { .type = NLA_U32 },
	[TCA_CT_LABELS] = { .type = NLA_BINARY,
			    .len = 128 / BITS_PER_BYTE },
	[TCA_CT_LABELS_MASK] = { .type = NLA_BINARY,
				 .len = 128 / BITS_PER_BYTE },
	[TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 },
	[TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 },
	[TCA_CT_NAT_IPV6_MIN] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
	[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
};

static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
				  struct tc_ct *parm,
				  struct nlattr **tb,
				  struct netlink_ext_ack *extack)
{
	struct nf_nat_range2 *range;

	if (!(p->ct_action & TCA_CT_ACT_NAT))
		return 0;

	if (!IS_ENABLED(CONFIG_NF_NAT)) {
		NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel");
		return -EOPNOTSUPP;
	}

	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
		return 0;

	if ((p->ct_action & TCA_CT_ACT_NAT_SRC) &&
	    (p->ct_action & TCA_CT_ACT_NAT_DST)) {
		NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time");
		return -EOPNOTSUPP;
	}

	range = &p->range;
	if (tb[TCA_CT_NAT_IPV4_MIN]) {
		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX];

		p->ipv4_range = true;
		range->flags |= NF_NAT_RANGE_MAP_IPS;
		range->min_addr.ip =
			nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);

		range->max_addr.ip = max_attr ?
				     nla_get_in_addr(max_attr) :
				     range->min_addr.ip;
	} else if (tb[TCA_CT_NAT_IPV6_MIN]) {
		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];

		p->ipv4_range = false;
		range->flags |= NF_NAT_RANGE_MAP_IPS;
		range->min_addr.in6 =
			nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]);

		range->max_addr.in6 = max_attr ?
				      nla_get_in6_addr(max_attr) :
				      range->min_addr.in6;
	}

	if (tb[TCA_CT_NAT_PORT_MIN]) {
		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
		range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]);

		range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ?
				       nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) :
				       range->min_proto.all;
	}

	return 0;
}

static void tcf_ct_set_key_val(struct nlattr **tb,
			       void *val, int val_type,
			       void *mask, int mask_type,
			       int len)
{
	if (!tb[val_type])
		return;
	nla_memcpy(val, tb[val_type], len);

	if (!mask)
		return;

	if (mask_type == TCA_CT_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);
	else
		nla_memcpy(mask, tb[mask_type], len);
}

static int tcf_ct_fill_params(struct net *net,
			      struct tcf_ct_params *p,
			      struct tc_ct *parm,
			      struct nlattr **tb,
			      struct netlink_ext_ack *extack)
{
	struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
	struct nf_conntrack_zone zone;
	struct nf_conn *tmpl;
	int err;

	p->zone = NF_CT_DEFAULT_ZONE_ID;

	tcf_ct_set_key_val(tb,
			   &p->ct_action, TCA_CT_ACTION,
			   NULL, TCA_CT_UNSPEC,
			   sizeof(p->ct_action));

	if (p->ct_action & TCA_CT_ACT_CLEAR)
		return 0;

	err = tcf_ct_fill_params_nat(p, parm, tb, extack);
	if (err)
		return err;

	if (tb[TCA_CT_MARK]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled.");
			return -EOPNOTSUPP;
		}
		tcf_ct_set_key_val(tb,
				   &p->mark, TCA_CT_MARK,
				   &p->mark_mask, TCA_CT_MARK_MASK,
				   sizeof(p->mark));
	}

	if (tb[TCA_CT_LABELS]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled.");
			return -EOPNOTSUPP;
		}

		if (!tn->labels) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
			return -EOPNOTSUPP;
		}
		tcf_ct_set_key_val(tb,
				   p->labels, TCA_CT_LABELS,
				   p->labels_mask, TCA_CT_LABELS_MASK,
				   sizeof(p->labels));
	}

	if (tb[TCA_CT_ZONE]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled.");
			return -EOPNOTSUPP;
		}

		tcf_ct_set_key_val(tb,
				   &p->zone, TCA_CT_ZONE,
				   NULL, TCA_CT_UNSPEC,
				   sizeof(p->zone));
	}

	if (p->zone == NF_CT_DEFAULT_ZONE_ID)
		return 0;

	nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
	tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
	if (!tmpl) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
		return -ENOMEM;
	}
	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
	nf_conntrack_get(&tmpl->ct_general);
	p->tmpl = tmpl;

	return 0;
}

static int tcf_ct_init(struct net *net, struct nlattr *nla,
		       struct nlattr *est, struct tc_action **a,
		       int replace, int bind, bool rtnl_held,
		       struct tcf_proto *tp, u32 flags,
		       struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, ct_net_id);
	struct tcf_ct_params *params = NULL;
	struct nlattr *tb[TCA_CT_MAX + 1];
	struct tcf_chain *goto_ch = NULL;
	struct tc_ct *parm;
	struct tcf_ct *c;
	int err, res = 0;
	u32 index;

	if (!nla) {
		NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_CT_PARMS]) {
		NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters");
		return -EINVAL;
	}
	parm = nla_data(tb[TCA_CT_PARMS]);
	index = parm->index;
	err = tcf_idr_check_alloc(tn, &index, a, bind);
	if (err < 0)
		return err;

	if (!err) {
		err = tcf_idr_create_from_flags(tn, index, est, a,
						&act_ct_ops, bind, flags);
		if (err) {
			tcf_idr_cleanup(tn, index);
			return err;
		}
		res = ACT_P_CREATED;
	} else {
		if (bind)
			return 0;

		if (!replace) {
			tcf_idr_release(*a, bind);
			return -EEXIST;
		}
	}
	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
	if (err < 0)
		goto cleanup;

	c = to_ct(*a);

	params = kzalloc(sizeof(*params), GFP_KERNEL);
	if (unlikely(!params)) {
		err = -ENOMEM;
		goto cleanup;
	}

	err = tcf_ct_fill_params(net, params, parm, tb, extack);
	if (err)
		goto cleanup;

	err = tcf_ct_flow_table_get(params);
	if (err)
		goto cleanup;

	spin_lock_bh(&c->tcf_lock);
	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
	params = rcu_replace_pointer(c->params, params,
				     lockdep_is_held(&c->tcf_lock));
	spin_unlock_bh(&c->tcf_lock);

	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	if (params)
		call_rcu(&params->rcu, tcf_ct_params_free);

	return res;

cleanup:
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	kfree(params);
	tcf_idr_release(*a, bind);
	return err;
}

static void tcf_ct_cleanup(struct tc_action *a)
{
	struct tcf_ct_params *params;
	struct tcf_ct *c = to_ct(a);

	params = rcu_dereference_protected(c->params, 1);
	if (params)
		call_rcu(&params->rcu, tcf_ct_params_free);
}

static int tcf_ct_dump_key_val(struct sk_buff *skb,
			       void *val, int val_type,
			       void *mask, int mask_type,
			       int len)
{
	int err;

	if (mask && !memchr_inv(mask, 0, len))
		return 0;

	err = nla_put(skb, val_type, len, val);
	if (err)
		return err;

	if (mask_type != TCA_CT_UNSPEC) {
		err = nla_put(skb, mask_type, len, mask);
		if (err)
			return err;
	}

	return 0;
}

static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
{
	struct nf_nat_range2 *range = &p->range;

	if (!(p->ct_action & TCA_CT_ACT_NAT))
		return 0;

	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
		return 0;

	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
		if (p->ipv4_range) {
			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN,
					    range->min_addr.ip))
				return -1;
			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX,
					    range->max_addr.ip))
				return -1;
		} else {
			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN,
					     &range->min_addr.in6))
				return -1;
			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX,
					     &range->max_addr.in6))
				return -1;
		}
	}

	if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN,
				 range->min_proto.all))
			return -1;
		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX,
				 range->max_proto.all))
			return -1;
	}

	return 0;
}
*skb
, struct tc_action
*a
,
1387 unsigned char *b
= skb_tail_pointer(skb
);
1388 struct tcf_ct
*c
= to_ct(a
);
1389 struct tcf_ct_params
*p
;
1391 struct tc_ct opt
= {
1392 .index
= c
->tcf_index
,
1393 .refcnt
= refcount_read(&c
->tcf_refcnt
) - ref
,
1394 .bindcnt
= atomic_read(&c
->tcf_bindcnt
) - bind
,
1398 spin_lock_bh(&c
->tcf_lock
);
1399 p
= rcu_dereference_protected(c
->params
,
1400 lockdep_is_held(&c
->tcf_lock
));
1401 opt
.action
= c
->tcf_action
;
1403 if (tcf_ct_dump_key_val(skb
,
1404 &p
->ct_action
, TCA_CT_ACTION
,
1405 NULL
, TCA_CT_UNSPEC
,
1406 sizeof(p
->ct_action
)))
1407 goto nla_put_failure
;
1409 if (p
->ct_action
& TCA_CT_ACT_CLEAR
)
1412 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK
) &&
1413 tcf_ct_dump_key_val(skb
,
1414 &p
->mark
, TCA_CT_MARK
,
1415 &p
->mark_mask
, TCA_CT_MARK_MASK
,
1417 goto nla_put_failure
;
1419 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS
) &&
1420 tcf_ct_dump_key_val(skb
,
1421 p
->labels
, TCA_CT_LABELS
,
1422 p
->labels_mask
, TCA_CT_LABELS_MASK
,
1424 goto nla_put_failure
;
1426 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES
) &&
1427 tcf_ct_dump_key_val(skb
,
1428 &p
->zone
, TCA_CT_ZONE
,
1429 NULL
, TCA_CT_UNSPEC
,
1431 goto nla_put_failure
;
1433 if (tcf_ct_dump_nat(skb
, p
))
1434 goto nla_put_failure
;
1437 if (nla_put(skb
, TCA_CT_PARMS
, sizeof(opt
), &opt
))
1438 goto nla_put_failure
;
1440 tcf_tm_dump(&t
, &c
->tcf_tm
);
1441 if (nla_put_64bit(skb
, TCA_CT_TM
, sizeof(t
), &t
, TCA_CT_PAD
))
1442 goto nla_put_failure
;
1443 spin_unlock_bh(&c
->tcf_lock
);
1447 spin_unlock_bh(&c
->tcf_lock
);

static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
			 struct netlink_callback *cb, int type,
			 const struct tc_action_ops *ops,
			 struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, ct_net_id);

	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}

static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, ct_net_id);

	return tcf_idr_search(tn, a, index);
}

static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
			     u64 drops, u64 lastuse, bool hw)
{
	struct tcf_ct *c = to_ct(a);

	tcf_action_update_stats(a, bytes, packets, drops, hw);
	c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}

static struct tc_action_ops act_ct_ops = {
	.kind		=	"ct",
	.id		=	TCA_ID_CT,
	.owner		=	THIS_MODULE,
	.act		=	tcf_ct_act,
	.dump		=	tcf_ct_dump,
	.init		=	tcf_ct_init,
	.cleanup	=	tcf_ct_cleanup,
	.walk		=	tcf_ct_walker,
	.lookup		=	tcf_ct_search,
	.stats_update	=	tcf_stats_update,
	.size		=	sizeof(struct tcf_ct),
};

static __net_init int ct_init_net(struct net *net)
{
	unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
	struct tc_ct_action_net *tn = net_generic(net, ct_net_id);

	if (nf_connlabels_get(net, n_bits - 1)) {
		tn->labels = false;
		pr_err("act_ct: Failed to set connlabels length");
	} else {
		tn->labels = true;
	}

	return tc_action_net_init(net, &tn->tn, &act_ct_ops);
}

static void __net_exit ct_exit_net(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		struct tc_ct_action_net *tn = net_generic(net, ct_net_id);

		if (tn->labels)
			nf_connlabels_put(net);
	}
	rtnl_unlock();

	tc_action_net_exit(net_list, ct_net_id);
}

static struct pernet_operations ct_net_ops = {
	.init = ct_init_net,
	.exit_batch = ct_exit_net,
	.id   = &ct_net_id,
	.size = sizeof(struct tc_ct_action_net),
};

static int __init ct_init_module(void)
{
	int err;

	act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
	if (!act_ct_wq)
		return -ENOMEM;

	err = tcf_ct_flow_tables_init();
	if (err)
		goto err_tbl_init;

	err = tcf_register_action(&act_ct_ops, &ct_net_ops);
	if (err)
		goto err_register;

	static_branch_inc(&tcf_frag_xmit_count);

	return 0;

err_register:
	tcf_ct_flow_tables_uninit();
err_tbl_init:
	destroy_workqueue(act_ct_wq);
	return err;
}

static void __exit ct_cleanup_module(void)
{
	static_branch_dec(&tcf_frag_xmit_count);
	tcf_unregister_action(&act_ct_ops, &ct_net_ops);
	tcf_ct_flow_tables_uninit();
	destroy_workqueue(act_ct_wq);
}

module_init(ct_init_module);
module_exit(ct_cleanup_module);
MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
MODULE_DESCRIPTION("Connection tracking action");
MODULE_LICENSE("GPL v2");