// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * net/sched/act_ct.c  Connection Tracking action
 *
 * Authors:   Paul Blakey <paulb@mellanox.com>
 *            Yossi Kuperman <yossiku@mellanox.com>
 *            Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_cls.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/rhashtable.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/ip.h>
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_wrapper.h>

#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_conntrack_act_ct.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <uapi/linux/netfilter/nf_nat.h>
static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_MUTEX(zones_mutex);
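
/* Flow tables for hardware offload are shared per (net, zone): every tc ct
 * action in the same zone takes a reference on a single tcf_ct_flow_table,
 * looked up through the zones_ht rhashtable under zones_mutex (see
 * tcf_ct_flow_table_get() below).
 */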
struct zones_ht_key {
	struct net *net;
	u16 zone;
};

struct tcf_ct_flow_table {
	struct rhash_head node; /* In zones tables */

	struct rcu_work rwork;
	struct nf_flowtable nf_ft;
	refcount_t ref;
	struct zones_ht_key key;
};
static const struct rhashtable_params zones_params = {
	.head_offset = offsetof(struct tcf_ct_flow_table, node),
	.key_offset = offsetof(struct tcf_ct_flow_table, key),
	.key_len = offsetofend(struct zones_ht_key, zone),
	.automatic_shrinking = true,
};
static struct flow_action_entry *
tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
{
	int i = flow_action->num_entries++;

	return &flow_action->entries[i];
}
static void tcf_ct_add_mangle_action(struct flow_action *action,
				     enum flow_action_mangle_base htype,
				     u32 offset,
				     u32 mask,
				     u32 val)
{
	struct flow_action_entry *entry;

	entry = tcf_ct_flow_table_flow_action_get_next(action);
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.mask = ~mask;
	entry->mangle.offset = offset;
	entry->mangle.val = val;
}
/* The following nat helper functions check if the inverted reverse tuple
 * (target) is different from the current dir tuple - meaning nat for ports
 * and/or ip is needed, and add the relevant mangle actions.
 */
static void
tcf_ct_flow_table_add_action_nat_ipv4(const struct nf_conntrack_tuple *tuple,
				      struct nf_conntrack_tuple target,
				      struct flow_action *action)
{
	if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3)))
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4,
					 offsetof(struct iphdr, saddr),
					 0xFFFFFFFF,
					 be32_to_cpu(target.src.u3.ip));
	if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3)))
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4,
					 offsetof(struct iphdr, daddr),
					 0xFFFFFFFF,
					 be32_to_cpu(target.dst.u3.ip));
}
static void
tcf_ct_add_ipv6_addr_mangle_action(struct flow_action *action,
				   union nf_inet_addr *addr,
				   u32 offset)
{
	int i;

	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
					 i * sizeof(u32) + offset,
					 0xFFFFFFFF, be32_to_cpu(addr->ip6[i]));
}
static void
tcf_ct_flow_table_add_action_nat_ipv6(const struct nf_conntrack_tuple *tuple,
				      struct nf_conntrack_tuple target,
				      struct flow_action *action)
{
	if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3)))
		tcf_ct_add_ipv6_addr_mangle_action(action, &target.src.u3,
						   offsetof(struct ipv6hdr,
							    saddr));
	if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3)))
		tcf_ct_add_ipv6_addr_mangle_action(action, &target.dst.u3,
						   offsetof(struct ipv6hdr,
							    daddr));
}
static void
tcf_ct_flow_table_add_action_nat_tcp(const struct nf_conntrack_tuple *tuple,
				     struct nf_conntrack_tuple target,
				     struct flow_action *action)
{
	__be16 target_src = target.src.u.tcp.port;
	__be16 target_dst = target.dst.u.tcp.port;

	if (target_src != tuple->src.u.tcp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
					 offsetof(struct tcphdr, source),
					 0xFFFF, be16_to_cpu(target_src));
	if (target_dst != tuple->dst.u.tcp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
					 offsetof(struct tcphdr, dest),
					 0xFFFF, be16_to_cpu(target_dst));
}
static void
tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
				     struct nf_conntrack_tuple target,
				     struct flow_action *action)
{
	__be16 target_src = target.src.u.udp.port;
	__be16 target_dst = target.dst.u.udp.port;

	if (target_src != tuple->src.u.udp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
					 offsetof(struct udphdr, source),
					 0xFFFF, be16_to_cpu(target_src));
	if (target_dst != tuple->dst.u.udp.port)
		tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
					 offsetof(struct udphdr, dest),
					 0xFFFF, be16_to_cpu(target_dst));
}
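
/* The CT_METADATA entry lets drivers restore conntrack state (mark, labels
 * and a cookie packing the ct pointer together with ctinfo, mirroring what
 * nf_ct_set() stores in skb->_nfct) when an offloaded packet misses in
 * hardware and falls back to software.
 */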
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
					      enum ip_conntrack_dir dir,
					      enum ip_conntrack_info ctinfo,
					      struct flow_action *action)
{
	struct nf_conn_labels *ct_labels;
	struct flow_action_entry *entry;
	u32 *act_ct_labels;

	entry = tcf_ct_flow_table_flow_action_get_next(action);
	entry->id = FLOW_ACTION_CT_METADATA;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
	/* aligns with the CT reference on the SKB nf_ct_set */
	entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
	entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;

	act_ct_labels = entry->ct_metadata.labels;
	ct_labels = nf_ct_labels_find(ct);
	if (ct_labels)
		memcpy(act_ct_labels, ct_labels->bits, NF_CT_LABELS_MAX_SIZE);
	else
		memset(act_ct_labels, 0, NF_CT_LABELS_MAX_SIZE);
}
static int tcf_ct_flow_table_add_action_nat(struct net *net,
					    struct nf_conn *ct,
					    enum ip_conntrack_dir dir,
					    struct flow_action *action)
{
	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
	struct nf_conntrack_tuple target;

	if (!(ct->status & IPS_NAT_MASK))
		return 0;

	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);

	switch (tuple->src.l3num) {
	case NFPROTO_IPV4:
		tcf_ct_flow_table_add_action_nat_ipv4(tuple, target,
						      action);
		break;
	case NFPROTO_IPV6:
		tcf_ct_flow_table_add_action_nat_ipv6(tuple, target,
						      action);
		break;
	default:
		return -EOPNOTSUPP;
	}

	switch (nf_ct_protonum(ct)) {
	case IPPROTO_TCP:
		tcf_ct_flow_table_add_action_nat_tcp(tuple, target, action);
		break;
	case IPPROTO_UDP:
		tcf_ct_flow_table_add_action_nat_udp(tuple, target, action);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return 0;
}
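
/* ->action callback of the flowtable type below: translate one direction of
 * a flow_offload entry into driver-consumable flow_action entries - the NAT
 * mangles (if any) followed by the CT metadata entry.
 */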
static int tcf_ct_flow_table_fill_actions(struct net *net,
					  struct flow_offload *flow,
					  enum flow_offload_tuple_dir tdir,
					  struct nf_flow_rule *flow_rule)
{
	struct flow_action *action = &flow_rule->rule->action;
	int num_entries = action->num_entries;
	struct nf_conn *ct = flow->ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int i, err;

	switch (tdir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		dir = IP_CT_DIR_ORIGINAL;
		ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
			IP_CT_ESTABLISHED : IP_CT_NEW;
		if (ctinfo == IP_CT_ESTABLISHED)
			set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		dir = IP_CT_DIR_REPLY;
		ctinfo = IP_CT_ESTABLISHED_REPLY;
		break;
	default:
		return -EOPNOTSUPP;
	}

	err = tcf_ct_flow_table_add_action_nat(net, ct, dir, action);
	if (err)
		goto err_nat;

	tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
	return 0;

err_nat:
	/* Clear filled actions */
	for (i = num_entries; i < action->num_entries; i++)
		memset(&action->entries[i], 0, sizeof(action->entries[i]));
	action->num_entries = num_entries;

	return err;
}
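
/* gc callback of the flowtable: report as outdated (and thus tear down)
 * flows that were pushed to hardware before the connection was established,
 * once a reply has been seen, so the connection is re-evaluated in software
 * and can be re-offloaded with the established state.
 */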
static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
{
	return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
	       test_bit(IPS_HW_OFFLOAD_BIT, &flow->ct->status) &&
	       !test_bit(NF_FLOW_HW_PENDING, &flow->flags) &&
	       !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
}
static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft);

static void tcf_ct_nf_get(struct nf_flowtable *ft)
{
	struct tcf_ct_flow_table *ct_ft =
		container_of(ft, struct tcf_ct_flow_table, nf_ft);

	tcf_ct_flow_table_get_ref(ct_ft);
}

static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft);

static void tcf_ct_nf_put(struct nf_flowtable *ft)
{
	struct tcf_ct_flow_table *ct_ft =
		container_of(ft, struct tcf_ct_flow_table, nf_ft);

	tcf_ct_flow_table_put(ct_ft);
}
static struct nf_flowtable_type flowtable_ct = {
	.gc		= tcf_ct_flow_is_outdated,
	.action		= tcf_ct_flow_table_fill_actions,
	.get		= tcf_ct_nf_get,
	.put		= tcf_ct_nf_put,
	.owner		= THIS_MODULE,
};
static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
{
	struct zones_ht_key key = { .net = net, .zone = params->zone };
	struct tcf_ct_flow_table *ct_ft;
	int err = -ENOMEM;

	mutex_lock(&zones_mutex);
	ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params);
	if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
		goto out_unlock;

	ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
	if (!ct_ft)
		goto err_alloc;
	refcount_set(&ct_ft->ref, 1);

	ct_ft->key = key;
	err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
	if (err)
		goto err_insert;

	ct_ft->nf_ft.type = &flowtable_ct;
	ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD |
			      NF_FLOWTABLE_COUNTER;
	err = nf_flow_table_init(&ct_ft->nf_ft);
	if (err)
		goto err_init;
	write_pnet(&ct_ft->nf_ft.net, net);

	__module_get(THIS_MODULE);
out_unlock:
	params->ct_ft = ct_ft;
	params->nf_ft = &ct_ft->nf_ft;
	mutex_unlock(&zones_mutex);

	return 0;

err_init:
	rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
err_insert:
	kfree(ct_ft);
err_alloc:
	mutex_unlock(&zones_mutex);
	return err;
}
static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft)
{
	refcount_inc(&ct_ft->ref);
}
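
/* Final teardown runs from a workqueue after an RCU grace period, so lockless
 * readers still holding the nf_flowtable have finished before it is freed and
 * the module reference is dropped.
 */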
static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
{
	struct tcf_ct_flow_table *ct_ft;
	struct flow_block *block;

	ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
			     rwork);
	nf_flow_table_free(&ct_ft->nf_ft);

	block = &ct_ft->nf_ft.flow_block;
	down_write(&ct_ft->nf_ft.flow_block_lock);
	WARN_ON(!list_empty(&block->cb_list));
	up_write(&ct_ft->nf_ft.flow_block_lock);
	kfree(ct_ft);

	module_put(THIS_MODULE);
}
static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft)
{
	if (refcount_dec_and_test(&ct_ft->ref)) {
		rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
		INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
		queue_rcu_work(act_ct_wq, &ct_ft->rwork);
	}
}
static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
				 struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
{
	entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
	entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
}
static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry)
{
	struct nf_conn_act_ct_ext *act_ct_ext;

	act_ct_ext = nf_conn_act_ct_ext_find(entry->ct);
	if (act_ct_ext) {
		tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
		tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
	}
}
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
				  struct nf_conn *ct,
				  bool tcp, bool bidirectional)
{
	struct nf_conn_act_ct_ext *act_ct_ext;
	struct flow_offload *entry;
	int err;

	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
		return;

	entry = flow_offload_alloc(ct);
	if (!entry)
		goto err_alloc;

	if (tcp) {
		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
	}
	if (bidirectional)
		__set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);

	act_ct_ext = nf_conn_act_ct_ext_find(ct);
	if (act_ct_ext) {
		tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
		tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
	}

	err = flow_offload_add(&ct_ft->nf_ft, entry);
	if (err)
		goto err_add;

	return;

err_add:
	flow_offload_free(entry);
err_alloc:
	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
}
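
/* Decide whether a tracked connection is stable enough to be inserted into
 * the zone flowtable: established+assured TCP, confirmed UDP (bidirectional
 * only once assured), or assured GRE version 0 without NAT. Connections
 * using a helper or TCP sequence adjustment are never offloaded.
 */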
static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
					   struct nf_conn *ct,
					   enum ip_conntrack_info ctinfo)
{
	bool tcp = false, bidirectional = true;

	switch (nf_ct_protonum(ct)) {
	case IPPROTO_TCP:
		if ((ctinfo != IP_CT_ESTABLISHED &&
		     ctinfo != IP_CT_ESTABLISHED_REPLY) ||
		    !test_bit(IPS_ASSURED_BIT, &ct->status) ||
		    ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
			return;

		tcp = true;
		break;
	case IPPROTO_UDP:
		if (!nf_ct_is_confirmed(ct))
			return;
		if (!test_bit(IPS_ASSURED_BIT, &ct->status))
			bidirectional = false;
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE: {
		struct nf_conntrack_tuple *tuple;

		if ((ctinfo != IP_CT_ESTABLISHED &&
		     ctinfo != IP_CT_ESTABLISHED_REPLY) ||
		    !test_bit(IPS_ASSURED_BIT, &ct->status) ||
		    ct->status & IPS_NAT_MASK)
			return;

		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
		/* No support for GRE v1 */
		if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
			return;
		break;
	}
#endif
	default:
		return;
	}

	if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
	    ct->status & IPS_SEQ_ADJUST)
		return;

	tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
}
static bool
tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  struct tcphdr **tcph)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;
	size_t hdrsize;
	u8 ipproto;

	if (!pskb_network_may_pull(skb, sizeof(*iph)))
		return false;

	iph = ip_hdr(skb);
	thoff = iph->ihl * 4;

	if (ip_is_fragment(iph) ||
	    unlikely(thoff != sizeof(struct iphdr)))
		return false;

	ipproto = iph->protocol;
	switch (ipproto) {
	case IPPROTO_TCP:
		hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		hdrsize = sizeof(*ports);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return false;
	}

	if (iph->ttl <= 1)
		return false;

	if (!pskb_network_may_pull(skb, thoff + hdrsize))
		return false;

	switch (ipproto) {
	case IPPROTO_TCP:
		*tcph = (void *)(skb_network_header(skb) + thoff);
		fallthrough;
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return false;
		break;
	}
#endif
	}

	iph = ip_hdr(skb);

	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->l3proto = AF_INET;
	tuple->l4proto = ipproto;

	return true;
}
static bool
tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  struct tcphdr **tcph)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;
	size_t hdrsize;
	u8 nexthdr;

	if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
		return false;

	ip6h = ipv6_hdr(skb);
	thoff = sizeof(*ip6h);

	nexthdr = ip6h->nexthdr;
	switch (nexthdr) {
	case IPPROTO_TCP:
		hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		hdrsize = sizeof(*ports);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return false;
	}

	if (ip6h->hop_limit <= 1)
		return false;

	if (!pskb_network_may_pull(skb, thoff + hdrsize))
		return false;

	switch (nexthdr) {
	case IPPROTO_TCP:
		*tcph = (void *)(skb_network_header(skb) + thoff);
		fallthrough;
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return false;
		break;
	}
#endif
	}

	ip6h = ipv6_hdr(skb);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = nexthdr;

	return true;
}
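
/* Software fast path: if the packet matches an already offloaded flow, pick
 * up the conntrack reference and ctinfo directly from the flowtable entry
 * instead of going through nf_conntrack_in(). FIN/RST packets tear the flow
 * down, and unassured flows are still handed back to the normal path so the
 * connection can be promoted to ASSURED.
 */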
static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
				     struct sk_buff *skb,
				     u8 family)
{
	struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
	struct flow_offload_tuple_rhash *tuplehash;
	struct flow_offload_tuple tuple = {};
	enum ip_conntrack_info ctinfo;
	struct tcphdr *tcph = NULL;
	bool force_refresh = false;
	struct flow_offload *flow;
	struct nf_conn *ct;
	u8 dir;

	switch (family) {
	case NFPROTO_IPV4:
		if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
			return false;
		break;
	case NFPROTO_IPV6:
		if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
			return false;
		break;
	default:
		return false;
	}

	tuplehash = flow_offload_lookup(nf_ft, &tuple);
	if (!tuplehash)
		return false;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	ct = flow->ct;

	if (dir == FLOW_OFFLOAD_DIR_REPLY &&
	    !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
		/* Only offload reply direction after connection became
		 * assured.
		 */
		if (test_bit(IPS_ASSURED_BIT, &ct->status))
			set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
		else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
			/* If flow_table flow has already been updated to the
			 * established state, then don't refresh.
			 */
			return false;
		force_refresh = true;
	}

	if (tcph && (unlikely(tcph->fin || tcph->rst))) {
		flow_offload_teardown(flow);
		return false;
	}

	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
		ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
			IP_CT_ESTABLISHED : IP_CT_NEW;
	else
		ctinfo = IP_CT_ESTABLISHED_REPLY;

	nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
	tcf_ct_flow_ct_ext_ifidx_update(flow);
	flow_offload_refresh(nf_ft, flow, force_refresh);
	if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
		/* Process this flow in SW to allow promoting to ASSURED */
		return false;
	}

	nf_conntrack_get(&ct->ct_general);
	nf_ct_set(skb, ct, ctinfo);
	if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(ct, dir, skb->len);

	return true;
}
static int tcf_ct_flow_tables_init(void)
{
	return rhashtable_init(&zones_ht, &zones_params);
}

static void tcf_ct_flow_tables_uninit(void)
{
	rhashtable_destroy(&zones_ht);
}
static struct tc_action_ops act_ct_ops;

struct tc_ct_action_net {
	struct tc_action_net tn; /* Must be first */
};
/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
				   struct tcf_ct_params *p)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return false;
	if (!net_eq(net, read_pnet(&ct->ct_net)))
		goto drop_ct;
	if (nf_ct_zone(ct)->id != p->zone)
		goto drop_ct;
	if (p->helper) {
		struct nf_conn_help *help;

		help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
		if (help && rcu_access_pointer(help->helper) != p->helper)
			goto drop_ct;
	}

	/* Force conntrack entry direction. */
	if ((p->ct_action & TCA_CT_ACT_FORCE) &&
	    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
		if (nf_ct_is_confirmed(ct))
			nf_ct_kill(ct);

		goto drop_ct;
	}

	return true;
drop_ct:
	nf_ct_put(ct);
	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);

	return false;
}
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
	u8 family = NFPROTO_UNSPEC;

	switch (skb_protocol(skb, true)) {
	case htons(ETH_P_IP):
		family = NFPROTO_IPV4;
		break;
	case htons(ETH_P_IPV6):
		family = NFPROTO_IPV6;
		break;
	default:
		break;
	}

	return family;
}
static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag)
{
	unsigned int len;

	len = skb_network_offset(skb) + sizeof(struct iphdr);
	if (unlikely(skb->len < len))
		return -EINVAL;
	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	*frag = ip_is_fragment(ip_hdr(skb));
	return 0;
}
static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag)
{
	unsigned int flags = 0, len, payload_ofs = 0;
	unsigned short frag_off;
	int nexthdr;

	len = skb_network_offset(skb) + sizeof(struct ipv6hdr);
	if (unlikely(skb->len < len))
		return -EINVAL;
	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
	if (unlikely(nexthdr < 0))
		return -EPROTO;

	*frag = flags & IP6_FH_F_FRAG;
	return 0;
}
static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
				   u8 family, u16 zone, bool *defrag)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int err = 0;
	bool frag;
	u8 proto;
	u16 mru;

	/* Previously seen (loopback)? Ignore. */
	ct = nf_ct_get(skb, &ctinfo);
	if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
		return 0;

	if (family == NFPROTO_IPV4)
		err = tcf_ct_ipv4_is_fragment(skb, &frag);
	else
		err = tcf_ct_ipv6_is_fragment(skb, &frag);
	if (err || !frag)
		return err;

	err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
	if (err)
		return err;

	*defrag = true;
	tc_skb_cb(skb)->mru = mru;

	return 0;
}
static void tcf_ct_params_free(struct tcf_ct_params *params)
{
	if (params->helper) {
#if IS_ENABLED(CONFIG_NF_NAT)
		if (params->ct_action & TCA_CT_ACT_NAT)
			nf_nat_helper_put(params->helper);
#endif
		nf_conntrack_helper_put(params->helper);
	}
	if (params->ct_ft)
		tcf_ct_flow_table_put(params->ct_ft);
	if (params->tmpl) {
		if (params->put_labels)
			nf_connlabels_put(nf_ct_net(params->tmpl));

		nf_ct_put(params->tmpl);
	}

	kfree(params);
}
static void tcf_ct_params_free_rcu(struct rcu_head *head)
{
	struct tcf_ct_params *params;

	params = container_of(head, struct tcf_ct_params, rcu);
	tcf_ct_params_free(params);
}
static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	u32 new_mark;

	if (!mask)
		return;

	new_mark = mark | (READ_ONCE(ct->mark) & ~(mask));
	if (READ_ONCE(ct->mark) != new_mark) {
		WRITE_ONCE(ct->mark, new_mark);
		if (nf_ct_is_confirmed(ct))
			nf_conntrack_event_cache(IPCT_MARK, ct);
	}
#endif
}
static void tcf_ct_act_set_labels(struct nf_conn *ct,
				  u32 *labels,
				  u32 *labels_m)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
	size_t labels_sz = sizeof_field(struct tcf_ct_params, labels);

	if (!memchr_inv(labels_m, 0, labels_sz))
		return;

	nf_connlabels_replace(ct, labels, labels_m, 4);
#endif
}
static int tcf_ct_act_nat(struct sk_buff *skb,
			  struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  int ct_action,
			  struct nf_nat_range2 *range,
			  bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
	int err, action = 0;

	if (!(ct_action & TCA_CT_ACT_NAT))
		return NF_ACCEPT;
	if (ct_action & TCA_CT_ACT_NAT_SRC)
		action |= BIT(NF_NAT_MANIP_SRC);
	if (ct_action & TCA_CT_ACT_NAT_DST)
		action |= BIT(NF_NAT_MANIP_DST);

	err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit);
	if (err != NF_ACCEPT)
		return err & NF_VERDICT_MASK;

	if (action & BIT(NF_NAT_MANIP_SRC))
		tc_skb_cb(skb)->post_ct_snat = 1;
	if (action & BIT(NF_NAT_MANIP_DST))
		tc_skb_cb(skb)->post_ct_dnat = 1;

	return err;
#else
	return NF_ACCEPT;
#endif
}
int tcf_ct_act(struct sk_buff
*skb
, const struct tc_action
*a
,
962 struct tcf_result
*res
)
964 struct net
*net
= dev_net(skb
->dev
);
965 enum ip_conntrack_info ctinfo
;
966 struct tcf_ct
*c
= to_ct(a
);
967 struct nf_conn
*tmpl
= NULL
;
968 struct nf_hook_state state
;
969 bool cached
, commit
, clear
;
970 int nh_ofs
, err
, retval
;
971 struct tcf_ct_params
*p
;
972 bool add_helper
= false;
973 bool skip_add
= false;
978 p
= rcu_dereference_bh(c
->params
);
980 retval
= READ_ONCE(c
->tcf_action
);
981 commit
= p
->ct_action
& TCA_CT_ACT_COMMIT
;
982 clear
= p
->ct_action
& TCA_CT_ACT_CLEAR
;
985 tcf_lastuse_update(&c
->tcf_tm
);
986 tcf_action_update_bstats(&c
->common
, skb
);
989 tc_skb_cb(skb
)->post_ct
= false;
990 ct
= nf_ct_get(skb
, &ctinfo
);
993 nf_ct_set(skb
, NULL
, IP_CT_UNTRACKED
);
999 family
= tcf_ct_skb_nf_family(skb
);
1000 if (family
== NFPROTO_UNSPEC
)
1003 /* The conntrack module expects to be working at L3.
1004 * We also try to pull the IPv4/6 header to linear area
1006 nh_ofs
= skb_network_offset(skb
);
1007 skb_pull_rcsum(skb
, nh_ofs
);
1008 err
= tcf_ct_handle_fragments(net
, skb
, family
, p
->zone
, &defrag
);
1012 err
= nf_ct_skb_network_trim(skb
, family
);
1016 /* If we are recirculating packets to match on ct fields and
1017 * committing with a separate ct action, then we don't need to
1018 * actually run the packet through conntrack twice unless it's for a
1021 cached
= tcf_ct_skb_nfct_cached(net
, skb
, p
);
1023 if (tcf_ct_flow_table_lookup(p
, skb
, family
)) {
1028 /* Associate skb with specified zone. */
1030 nf_conntrack_put(skb_nfct(skb
));
1031 nf_conntrack_get(&tmpl
->ct_general
);
1032 nf_ct_set(skb
, tmpl
, IP_CT_NEW
);
1035 state
.hook
= NF_INET_PRE_ROUTING
;
1038 err
= nf_conntrack_in(skb
, &state
);
1039 if (err
!= NF_ACCEPT
)
1044 ct
= nf_ct_get(skb
, &ctinfo
);
1047 nf_ct_deliver_cached_events(ct
);
1048 nf_conn_act_ct_ext_fill(skb
, ct
, ctinfo
);
1050 err
= tcf_ct_act_nat(skb
, ct
, ctinfo
, p
->ct_action
, &p
->range
, commit
);
1051 if (err
!= NF_ACCEPT
)
1054 if (!nf_ct_is_confirmed(ct
) && commit
&& p
->helper
&& !nfct_help(ct
)) {
1055 err
= __nf_ct_try_assign_helper(ct
, p
->tmpl
, GFP_ATOMIC
);
1059 if (p
->ct_action
& TCA_CT_ACT_NAT
&& !nfct_seqadj(ct
)) {
1060 if (!nfct_seqadj_ext_add(ct
))
1065 if (nf_ct_is_confirmed(ct
) ? ((!cached
&& !skip_add
) || add_helper
) : commit
) {
1066 err
= nf_ct_helper(skb
, ct
, ctinfo
, family
);
1067 if (err
!= NF_ACCEPT
)
1072 tcf_ct_act_set_mark(ct
, p
->mark
, p
->mark_mask
);
1073 tcf_ct_act_set_labels(ct
, p
->labels
, p
->labels_mask
);
1075 if (!nf_ct_is_confirmed(ct
))
1076 nf_conn_act_ct_ext_add(skb
, ct
, ctinfo
);
1078 /* This will take care of sending queued events
1079 * even if the connection is already confirmed.
1081 err
= nf_conntrack_confirm(skb
);
1082 if (err
!= NF_ACCEPT
)
1085 /* The ct may be dropped if a clash has been resolved,
1086 * so it's necessary to retrieve it from skb again to
1089 ct
= nf_ct_get(skb
, &ctinfo
);
1095 tcf_ct_flow_table_process_conn(p
->ct_ft
, ct
, ctinfo
);
1098 skb_push_rcsum(skb
, nh_ofs
);
1100 tc_skb_cb(skb
)->post_ct
= true;
1101 tc_skb_cb(skb
)->zone
= p
->zone
;
1104 qdisc_skb_cb(skb
)->pkt_len
= skb
->len
;
1108 if (err
!= -EINPROGRESS
)
1109 tcf_action_inc_drop_qstats(&c
->common
);
1110 return TC_ACT_CONSUMED
;
1113 tcf_action_inc_drop_qstats(&c
->common
);
1117 /* some verdicts store extra data in upper bits, such
1118 * as errno or queue number.
1120 switch (err
& NF_VERDICT_MASK
) {
1124 tcf_action_inc_drop_qstats(&c
->common
);
1125 return TC_ACT_CONSUMED
;
1127 DEBUG_NET_WARN_ON_ONCE(1);
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
	[TCA_CT_ACTION] = { .type = NLA_U16 },
	[TCA_CT_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_ct)),
	[TCA_CT_ZONE] = { .type = NLA_U16 },
	[TCA_CT_MARK] = { .type = NLA_U32 },
	[TCA_CT_MARK_MASK] = { .type = NLA_U32 },
	[TCA_CT_LABELS] = { .type = NLA_BINARY,
			    .len = 128 / BITS_PER_BYTE },
	[TCA_CT_LABELS_MASK] = { .type = NLA_BINARY,
				 .len = 128 / BITS_PER_BYTE },
	[TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 },
	[TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 },
	[TCA_CT_NAT_IPV6_MIN] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
	[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
	[TCA_CT_HELPER_NAME] = { .type = NLA_STRING, .len = NF_CT_HELPER_NAME_LEN },
	[TCA_CT_HELPER_FAMILY] = { .type = NLA_U8 },
	[TCA_CT_HELPER_PROTO] = { .type = NLA_U8 },
};
static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
				  struct tc_ct *parm,
				  struct nlattr **tb,
				  struct netlink_ext_ack *extack)
{
	struct nf_nat_range2 *range;

	if (!(p->ct_action & TCA_CT_ACT_NAT))
		return 0;

	if (!IS_ENABLED(CONFIG_NF_NAT)) {
		NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel");
		return -EOPNOTSUPP;
	}

	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
		return 0;

	if ((p->ct_action & TCA_CT_ACT_NAT_SRC) &&
	    (p->ct_action & TCA_CT_ACT_NAT_DST)) {
		NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time");
		return -EOPNOTSUPP;
	}

	range = &p->range;
	if (tb[TCA_CT_NAT_IPV4_MIN]) {
		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX];

		p->ipv4_range = true;
		range->flags |= NF_NAT_RANGE_MAP_IPS;
		range->min_addr.ip =
			nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);

		range->max_addr.ip =
			nla_get_in_addr_default(max_attr, range->min_addr.ip);
	} else if (tb[TCA_CT_NAT_IPV6_MIN]) {
		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];

		p->ipv4_range = false;
		range->flags |= NF_NAT_RANGE_MAP_IPS;
		range->min_addr.in6 =
			nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]);

		range->max_addr.in6 = max_attr ?
				      nla_get_in6_addr(max_attr) :
				      range->min_addr.in6;
	}

	if (tb[TCA_CT_NAT_PORT_MIN]) {
		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
		range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]);

		range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ?
				       nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) :
				       range->min_proto.all;
	}

	return 0;
}
static void tcf_ct_set_key_val(struct nlattr **tb,
			       void *val, int val_type,
			       void *mask, int mask_type,
			       int len)
{
	if (!tb[val_type])
		return;
	nla_memcpy(val, tb[val_type], len);

	if (!mask)
		return;

	if (mask_type == TCA_CT_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);
	else
		nla_memcpy(mask, tb[mask_type], len);
}
static int tcf_ct_fill_params(struct net *net,
			      struct tcf_ct_params *p,
			      struct tc_ct *parm,
			      struct nlattr **tb,
			      struct netlink_ext_ack *extack)
{
	struct nf_conntrack_zone zone;
	int err, family, proto, len;
	bool put_labels = false;
	struct nf_conn *tmpl;
	char *name;

	p->zone = NF_CT_DEFAULT_ZONE_ID;

	tcf_ct_set_key_val(tb,
			   &p->ct_action, TCA_CT_ACTION,
			   NULL, TCA_CT_UNSPEC,
			   sizeof(p->ct_action));

	if (p->ct_action & TCA_CT_ACT_CLEAR)
		return 0;

	err = tcf_ct_fill_params_nat(p, parm, tb, extack);
	if (err)
		return err;

	if (tb[TCA_CT_MARK]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled.");
			return -EOPNOTSUPP;
		}
		tcf_ct_set_key_val(tb,
				   &p->mark, TCA_CT_MARK,
				   &p->mark_mask, TCA_CT_MARK_MASK,
				   sizeof(p->mark));
	}

	if (tb[TCA_CT_LABELS]) {
		unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;

		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled.");
			return -EOPNOTSUPP;
		}

		if (nf_connlabels_get(net, n_bits - 1)) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
			return -EOPNOTSUPP;
		} else {
			put_labels = true;
		}

		tcf_ct_set_key_val(tb,
				   p->labels, TCA_CT_LABELS,
				   p->labels_mask, TCA_CT_LABELS_MASK,
				   sizeof(p->labels));
	}

	if (tb[TCA_CT_ZONE]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled.");
			return -EOPNOTSUPP;
		}

		tcf_ct_set_key_val(tb,
				   &p->zone, TCA_CT_ZONE,
				   NULL, TCA_CT_UNSPEC,
				   sizeof(p->zone));
	}

	nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
	tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
	if (!tmpl) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
		err = -ENOMEM;
		goto err;
	}

	p->tmpl = tmpl;
	if (tb[TCA_CT_HELPER_NAME]) {
		name = nla_data(tb[TCA_CT_HELPER_NAME]);
		len = nla_len(tb[TCA_CT_HELPER_NAME]);
		if (len > 16 || name[len - 1] != '\0') {
			NL_SET_ERR_MSG_MOD(extack, "Failed to parse helper name.");
			err = -EINVAL;
			goto err;
		}
		family = nla_get_u8_default(tb[TCA_CT_HELPER_FAMILY], AF_INET);
		proto = nla_get_u8_default(tb[TCA_CT_HELPER_PROTO],
					   IPPROTO_TCP);
		err = nf_ct_add_helper(tmpl, name, family, proto,
				       p->ct_action & TCA_CT_ACT_NAT, &p->helper);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to add helper");
			goto err;
		}
	}

	p->put_labels = put_labels;

	if (p->ct_action & TCA_CT_ACT_COMMIT)
		__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
	return 0;
err:
	if (put_labels)
		nf_connlabels_put(net);

	return err;
}
static int tcf_ct_init(struct net *net, struct nlattr *nla,
		       struct nlattr *est, struct tc_action **a,
		       struct tcf_proto *tp, u32 flags,
		       struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id);
	bool bind = flags & TCA_ACT_FLAGS_BIND;
	struct tcf_ct_params *params = NULL;
	struct nlattr *tb[TCA_CT_MAX + 1];
	struct tcf_chain *goto_ch = NULL;
	struct tc_ct *parm;
	struct tcf_ct *c;
	int err, res = 0;
	u32 index;

	if (!nla) {
		NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_CT_PARMS]) {
		NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters");
		return -EINVAL;
	}
	parm = nla_data(tb[TCA_CT_PARMS]);
	index = parm->index;
	err = tcf_idr_check_alloc(tn, &index, a, bind);
	if (err < 0)
		return err;

	if (!err) {
		err = tcf_idr_create_from_flags(tn, index, est, a,
						&act_ct_ops, bind, flags);
		if (err) {
			tcf_idr_cleanup(tn, index);
			return err;
		}
		res = ACT_P_CREATED;
	} else {
		if (bind)
			return ACT_P_BOUND;

		if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
			tcf_idr_release(*a, bind);
			return -EEXIST;
		}
	}
	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
	if (err < 0)
		goto cleanup;

	c = to_ct(*a);

	params = kzalloc(sizeof(*params), GFP_KERNEL);
	if (unlikely(!params)) {
		err = -ENOMEM;
		goto cleanup;
	}

	err = tcf_ct_fill_params(net, params, parm, tb, extack);
	if (err)
		goto cleanup;

	err = tcf_ct_flow_table_get(net, params);
	if (err)
		goto cleanup;

	spin_lock_bh(&c->tcf_lock);
	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
	params = rcu_replace_pointer(c->params, params,
				     lockdep_is_held(&c->tcf_lock));
	spin_unlock_bh(&c->tcf_lock);

	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	if (params)
		call_rcu(&params->rcu, tcf_ct_params_free_rcu);

	return res;

cleanup:
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	if (params)
		tcf_ct_params_free(params);
	tcf_idr_release(*a, bind);
	return err;
}
static void tcf_ct_cleanup(struct tc_action *a)
{
	struct tcf_ct_params *params;
	struct tcf_ct *c = to_ct(a);

	params = rcu_dereference_protected(c->params, 1);
	if (params)
		call_rcu(&params->rcu, tcf_ct_params_free_rcu);
}
static int tcf_ct_dump_key_val(struct sk_buff *skb,
			       void *val, int val_type,
			       void *mask, int mask_type,
			       int len)
{
	int err;

	if (mask && !memchr_inv(mask, 0, len))
		return 0;

	err = nla_put(skb, val_type, len, val);
	if (err)
		return err;

	if (mask_type != TCA_CT_UNSPEC) {
		err = nla_put(skb, mask_type, len, mask);
		if (err)
			return err;
	}

	return 0;
}
static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
{
	struct nf_nat_range2 *range = &p->range;

	if (!(p->ct_action & TCA_CT_ACT_NAT))
		return 0;

	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
		return 0;

	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
		if (p->ipv4_range) {
			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN,
					    range->min_addr.ip))
				return -1;
			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX,
					    range->max_addr.ip))
				return -1;
		} else {
			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN,
					     &range->min_addr.in6))
				return -1;
			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX,
					     &range->max_addr.in6))
				return -1;
		}
	}

	if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN,
				 range->min_proto.all))
			return -1;
		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX,
				 range->max_proto.all))
			return -1;
	}

	return 0;
}
static int tcf_ct_dump_helper(struct sk_buff *skb, struct nf_conntrack_helper *helper)
{
	if (!helper)
		return 0;

	if (nla_put_string(skb, TCA_CT_HELPER_NAME, helper->name) ||
	    nla_put_u8(skb, TCA_CT_HELPER_FAMILY, helper->tuple.src.l3num) ||
	    nla_put_u8(skb, TCA_CT_HELPER_PROTO, helper->tuple.dst.protonum))
		return -1;

	return 0;
}
*skb
, struct tc_action
*a
,
1523 unsigned char *b
= skb_tail_pointer(skb
);
1524 struct tcf_ct
*c
= to_ct(a
);
1525 struct tcf_ct_params
*p
;
1527 struct tc_ct opt
= {
1528 .index
= c
->tcf_index
,
1529 .refcnt
= refcount_read(&c
->tcf_refcnt
) - ref
,
1530 .bindcnt
= atomic_read(&c
->tcf_bindcnt
) - bind
,
1534 spin_lock_bh(&c
->tcf_lock
);
1535 p
= rcu_dereference_protected(c
->params
,
1536 lockdep_is_held(&c
->tcf_lock
));
1537 opt
.action
= c
->tcf_action
;
1539 if (tcf_ct_dump_key_val(skb
,
1540 &p
->ct_action
, TCA_CT_ACTION
,
1541 NULL
, TCA_CT_UNSPEC
,
1542 sizeof(p
->ct_action
)))
1543 goto nla_put_failure
;
1545 if (p
->ct_action
& TCA_CT_ACT_CLEAR
)
1548 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK
) &&
1549 tcf_ct_dump_key_val(skb
,
1550 &p
->mark
, TCA_CT_MARK
,
1551 &p
->mark_mask
, TCA_CT_MARK_MASK
,
1553 goto nla_put_failure
;
1555 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS
) &&
1556 tcf_ct_dump_key_val(skb
,
1557 p
->labels
, TCA_CT_LABELS
,
1558 p
->labels_mask
, TCA_CT_LABELS_MASK
,
1560 goto nla_put_failure
;
1562 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES
) &&
1563 tcf_ct_dump_key_val(skb
,
1564 &p
->zone
, TCA_CT_ZONE
,
1565 NULL
, TCA_CT_UNSPEC
,
1567 goto nla_put_failure
;
1569 if (tcf_ct_dump_nat(skb
, p
))
1570 goto nla_put_failure
;
1572 if (tcf_ct_dump_helper(skb
, p
->helper
))
1573 goto nla_put_failure
;
1576 if (nla_put(skb
, TCA_CT_PARMS
, sizeof(opt
), &opt
))
1577 goto nla_put_failure
;
1579 tcf_tm_dump(&t
, &c
->tcf_tm
);
1580 if (nla_put_64bit(skb
, TCA_CT_TM
, sizeof(t
), &t
, TCA_CT_PAD
))
1581 goto nla_put_failure
;
1582 spin_unlock_bh(&c
->tcf_lock
);
1586 spin_unlock_bh(&c
->tcf_lock
);
static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
			     u64 drops, u64 lastuse, bool hw)
{
	struct tcf_ct *c = to_ct(a);

	tcf_action_update_stats(a, bytes, packets, drops, hw);
	c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}
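
/* Offload description for classifiers: on bind, fill a FLOW_ACTION_CT entry
 * with the action flags, zone and flowtable pointer so drivers can program
 * their own CT tables; actions using a conntrack helper are not offloadable.
 */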
static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
				    u32 *index_inc, bool bind,
				    struct netlink_ext_ack *extack)
{
	if (bind) {
		struct flow_action_entry *entry = entry_data;

		if (tcf_ct_helper(act))
			return -EOPNOTSUPP;

		entry->id = FLOW_ACTION_CT;
		entry->ct.action = tcf_ct_action(act);
		entry->ct.zone = tcf_ct_zone(act);
		entry->ct.flow_table = tcf_ct_ft(act);
		*index_inc = 1;
	} else {
		struct flow_offload_action *fl_action = entry_data;

		fl_action->id = FLOW_ACTION_CT;
	}

	return 0;
}
static struct tc_action_ops act_ct_ops = {
	.kind			=	"ct",
	.id			=	TCA_ID_CT,
	.owner			=	THIS_MODULE,
	.act			=	tcf_ct_act,
	.dump			=	tcf_ct_dump,
	.init			=	tcf_ct_init,
	.cleanup		=	tcf_ct_cleanup,
	.stats_update		=	tcf_stats_update,
	.offload_act_setup	=	tcf_ct_offload_act_setup,
	.size			=	sizeof(struct tcf_ct),
};
MODULE_ALIAS_NET_ACT("ct");
static __net_init int ct_init_net(struct net *net)
{
	struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);

	return tc_action_net_init(net, &tn->tn, &act_ct_ops);
}

static void __net_exit ct_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, act_ct_ops.net_id);
}

static struct pernet_operations ct_net_ops = {
	.init = ct_init_net,
	.exit_batch = ct_exit_net,
	.id   = &act_ct_ops.net_id,
	.size = sizeof(struct tc_ct_action_net),
};
static int __init ct_init_module(void)
{
	int err;

	act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
	if (!act_ct_wq)
		return -ENOMEM;

	err = tcf_ct_flow_tables_init();
	if (err)
		goto err_tbl_init;

	err = tcf_register_action(&act_ct_ops, &ct_net_ops);
	if (err)
		goto err_register;

	static_branch_inc(&tcf_frag_xmit_count);

	return 0;

err_register:
	tcf_ct_flow_tables_uninit();
err_tbl_init:
	destroy_workqueue(act_ct_wq);
	return err;
}
static void __exit ct_cleanup_module(void)
{
	static_branch_dec(&tcf_frag_xmit_count);
	tcf_unregister_action(&act_ct_ops, &ct_net_ops);
	tcf_ct_flow_tables_uninit();
	destroy_workqueue(act_ct_wq);
}

module_init(ct_init_module);
module_exit(ct_cleanup_module);
MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
MODULE_DESCRIPTION("Connection tracking action");
MODULE_LICENSE("GPL v2");