// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* -
 * net/sched/act_ct.c  Connection Tracking action
 *
 * Authors:   Paul Blakey <paulb@mellanox.com>
 *            Yossi Kuperman <yossiku@mellanox.com>
 *            Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_cls.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/ip.h>
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <uapi/linux/netfilter/nf_nat.h>

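/* Illustrative usage sketch (not part of the original file): the rules below
 * assume iproute2 with flower/ct support and hypothetical interface names.
 *
 *   # Chain 0: send untracked IP traffic through conntrack in zone 2,
 *   # then recirculate to chain 2 where the ct state is matchable:
 *   tc filter add dev eth0 ingress prio 1 chain 0 proto ip flower \
 *       ct_state -trk \
 *       action ct zone 2 pipe action goto chain 2
 *
 *   # Chain 2: commit new connections, setting a mark and source NAT:
 *   tc filter add dev eth0 ingress prio 1 chain 2 proto ip flower \
 *       ct_state +trk+new \
 *       action ct zone 2 commit mark 0xbb nat src addr 5.5.5.7 pipe \
 *       action mirred egress redirect dev eth1
 */
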
static struct tc_action_ops act_ct_ops;
static unsigned int ct_net_id;

struct tc_ct_action_net {
	struct tc_action_net tn; /* Must be first */
	bool labels;
};

/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
				   u16 zone_id, bool force)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return false;
	if (!net_eq(net, read_pnet(&ct->ct_net)))
		return false;
	if (nf_ct_zone(ct)->id != zone_id)
		return false;

	/* Force conntrack entry direction. */
	if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
		if (nf_ct_is_confirmed(ct))
			nf_ct_kill(ct);

		nf_conntrack_put(&ct->ct_general);
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);

		return false;
	}

	return true;
}

/* Trim the skb to the length specified by the IP/IPv6 header,
 * removing any trailing lower-layer padding. This prepares the skb
 * for higher-layer processing that assumes skb->len excludes padding
 * (such as nf_ip_checksum). The caller needs to pull the skb to the
 * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
 */
static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
{
	unsigned int len;
	int err;

	switch (family) {
	case NFPROTO_IPV4:
		len = ntohs(ip_hdr(skb)->tot_len);
		break;
	case NFPROTO_IPV6:
		len = sizeof(struct ipv6hdr)
			+ ntohs(ipv6_hdr(skb)->payload_len);
		break;
	default:
		len = skb->len;
	}

	err = pskb_trim_rcsum(skb, len);

	return err;
}

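/* Worked example for the trim above (illustrative numbers): a 40-byte IPv4
 * datagram padded out to the 60-byte Ethernet minimum leaves 46 bytes after
 * the 14-byte L2 header is pulled, i.e. 6 bytes of trailing padding;
 * tot_len reports 40, so pskb_trim_rcsum() cuts skb->len from 46 to 40.
 */
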
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
	u8 family = NFPROTO_UNSPEC;

	switch (skb_protocol(skb, true)) {
	case htons(ETH_P_IP):
		family = NFPROTO_IPV4;
		break;
	case htons(ETH_P_IPV6):
		family = NFPROTO_IPV6;
		break;
	default:
		break;
	}

	return family;
}

static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag)
{
	unsigned int len;

	len = skb_network_offset(skb) + sizeof(struct iphdr);
	if (unlikely(skb->len < len))
		return -EINVAL;
	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	*frag = ip_is_fragment(ip_hdr(skb));
	return 0;
}

static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag)
{
	unsigned int flags = 0, len, payload_ofs = 0;
	unsigned short frag_off;
	int nexthdr;

	len = skb_network_offset(skb) + sizeof(struct ipv6hdr);
	if (unlikely(skb->len < len))
		return -EINVAL;
	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
	if (unlikely(nexthdr < 0))
		return -EPROTO;

	*frag = flags & IP6_FH_F_FRAG;
	return 0;
}

static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
				   u8 family, u16 zone)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int err = 0;
	bool frag;

	/* Previously seen (loopback)? Ignore. */
	ct = nf_ct_get(skb, &ctinfo);
	if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
		return 0;

	if (family == NFPROTO_IPV4)
		err = tcf_ct_ipv4_is_fragment(skb, &frag);
	else
		err = tcf_ct_ipv6_is_fragment(skb, &frag);
	if (err || !frag)
		return err;

	skb_get(skb);

	if (family == NFPROTO_IPV4) {
		enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;

		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
		local_bh_disable();
		err = ip_defrag(net, skb, user);
		local_bh_enable();
		if (err && err != -EINPROGRESS)
			goto out_free;
	} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
		enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;

		memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
		err = nf_ct_frag6_gather(net, skb, user);
		if (err && err != -EINPROGRESS)
			goto out_free;
#else
		err = -EOPNOTSUPP;
		goto out_free;
#endif
	}

	skb_clear_hash(skb);
	skb->ignore_df = 1;
	return err;

out_free:
	kfree_skb(skb);
	return err;
}

static void tcf_ct_params_free(struct rcu_head *head)
{
	struct tcf_ct_params *params = container_of(head,
						    struct tcf_ct_params, rcu);

	if (params->tmpl)
		nf_conntrack_put(&params->tmpl->ct_general);
	kfree(params);
}

#if IS_ENABLED(CONFIG_NF_NAT)
/* Modelled after nf_nat_ipv[46]_fn().
 * range is only used for new, uninitialized NAT state.
 * Returns either NF_ACCEPT or NF_DROP.
 */
static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct nf_nat_range2 *range,
			  enum nf_nat_manip_type maniptype)
{
	__be16 proto = skb_protocol(skb, true);
	int hooknum, err = NF_ACCEPT;

	/* See HOOK2MANIP(). */
	if (maniptype == NF_NAT_MANIP_SRC)
		hooknum = NF_INET_LOCAL_IN; /* Source NAT */
	else
		hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		if (proto == htons(ETH_P_IP) &&
		    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   hooknum))
				err = NF_DROP;
			goto out;
		} else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
			__be16 frag_off;
			u8 nexthdr = ipv6_hdr(skb)->nexthdr;
			int hdrlen = ipv6_skip_exthdr(skb,
						      sizeof(struct ipv6hdr),
						      &nexthdr, &frag_off);

			if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
				if (!nf_nat_icmpv6_reply_translation(skb, ct,
								     ctinfo,
								     hooknum,
								     hdrlen))
					err = NF_DROP;
				goto out;
			}
		}
		/* Non-ICMP, fall thru to initialize if needed. */
		/* fall through */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			/* Initialize according to the NAT action. */
			err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
				/* Action is set up to establish a new
				 * mapping.
				 */
				? nf_nat_setup_info(ct, range, maniptype)
				: nf_nat_alloc_null_binding(ct, hooknum);
			if (err != NF_ACCEPT)
				goto out;
		}
		break;

	case IP_CT_ESTABLISHED:
	case IP_CT_ESTABLISHED_REPLY:
		break;

	default:
		err = NF_DROP;
		goto out;
	}

	err = nf_nat_packet(ct, ctinfo, hooknum, skb);
out:
	return err;
}
#endif /* CONFIG_NF_NAT */

static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	u32 new_mark;

	if (!mask)
		return;

	new_mark = mark | (ct->mark & ~(mask));
	if (ct->mark != new_mark) {
		ct->mark = new_mark;
		if (nf_ct_is_confirmed(ct))
			nf_conntrack_event_cache(IPCT_MARK, ct);
	}
#endif
}

static void tcf_ct_act_set_labels(struct nf_conn *ct,
				  u32 *labels,
				  u32 *labels_m)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
	size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels);

	if (!memchr_inv(labels_m, 0, labels_sz))
		return;

	nf_connlabels_replace(ct, labels, labels_m, 4);
#endif
}

static int tcf_ct_act_nat(struct sk_buff *skb,
			  struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  int ct_action,
			  struct nf_nat_range2 *range,
			  bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
	int err;
	enum nf_nat_manip_type maniptype;

	if (!(ct_action & TCA_CT_ACT_NAT))
		return NF_ACCEPT;

	/* Add NAT extension if not confirmed yet. */
	if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
		return NF_DROP;   /* Can't NAT. */

	if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
	    (ctinfo != IP_CT_RELATED || commit)) {
		/* NAT an established or related connection like before. */
		if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
			/* This is the REPLY direction for a connection
			 * for which NAT was applied in the forward
			 * direction. Do the reverse NAT.
			 */
			maniptype = ct->status & IPS_SRC_NAT
				? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
		else
			maniptype = ct->status & IPS_SRC_NAT
				? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
	} else if (ct_action & TCA_CT_ACT_NAT_SRC) {
		maniptype = NF_NAT_MANIP_SRC;
	} else if (ct_action & TCA_CT_ACT_NAT_DST) {
		maniptype = NF_NAT_MANIP_DST;
	} else {
		return NF_ACCEPT;
	}

	err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
	if (err == NF_ACCEPT &&
	    ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
		if (maniptype == NF_NAT_MANIP_SRC)
			maniptype = NF_NAT_MANIP_DST;
		else
			maniptype = NF_NAT_MANIP_SRC;

		err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
	}
	return err;
#else
	return NF_ACCEPT;
#endif
}

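/* Summary of the manip selection above: in the ORIGINAL direction the manip
 * type follows the connection's NAT status (IPS_SRC_NAT -> NF_NAT_MANIP_SRC,
 * otherwise NF_NAT_MANIP_DST); in the REPLY direction it is inverted so the
 * reverse mapping gets applied. When both IPS_SRC_NAT and IPS_DST_NAT are
 * set, ct_nat_execute() runs a second time with the opposite manip type.
 */
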
static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
		      struct tcf_result *res)
{
	struct net *net = dev_net(skb->dev);
	bool cached, commit, clear, force;
	enum ip_conntrack_info ctinfo;
	struct tcf_ct *c = to_ct(a);
	struct nf_conn *tmpl = NULL;
	struct nf_hook_state state;
	int nh_ofs, err, retval;
	struct tcf_ct_params *p;
	struct nf_conn *ct;
	u8 family;

	p = rcu_dereference_bh(c->params);

	retval = READ_ONCE(c->tcf_action);
	commit = p->ct_action & TCA_CT_ACT_COMMIT;
	clear = p->ct_action & TCA_CT_ACT_CLEAR;
	force = p->ct_action & TCA_CT_ACT_FORCE;
	tmpl = p->tmpl;

	if (clear) {
		ct = nf_ct_get(skb, &ctinfo);
		if (ct) {
			nf_conntrack_put(&ct->ct_general);
			nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		}

		goto out;
	}

	family = tcf_ct_skb_nf_family(skb);
	if (family == NFPROTO_UNSPEC)
		goto drop;

	/* The conntrack module expects to be working at L3.
	 * We also try to pull the IPv4/6 header to linear area
	 */
	nh_ofs = skb_network_offset(skb);
	skb_pull_rcsum(skb, nh_ofs);
	err = tcf_ct_handle_fragments(net, skb, family, p->zone);
	if (err == -EINPROGRESS) {
		retval = TC_ACT_STOLEN;
		goto out;
	}
	if (err)
		goto drop;

	err = tcf_ct_skb_network_trim(skb, family);
	if (err)
		goto drop;

	/* If we are recirculating packets to match on ct fields and
	 * committing with a separate ct action, then we don't need to
	 * actually run the packet through conntrack twice unless it's for a
	 * different zone.
	 */
	cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
	if (!cached) {
		/* Associate skb with specified zone. */
		if (tmpl) {
			ct = nf_ct_get(skb, &ctinfo);
			if (skb_nfct(skb))
				nf_conntrack_put(skb_nfct(skb));
			nf_conntrack_get(&tmpl->ct_general);
			nf_ct_set(skb, tmpl, IP_CT_NEW);
		}

		state.hook = NF_INET_PRE_ROUTING;
		state.net = net;
		state.pf = family;
		err = nf_conntrack_in(skb, &state);
		if (err != NF_ACCEPT)
			goto out_push;
	}

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		goto out_push;
	nf_ct_deliver_cached_events(ct);

	err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
	if (err != NF_ACCEPT)
		goto drop;

	if (commit) {
		tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
		tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);

		/* This will take care of sending queued events
		 * even if the connection is already confirmed.
		 */
		nf_conntrack_confirm(skb);
	}

out_push:
	skb_push_rcsum(skb, nh_ofs);

out:
	bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
	return retval;

drop:
	qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
	return TC_ACT_SHOT;
}

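/* Return-value sketch for tcf_ct_act(): the configured control action on
 * success, TC_ACT_STOLEN when a fragment was queued for reassembly
 * (-EINPROGRESS from defrag), and TC_ACT_SHOT on any drop path.
 */
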
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
	[TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 },
	[TCA_CT_ACTION] = { .type = NLA_U16 },
	[TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) },
	[TCA_CT_ZONE] = { .type = NLA_U16 },
	[TCA_CT_MARK] = { .type = NLA_U32 },
	[TCA_CT_MARK_MASK] = { .type = NLA_U32 },
	[TCA_CT_LABELS] = { .type = NLA_BINARY,
			    .len = 128 / BITS_PER_BYTE },
	[TCA_CT_LABELS_MASK] = { .type = NLA_BINARY,
				 .len = 128 / BITS_PER_BYTE },
	[TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 },
	[TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 },
	[TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN,
				  .len = sizeof(struct in6_addr) },
	[TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN,
				  .len = sizeof(struct in6_addr) },
	[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
	[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
};

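/* Rough mapping to the iproute2 "action ct" syntax (illustrative, not
 * authoritative): TCA_CT_ACTION carries the commit/force/clear/nat flags,
 * TCA_CT_ZONE <-> "zone ZONE", TCA_CT_MARK[_MASK] <-> "mark MARK[/MASK]",
 * TCA_CT_LABELS[_MASK] <-> "label LABEL[/MASK]", and the TCA_CT_NAT_*
 * attributes <-> "nat src|dst addr MIN[-MAX] [port MIN[-MAX]]".
 */
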
static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
				  struct tc_ct *parm,
				  struct nlattr **tb,
				  struct netlink_ext_ack *extack)
{
	struct nf_nat_range2 *range;

	if (!(p->ct_action & TCA_CT_ACT_NAT))
		return 0;

	if (!IS_ENABLED(CONFIG_NF_NAT)) {
		NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel");
		return -EOPNOTSUPP;
	}

	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
		return 0;

	if ((p->ct_action & TCA_CT_ACT_NAT_SRC) &&
	    (p->ct_action & TCA_CT_ACT_NAT_DST)) {
		NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time");
		return -EOPNOTSUPP;
	}

	range = &p->range;
	if (tb[TCA_CT_NAT_IPV4_MIN]) {
		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX];

		p->ipv4_range = true;
		range->flags |= NF_NAT_RANGE_MAP_IPS;
		range->min_addr.ip =
			nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);

		range->max_addr.ip = max_attr ?
				     nla_get_in_addr(max_attr) :
				     range->min_addr.ip;
	} else if (tb[TCA_CT_NAT_IPV6_MIN]) {
		struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];

		p->ipv4_range = false;
		range->flags |= NF_NAT_RANGE_MAP_IPS;
		range->min_addr.in6 =
			nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]);

		range->max_addr.in6 = max_attr ?
				      nla_get_in6_addr(max_attr) :
				      range->min_addr.in6;
	}

	if (tb[TCA_CT_NAT_PORT_MIN]) {
		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
		range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]);

		range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ?
				       nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) :
				       range->min_proto.all;
	}

	return 0;
}

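/* Example (hypothetical rule): "nat src addr 10.0.0.1-10.0.0.9 port
 * 1000-2000" arrives here as TCA_CT_NAT_IPV4_MIN/MAX plus
 * TCA_CT_NAT_PORT_MIN/MAX and fills range->min_addr.ip/max_addr.ip and
 * range->min_proto.all/max_proto.all, with NF_NAT_RANGE_MAP_IPS and
 * NF_NAT_RANGE_PROTO_SPECIFIED set in range->flags; an omitted maximum
 * defaults to the corresponding minimum.
 */
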
static void tcf_ct_set_key_val(struct nlattr **tb,
			       void *val, int val_type,
			       void *mask, int mask_type,
			       int len)
{
	if (!tb[val_type])
		return;
	nla_memcpy(val, tb[val_type], len);

	if (!mask)
		return;

	if (mask_type == TCA_CT_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);
	else
		nla_memcpy(mask, tb[mask_type], len);
}

static int tcf_ct_fill_params(struct net *net,
			      struct tcf_ct_params *p,
			      struct tc_ct *parm,
			      struct nlattr **tb,
			      struct netlink_ext_ack *extack)
{
	struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
	struct nf_conntrack_zone zone;
	struct nf_conn *tmpl;
	int err;

	p->zone = NF_CT_DEFAULT_ZONE_ID;

	tcf_ct_set_key_val(tb,
			   &p->ct_action, TCA_CT_ACTION,
			   NULL, TCA_CT_UNSPEC,
			   sizeof(p->ct_action));

	if (p->ct_action & TCA_CT_ACT_CLEAR)
		return 0;

	err = tcf_ct_fill_params_nat(p, parm, tb, extack);
	if (err)
		return err;

	if (tb[TCA_CT_MARK]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled.");
			return -EOPNOTSUPP;
		}
		tcf_ct_set_key_val(tb,
				   &p->mark, TCA_CT_MARK,
				   &p->mark_mask, TCA_CT_MARK_MASK,
				   sizeof(p->mark));
	}

	if (tb[TCA_CT_LABELS]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled.");
			return -EOPNOTSUPP;
		}

		if (!tn->labels) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
			return -EOPNOTSUPP;
		}
		tcf_ct_set_key_val(tb,
				   p->labels, TCA_CT_LABELS,
				   p->labels_mask, TCA_CT_LABELS_MASK,
				   sizeof(p->labels));
	}

	if (tb[TCA_CT_ZONE]) {
		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
			NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled.");
			return -EOPNOTSUPP;
		}

		tcf_ct_set_key_val(tb,
				   &p->zone, TCA_CT_ZONE,
				   NULL, TCA_CT_UNSPEC,
				   sizeof(p->zone));
	}

	if (p->zone == NF_CT_DEFAULT_ZONE_ID)
		return 0;

	nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
	tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
	if (!tmpl) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
		return -ENOMEM;
	}
	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
	nf_conntrack_get(&tmpl->ct_general);
	p->tmpl = tmpl;

	return 0;
}

static int tcf_ct_init(struct net *net, struct nlattr *nla,
		       struct nlattr *est, struct tc_action **a,
		       int replace, int bind, bool rtnl_held,
		       struct tcf_proto *tp,
		       struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, ct_net_id);
	struct tcf_ct_params *params = NULL;
	struct nlattr *tb[TCA_CT_MAX + 1];
	struct tcf_chain *goto_ch = NULL;
	struct tc_ct *parm;
	struct tcf_ct *c;
	int err, res = 0;
	u32 index;

	if (!nla) {
		NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_CT_PARMS]) {
		NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters");
		return -EINVAL;
	}
	parm = nla_data(tb[TCA_CT_PARMS]);
	index = parm->index;
	err = tcf_idr_check_alloc(tn, &index, a, bind);
	if (err < 0)
		return err;

	if (!err) {
		err = tcf_idr_create(tn, index, est, a,
				     &act_ct_ops, bind, true);
		if (err) {
			tcf_idr_cleanup(tn, index);
			return err;
		}
		res = ACT_P_CREATED;
	} else {
		if (bind)
			return 0;

		if (!replace) {
			tcf_idr_release(*a, bind);
			return -EEXIST;
		}
	}
	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
	if (err < 0)
		goto cleanup;

	c = to_ct(*a);

	params = kzalloc(sizeof(*params), GFP_KERNEL);
	if (unlikely(!params)) {
		err = -ENOMEM;
		goto cleanup;
	}

	err = tcf_ct_fill_params(net, params, parm, tb, extack);
	if (err)
		goto cleanup;

	spin_lock_bh(&c->tcf_lock);
	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
	rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock));
	spin_unlock_bh(&c->tcf_lock);

	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	if (params)
		call_rcu(&params->rcu, tcf_ct_params_free);
	if (res == ACT_P_CREATED)
		tcf_idr_insert(tn, *a);

	return res;

cleanup:
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
	kfree(params);
	tcf_idr_release(*a, bind);
	return err;
}

static void tcf_ct_cleanup(struct tc_action *a)
{
	struct tcf_ct_params *params;
	struct tcf_ct *c = to_ct(a);

	params = rcu_dereference_protected(c->params, 1);
	if (params)
		call_rcu(&params->rcu, tcf_ct_params_free);
}

static int tcf_ct_dump_key_val(struct sk_buff *skb,
			       void *val, int val_type,
			       void *mask, int mask_type,
			       int len)
{
	int err;

	if (mask && !memchr_inv(mask, 0, len))
		return 0;

	err = nla_put(skb, val_type, len, val);
	if (err)
		return err;

	if (mask_type != TCA_CT_UNSPEC) {
		err = nla_put(skb, mask_type, len, mask);
		if (err)
			return err;
	}

	return 0;
}

static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
{
	struct nf_nat_range2 *range = &p->range;

	if (!(p->ct_action & TCA_CT_ACT_NAT))
		return 0;

	if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
		return 0;

	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
		if (p->ipv4_range) {
			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN,
					    range->min_addr.ip))
				return -1;
			if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX,
					    range->max_addr.ip))
				return -1;
		} else {
			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN,
					     &range->min_addr.in6))
				return -1;
			if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX,
					     &range->max_addr.in6))
				return -1;
		}
	}

	if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN,
				 range->min_proto.all))
			return -1;
		if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX,
				 range->max_proto.all))
			return -1;
	}

	return 0;
}

static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
			      int bind, int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_ct *c = to_ct(a);
	struct tcf_ct_params *p;

	struct tc_ct opt = {
		.index   = c->tcf_index,
		.refcnt  = refcount_read(&c->tcf_refcnt) - ref,
		.bindcnt = atomic_read(&c->tcf_bindcnt) - bind,
	};
	struct tcf_t t;

	spin_lock_bh(&c->tcf_lock);
	p = rcu_dereference_protected(c->params,
				      lockdep_is_held(&c->tcf_lock));
	opt.action = c->tcf_action;

	if (tcf_ct_dump_key_val(skb,
				&p->ct_action, TCA_CT_ACTION,
				NULL, TCA_CT_UNSPEC,
				sizeof(p->ct_action)))
		goto nla_put_failure;

	if (p->ct_action & TCA_CT_ACT_CLEAR)
		goto skip_dump;

	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
	    tcf_ct_dump_key_val(skb,
				&p->mark, TCA_CT_MARK,
				&p->mark_mask, TCA_CT_MARK_MASK,
				sizeof(p->mark)))
		goto nla_put_failure;

	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
	    tcf_ct_dump_key_val(skb,
				p->labels, TCA_CT_LABELS,
				p->labels_mask, TCA_CT_LABELS_MASK,
				sizeof(p->labels)))
		goto nla_put_failure;

	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
	    tcf_ct_dump_key_val(skb,
				&p->zone, TCA_CT_ZONE,
				NULL, TCA_CT_UNSPEC,
				sizeof(p->zone)))
		goto nla_put_failure;

	if (tcf_ct_dump_nat(skb, p))
		goto nla_put_failure;

skip_dump:
	if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	tcf_tm_dump(&t, &c->tcf_tm);
	if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD))
		goto nla_put_failure;
	spin_unlock_bh(&c->tcf_lock);

	return skb->len;
nla_put_failure:
	spin_unlock_bh(&c->tcf_lock);
	nlmsg_trim(skb, b);
	return -1;
}

static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
			 struct netlink_callback *cb, int type,
			 const struct tc_action_ops *ops,
			 struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, ct_net_id);

	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}

static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, ct_net_id);

	return tcf_idr_search(tn, a, index);
}

static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
			     u64 lastuse, bool hw)
{
	struct tcf_ct *c = to_ct(a);

	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);

	if (hw)
		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
				   bytes, packets);
	c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}

static struct tc_action_ops act_ct_ops = {
	.kind		=	"ct",
	.id		=	TCA_ID_CT,
	.owner		=	THIS_MODULE,
	.act		=	tcf_ct_act,
	.dump		=	tcf_ct_dump,
	.init		=	tcf_ct_init,
	.cleanup	=	tcf_ct_cleanup,
	.walk		=	tcf_ct_walker,
	.lookup		=	tcf_ct_search,
	.stats_update	=	tcf_stats_update,
	.size		=	sizeof(struct tcf_ct),
};

static __net_init int ct_init_net(struct net *net)
{
	unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8;
	struct tc_ct_action_net *tn = net_generic(net, ct_net_id);

	if (nf_connlabels_get(net, n_bits - 1)) {
		tn->labels = false;
		pr_err("act_ct: Failed to set connlabels length");
	} else {
		tn->labels = true;
	}

	return tc_action_net_init(net, &tn->tn, &act_ct_ops);
}

static void __net_exit ct_exit_net(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		struct tc_ct_action_net *tn = net_generic(net, ct_net_id);

		if (tn->labels)
			nf_connlabels_put(net);
	}
	rtnl_unlock();

	tc_action_net_exit(net_list, ct_net_id);
}

static struct pernet_operations ct_net_ops = {
	.init = ct_init_net,
	.exit_batch = ct_exit_net,
	.id   = &ct_net_id,
	.size = sizeof(struct tc_ct_action_net),
};

static int __init ct_init_module(void)
{
	return tcf_register_action(&act_ct_ops, &ct_net_ops);
}

static void __exit ct_cleanup_module(void)
{
	tcf_unregister_action(&act_ct_ops, &ct_net_ops);
}

module_init(ct_init_module);
module_exit(ct_cleanup_module);
MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
MODULE_DESCRIPTION("Connection tracking action");
MODULE_LICENSE("GPL v2");