1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/types.h>
4 #include <linux/netfilter.h>
5 #include <linux/module.h>
6 #include <linux/slab.h>
7 #include <linux/mutex.h>
8 #include <linux/vmalloc.h>
9 #include <linux/stddef.h>
10 #include <linux/err.h>
11 #include <linux/percpu.h>
12 #include <linux/notifier.h>
13 #include <linux/kernel.h>
14 #include <linux/netdevice.h>
16 #include <net/netfilter/nf_conntrack.h>
17 #include <net/netfilter/nf_conntrack_l4proto.h>
18 #include <net/netfilter/nf_conntrack_core.h>
19 #include <net/netfilter/nf_conntrack_bridge.h>
20 #include <net/netfilter/nf_log.h>
23 #include <linux/icmp.h>
24 #include <linux/sysctl.h>
25 #include <net/route.h>
28 #include <linux/netfilter_ipv4.h>
29 #include <linux/netfilter_ipv6.h>
30 #include <linux/netfilter_ipv6/ip6_tables.h>
31 #include <net/netfilter/nf_conntrack_helper.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
33 #include <net/netfilter/nf_conntrack_seqadj.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
36 #include <net/netfilter/nf_nat_helper.h>
37 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
38 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
40 #include <linux/ipv6.h>
41 #include <linux/in6.h>
43 #include <net/inet_frag.h>
/* Mutex taken by nf_ct_netns_do_get() and nf_ct_netns_do_put(), and by
 * nf_ct_bridge_register()/nf_ct_bridge_unregister() while they assign
 * nf_ct_bridge_info.
 */
45 static DEFINE_MUTEX(nf_ct_proto_mutex
);
/* Log an invalid packet through nf_log_packet() with the prefix
 * "nf_ct_proto_<protonum>: ".
 *
 * The netns is taken from state->net.  Its ct.sysctl_log_invalid value is
 * compared against both protonum and IPPROTO_RAW before logging.
 * NOTE(review): the statement guarded by that test is not visible in this
 * listing - presumably an early return when neither value matches; confirm.
 */
49 void nf_l4proto_log_invalid(const struct sk_buff
*skb
,
50 const struct nf_hook_state
*state
,
54 struct net
*net
= state
->net
;
58 if (net
->ct
.sysctl_log_invalid
!= protonum
&&
59 net
->ct
.sysctl_log_invalid
!= IPPROTO_RAW
)
66 nf_log_packet(net
, state
->pf
, 0, skb
, state
->in
, state
->out
,
67 NULL
, "nf_ct_proto_%d: %pV ", protonum
, &vaf
);
70 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid
);
/* Conntrack-entry flavour of nf_l4proto_log_invalid(): the protocol number
 * is derived from ct with nf_ct_protonum() and the message is forwarded
 * using a "%pV" format.
 *
 * sysctl_log_invalid == 0 is tested under likely().
 * NOTE(review): the guarded statement is not visible here - presumably an
 * early return when logging is disabled; confirm.
 */
73 void nf_ct_l4proto_log_invalid(const struct sk_buff
*skb
,
74 const struct nf_conn
*ct
,
75 const struct nf_hook_state
*state
,
83 if (likely(net
->ct
.sysctl_log_invalid
== 0))
90 nf_l4proto_log_invalid(skb
, state
,
91 nf_ct_protonum(ct
), "%pV", &vaf
);
94 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid
);
/* Map an L4 protocol number to its conntrack l4proto descriptor.
 *
 * UDP, TCP and ICMP are always handled.  DCCP, SCTP, UDPLITE and GRE are
 * only handled when the matching CONFIG_NF_CT_PROTO_* option is set, and
 * ICMPv6 only when CONFIG_IPV6 is enabled.  The remaining return statement
 * hands back nf_conntrack_l4proto_generic.
 */
97 const struct nf_conntrack_l4proto
*nf_ct_l4proto_find(u8 l4proto
)
100 case IPPROTO_UDP
: return &nf_conntrack_l4proto_udp
;
101 case IPPROTO_TCP
: return &nf_conntrack_l4proto_tcp
;
102 case IPPROTO_ICMP
: return &nf_conntrack_l4proto_icmp
;
103 #ifdef CONFIG_NF_CT_PROTO_DCCP
104 case IPPROTO_DCCP
: return &nf_conntrack_l4proto_dccp
;
106 #ifdef CONFIG_NF_CT_PROTO_SCTP
107 case IPPROTO_SCTP
: return &nf_conntrack_l4proto_sctp
;
109 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
110 case IPPROTO_UDPLITE
: return &nf_conntrack_l4proto_udplite
;
112 #ifdef CONFIG_NF_CT_PROTO_GRE
113 case IPPROTO_GRE
: return &nf_conntrack_l4proto_gre
;
115 #if IS_ENABLED(CONFIG_IPV6)
116 case IPPROTO_ICMPV6
: return &nf_conntrack_l4proto_icmpv6
;
117 #endif /* CONFIG_IPV6 */
120 return &nf_conntrack_l4proto_generic
;
122 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find
);
/* Test whether this hook invocation is NF_INET_POST_ROUTING with an L3
 * master device as state->out.  The test is only compiled in when
 * CONFIG_NET_L3_MASTER_DEV is enabled.
 * NOTE(review): the return statements are not visible in this listing -
 * presumably true on a match and false otherwise; confirm.
 */
124 static bool in_vrf_postrouting(const struct nf_hook_state
*state
)
126 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
127 if (state
->hook
== NF_INET_POST_ROUTING
&&
128 netif_is_l3_master(state
->out
))
/* Hook function that runs the conntrack helper and sequence-number
 * adjustment, where needed, before handing the packet to
 * nf_conntrack_confirm().
 *
 * - The conntrack entry comes from nf_ct_get(); the "!ct ||
 *   in_vrf_postrouting(state)" test guards a statement that is not visible
 *   in this listing (NOTE(review): presumably returns NF_ACCEPT; confirm).
 * - With no helper extension and no pending seqadj (IPS_SEQ_ADJUST_BIT set
 *   on a non-loopback packet) the packet goes straight to
 *   nf_conntrack_confirm().
 * - IP_CT_RELATED_REPLY packets also skip the helper.
 * - The transport header offset depends on nf_ct_l3num(ct): for one family
 *   it is skb_network_offset() + ip_hdrlen(); for IPv6 it is found with
 *   ipv6_skip_exthdr(), and the packet is confirmed without running the
 *   helper when that fails or when (frag_off & htons(~0x7)) is non-zero.
 * - A helper verdict other than NF_ACCEPT is tested for, and a failing
 *   nf_ct_seq_adjust() increments the per-netns "drop" statistic.
 */
134 unsigned int nf_confirm(void *priv
,
136 const struct nf_hook_state
*state
)
138 const struct nf_conn_help
*help
;
139 enum ip_conntrack_info ctinfo
;
140 unsigned int protoff
;
147 ct
= nf_ct_get(skb
, &ctinfo
);
148 if (!ct
|| in_vrf_postrouting(state
))
151 help
= nfct_help(ct
);
153 seqadj_needed
= test_bit(IPS_SEQ_ADJUST_BIT
, &ct
->status
) && !nf_is_loopback_packet(skb
);
154 if (!help
&& !seqadj_needed
)
155 return nf_conntrack_confirm(skb
);
157 /* helper->help() does not expect ICMP packets */
158 if (ctinfo
== IP_CT_RELATED_REPLY
)
159 return nf_conntrack_confirm(skb
);
161 switch (nf_ct_l3num(ct
)) {
163 protoff
= skb_network_offset(skb
) + ip_hdrlen(skb
);
166 pnum
= ipv6_hdr(skb
)->nexthdr
;
167 start
= ipv6_skip_exthdr(skb
, sizeof(struct ipv6hdr
), &pnum
, &frag_off
);
168 if (start
< 0 || (frag_off
& htons(~0x7)) != 0)
169 return nf_conntrack_confirm(skb
);
174 return nf_conntrack_confirm(skb
);
178 const struct nf_conntrack_helper
*helper
;
181 /* rcu_read_lock()ed by nf_hook */
182 helper
= rcu_dereference(help
->helper
);
184 ret
= helper
->help(skb
,
187 if (ret
!= NF_ACCEPT
)
193 !nf_ct_seq_adjust(skb
, ct
, ctinfo
, protoff
)) {
194 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct
), drop
);
198 /* We've seen it coming out the other side: confirm it */
199 return nf_conntrack_confirm(skb
);
201 EXPORT_SYMBOL_GPL(nf_confirm
);
/* IPv4 hook function used for NF_INET_PRE_ROUTING in ipv4_conntrack_ops:
 * passes the packet and hook state straight to nf_conntrack_in().
 */
203 static unsigned int ipv4_conntrack_in(void *priv
,
205 const struct nf_hook_state
*state
)
207 return nf_conntrack_in(skb
, state
);
/* IPv4 hook function used for NF_INET_LOCAL_OUT in ipv4_conntrack_ops.
 *
 * Fragmented packets get a separate path that looks up any conntrack
 * attached to the skb and checks whether it is a template.
 * NOTE(review): what that path does with the template and what it returns
 * is not visible in this listing - confirm against the full source.
 * The final statement hands the packet to nf_conntrack_in().
 */
210 static unsigned int ipv4_conntrack_local(void *priv
,
212 const struct nf_hook_state
*state
)
214 if (ip_is_fragment(ip_hdr(skb
))) { /* IP_NODEFRAG setsockopt set */
215 enum ip_conntrack_info ctinfo
;
216 struct nf_conn
*tmpl
;
218 tmpl
= nf_ct_get(skb
, &ctinfo
);
219 if (tmpl
&& nf_ct_is_template(tmpl
)) {
220 /* when skipping ct, clear templates to avoid fooling
221 * later targets/matches
229 return nf_conntrack_in(skb
, state
);
/* IPv4 hook table registered by nf_ct_netns_do_get():
 *   - ipv4_conntrack_in    at NF_INET_PRE_ROUTING, NF_IP_PRI_CONNTRACK
 *   - ipv4_conntrack_local at NF_INET_LOCAL_OUT,   NF_IP_PRI_CONNTRACK
 *   - two entries at NF_INET_POST_ROUTING and NF_INET_LOCAL_IN with
 *     NF_IP_PRI_CONNTRACK_CONFIRM (NOTE(review): their .hook member is not
 *     visible in this listing - presumably nf_confirm; confirm).
 */
232 /* Connection tracking may drop packets, but never alters them, so
233 * make it the first hook.
235 static const struct nf_hook_ops ipv4_conntrack_ops
[] = {
237 .hook
= ipv4_conntrack_in
,
239 .hooknum
= NF_INET_PRE_ROUTING
,
240 .priority
= NF_IP_PRI_CONNTRACK
,
243 .hook
= ipv4_conntrack_local
,
245 .hooknum
= NF_INET_LOCAL_OUT
,
246 .priority
= NF_IP_PRI_CONNTRACK
,
251 .hooknum
= NF_INET_POST_ROUTING
,
252 .priority
= NF_IP_PRI_CONNTRACK_CONFIRM
,
257 .hooknum
= NF_INET_LOCAL_IN
,
258 .priority
= NF_IP_PRI_CONNTRACK_CONFIRM
,
/* Socket option handler that reports the original destination of an IPv4
 * connection to user space as a struct sockaddr_in.
 *
 * A lookup tuple is built from the socket: inet_rcv_saddr/inet_sport as
 * source, inet_daddr/inet_dport as destination, l3num PF_INET and the
 * socket protocol.  Only IPPROTO_TCP and IPPROTO_SCTP are tested for, and
 * *len is compared against sizeof(struct sockaddr_in).  The tuple is looked
 * up in the default zone with nf_conntrack_find_get(); on a hit the reply
 * is filled from the IP_CT_DIR_ORIGINAL tuple of the entry, sin_zero is
 * cleared, and the result is copied out with copy_to_user().
 * NOTE(review): the error codes and the release of the conntrack reference
 * are not visible in this listing.
 */
262 /* Fast function for those who don't want to parse /proc (and I don't
264 * Reversing the socket's dst/src point of view gives us the reply
268 getorigdst(struct sock
*sk
, int optval
, void __user
*user
, int *len
)
270 const struct inet_sock
*inet
= inet_sk(sk
);
271 const struct nf_conntrack_tuple_hash
*h
;
272 struct nf_conntrack_tuple tuple
;
274 memset(&tuple
, 0, sizeof(tuple
));
277 tuple
.src
.u3
.ip
= inet
->inet_rcv_saddr
;
278 tuple
.src
.u
.tcp
.port
= inet
->inet_sport
;
279 tuple
.dst
.u3
.ip
= inet
->inet_daddr
;
280 tuple
.dst
.u
.tcp
.port
= inet
->inet_dport
;
281 tuple
.src
.l3num
= PF_INET
;
282 tuple
.dst
.protonum
= sk
->sk_protocol
;
285 /* We only do TCP and SCTP at the moment: is there a better way? */
286 if (tuple
.dst
.protonum
!= IPPROTO_TCP
&&
287 tuple
.dst
.protonum
!= IPPROTO_SCTP
)
290 if ((unsigned int)*len
< sizeof(struct sockaddr_in
))
293 h
= nf_conntrack_find_get(sock_net(sk
), &nf_ct_zone_dflt
, &tuple
);
295 struct sockaddr_in sin
;
296 struct nf_conn
*ct
= nf_ct_tuplehash_to_ctrack(h
);
298 sin
.sin_family
= AF_INET
;
299 sin
.sin_port
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
]
300 .tuple
.dst
.u
.tcp
.port
;
301 sin
.sin_addr
.s_addr
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
]
303 memset(sin
.sin_zero
, 0, sizeof(sin
.sin_zero
));
306 if (copy_to_user(user
, &sin
, sizeof(sin
)) != 0)
/* Socket option registration for SO_ORIGINAL_DST, handed to
 * nf_register_sockopt() in nf_conntrack_proto_init().  The option range is
 * get_optmin = SO_ORIGINAL_DST, get_optmax = SO_ORIGINAL_DST + 1.
 * NOTE(review): the .pf and .get members are not visible in this listing.
 */
314 static struct nf_sockopt_ops so_getorigdst
= {
316 .get_optmin
= SO_ORIGINAL_DST
,
317 .get_optmax
= SO_ORIGINAL_DST
+ 1,
319 .owner
= THIS_MODULE
,
322 #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of getorigdst(): reports the original destination of a
 * connection as a struct sockaddr_in6.
 *
 * The lookup tuple (l3num NFPROTO_IPV6) is filled from sk_v6_rcv_saddr /
 * inet_sport and sk_v6_daddr / inet_dport plus the socket protocol;
 * sk_bound_dev_if and the socket flow label are also read.  Only
 * IPPROTO_TCP and IPPROTO_SCTP are tested for, and *len must be
 * non-negative and at least sizeof(sin6).  The tuple is looked up in the
 * default zone; the reply takes port and address from the
 * IP_CT_DIR_ORIGINAL tuple, the flow label masked with IPV6_FLOWINFO_MASK,
 * and a scope id from ipv6_iface_scope_id().  Returns -EFAULT when
 * copy_to_user() fails, 0 otherwise on this final path.
 * NOTE(review): the other error returns and the release of the conntrack
 * reference are not visible in this listing.
 */
324 ipv6_getorigdst(struct sock
*sk
, int optval
, void __user
*user
, int *len
)
326 struct nf_conntrack_tuple tuple
= { .src
.l3num
= NFPROTO_IPV6
};
327 const struct ipv6_pinfo
*inet6
= inet6_sk(sk
);
328 const struct inet_sock
*inet
= inet_sk(sk
);
329 const struct nf_conntrack_tuple_hash
*h
;
330 struct sockaddr_in6 sin6
;
336 tuple
.src
.u3
.in6
= sk
->sk_v6_rcv_saddr
;
337 tuple
.src
.u
.tcp
.port
= inet
->inet_sport
;
338 tuple
.dst
.u3
.in6
= sk
->sk_v6_daddr
;
339 tuple
.dst
.u
.tcp
.port
= inet
->inet_dport
;
340 tuple
.dst
.protonum
= sk
->sk_protocol
;
341 bound_dev_if
= sk
->sk_bound_dev_if
;
342 flow_label
= inet6
->flow_label
;
345 if (tuple
.dst
.protonum
!= IPPROTO_TCP
&&
346 tuple
.dst
.protonum
!= IPPROTO_SCTP
)
349 if (*len
< 0 || (unsigned int)*len
< sizeof(sin6
))
352 h
= nf_conntrack_find_get(sock_net(sk
), &nf_ct_zone_dflt
, &tuple
);
356 ct
= nf_ct_tuplehash_to_ctrack(h
);
358 sin6
.sin6_family
= AF_INET6
;
359 sin6
.sin6_port
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.u
.tcp
.port
;
360 sin6
.sin6_flowinfo
= flow_label
& IPV6_FLOWINFO_MASK
;
361 memcpy(&sin6
.sin6_addr
,
362 &ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.u3
.in6
,
363 sizeof(sin6
.sin6_addr
));
366 sin6
.sin6_scope_id
= ipv6_iface_scope_id(&sin6
.sin6_addr
, bound_dev_if
);
367 return copy_to_user(user
, &sin6
, sizeof(sin6
)) ? -EFAULT
: 0;
/* Socket option registration for IP6T_SO_ORIGINAL_DST, served by
 * ipv6_getorigdst().  The option range is get_optmin =
 * IP6T_SO_ORIGINAL_DST, get_optmax = IP6T_SO_ORIGINAL_DST + 1.
 */
370 static struct nf_sockopt_ops so_getorigdst6
= {
372 .get_optmin
= IP6T_SO_ORIGINAL_DST
,
373 .get_optmax
= IP6T_SO_ORIGINAL_DST
+ 1,
374 .get
= ipv6_getorigdst
,
375 .owner
= THIS_MODULE
,
/* IPv6 hook function used for NF_INET_PRE_ROUTING in ipv6_conntrack_ops:
 * passes the packet and hook state straight to nf_conntrack_in().
 */
378 static unsigned int ipv6_conntrack_in(void *priv
,
380 const struct nf_hook_state
*state
)
382 return nf_conntrack_in(skb
, state
);
/* IPv6 hook function used for NF_INET_LOCAL_OUT in ipv6_conntrack_ops:
 * passes the packet and hook state straight to nf_conntrack_in().
 */
385 static unsigned int ipv6_conntrack_local(void *priv
,
387 const struct nf_hook_state
*state
)
389 return nf_conntrack_in(skb
, state
);
/* IPv6 hook table registered by nf_ct_netns_do_get():
 *   - ipv6_conntrack_in    at NF_INET_PRE_ROUTING, NF_IP6_PRI_CONNTRACK
 *   - ipv6_conntrack_local at NF_INET_LOCAL_OUT,   NF_IP6_PRI_CONNTRACK
 *   - an entry at NF_INET_POST_ROUTING with NF_IP6_PRI_LAST and one at
 *     NF_INET_LOCAL_IN with NF_IP6_PRI_LAST - 1 (NOTE(review): their .hook
 *     member is not visible in this listing - presumably nf_confirm;
 *     confirm).
 */
392 static const struct nf_hook_ops ipv6_conntrack_ops
[] = {
394 .hook
= ipv6_conntrack_in
,
396 .hooknum
= NF_INET_PRE_ROUTING
,
397 .priority
= NF_IP6_PRI_CONNTRACK
,
400 .hook
= ipv6_conntrack_local
,
402 .hooknum
= NF_INET_LOCAL_OUT
,
403 .priority
= NF_IP6_PRI_CONNTRACK
,
408 .hooknum
= NF_INET_POST_ROUTING
,
409 .priority
= NF_IP6_PRI_LAST
,
414 .hooknum
= NF_INET_LOCAL_IN
,
415 .priority
= NF_IP6_PRI_LAST
- 1,
/* Iterator callback passed to nf_ct_iterate_cleanup_net() by
 * nf_ct_netns_do_get().
 *
 * _nfproto carries the L3 protocol number packed into the pointer value.
 * The entry's nf_ct_l3num() is compared against it, and for TCP entries in
 * TCP_CONNTRACK_ESTABLISHED state td_maxwin is reset to 0 for both
 * directions (seen[0] and seen[1]).
 * NOTE(review): the return statements are not visible in this listing.
 */
420 static int nf_ct_tcp_fixup(struct nf_conn
*ct
, void *_nfproto
)
422 u8 nfproto
= (unsigned long)_nfproto
;
424 if (nf_ct_l3num(ct
) != nfproto
)
427 if (nf_ct_protonum(ct
) == IPPROTO_TCP
&&
428 ct
->proto
.tcp
.state
== TCP_CONNTRACK_ESTABLISHED
) {
429 ct
->proto
.tcp
.seen
[0].td_maxwin
= 0;
430 ct
->proto
.tcp
.seen
[1].td_maxwin
= 0;
/* Bridge conntrack descriptor: set by nf_ct_bridge_register(), reset to
 * NULL by nf_ct_bridge_unregister(), both under nf_ct_proto_mutex.
 */
436 static struct nf_ct_bridge_info
*nf_ct_bridge_info
;
/* Acquire the conntrack hooks of one protocol family (nfproto) for a netns,
 * working under nf_ct_proto_mutex.
 *
 * IPv4 / IPv6 (IPv6 only with CONFIG_IPV6): the users4 / users6 counter is
 * tested against 1, the family's defrag is enabled with
 * nf_defrag_ipv{4,6}_enable(), and the family's hook table is registered.
 * NOTE(review): the counter increments, the branch bodies and the error
 * unwinding are not visible in this listing.
 *
 * Bridge: when nf_ct_bridge_info is not set, the mutex is released and
 * request_module("nf_conntrack_bridge") is called.  A reference on the
 * bridge module is then taken with try_module_get(), users_bridge is
 * incremented, and the hooks from nf_ct_bridge_info are registered.
 * users_bridge is set back to 0 on a path that presumably handles
 * registration failure (confirm).
 *
 * After the mutex is dropped, nf_ct_tcp_fixup() may be run over the netns'
 * conntrack entries with nfproto as its argument.
 * NOTE(review): this is presumably conditional on fixup_needed - confirm.
 */
438 static int nf_ct_netns_do_get(struct net
*net
, u8 nfproto
)
440 struct nf_conntrack_net
*cnet
= nf_ct_pernet(net
);
441 bool fixup_needed
= false, retry
= true;
444 mutex_lock(&nf_ct_proto_mutex
);
449 if (cnet
->users4
> 1)
451 err
= nf_defrag_ipv4_enable(net
);
457 err
= nf_register_net_hooks(net
, ipv4_conntrack_ops
,
458 ARRAY_SIZE(ipv4_conntrack_ops
));
464 #if IS_ENABLED(CONFIG_IPV6)
467 if (cnet
->users6
> 1)
469 err
= nf_defrag_ipv6_enable(net
);
475 err
= nf_register_net_hooks(net
, ipv6_conntrack_ops
,
476 ARRAY_SIZE(ipv6_conntrack_ops
));
484 if (!nf_ct_bridge_info
) {
489 mutex_unlock(&nf_ct_proto_mutex
);
490 request_module("nf_conntrack_bridge");
494 if (!try_module_get(nf_ct_bridge_info
->me
)) {
498 cnet
->users_bridge
++;
499 if (cnet
->users_bridge
> 1)
502 err
= nf_register_net_hooks(net
, nf_ct_bridge_info
->ops
,
503 nf_ct_bridge_info
->ops_size
);
505 cnet
->users_bridge
= 0;
514 mutex_unlock(&nf_ct_proto_mutex
);
517 struct nf_ct_iter_data iter_data
= {
519 .data
= (void *)(unsigned long)nfproto
,
521 nf_ct_iterate_cleanup_net(nf_ct_tcp_fixup
, &iter_data
);
/* Release one user of a protocol family's conntrack hooks in a netns,
 * working under nf_ct_proto_mutex.
 *
 * IPv4 / IPv6 (IPv6 only with CONFIG_IPV6): a non-zero users4 / users6
 * counter is decremented; when it reaches 0 the family's hooks are
 * unregistered and its defrag is disabled.
 *
 * Bridge: nf_ct_bridge_info is tested for NULL first (NOTE(review): the
 * guarded statement is not visible - presumably skips the bridge teardown).
 * A non-zero users_bridge is decremented and the bridge hooks are
 * unregistered when it reaches 0; module_put() drops the bridge module
 * reference.
 */
527 static void nf_ct_netns_do_put(struct net
*net
, u8 nfproto
)
529 struct nf_conntrack_net
*cnet
= nf_ct_pernet(net
);
531 mutex_lock(&nf_ct_proto_mutex
);
534 if (cnet
->users4
&& (--cnet
->users4
== 0)) {
535 nf_unregister_net_hooks(net
, ipv4_conntrack_ops
,
536 ARRAY_SIZE(ipv4_conntrack_ops
));
537 nf_defrag_ipv4_disable(net
);
540 #if IS_ENABLED(CONFIG_IPV6)
542 if (cnet
->users6
&& (--cnet
->users6
== 0)) {
543 nf_unregister_net_hooks(net
, ipv6_conntrack_ops
,
544 ARRAY_SIZE(ipv6_conntrack_ops
));
545 nf_defrag_ipv6_disable(net
);
550 if (!nf_ct_bridge_info
)
552 if (cnet
->users_bridge
&& (--cnet
->users_bridge
== 0))
553 nf_unregister_net_hooks(net
, nf_ct_bridge_info
->ops
,
554 nf_ct_bridge_info
->ops_size
);
556 module_put(nf_ct_bridge_info
->me
);
559 mutex_unlock(&nf_ct_proto_mutex
);
/* Acquire the IPv4 hooks and, with CONFIG_IPV6, the IPv6 hooks for a netns
 * through nf_ct_netns_do_get().  nf_ct_netns_put(net, NFPROTO_IPV4) is
 * called on one path.
 * NOTE(review): the error checks are not visible in this listing -
 * presumably that call rolls back IPv4 when the IPv6 get fails; confirm.
 */
562 static int nf_ct_netns_inet_get(struct net
*net
)
566 err
= nf_ct_netns_do_get(net
, NFPROTO_IPV4
);
567 #if IS_ENABLED(CONFIG_IPV6)
570 err
= nf_ct_netns_do_get(net
, NFPROTO_IPV6
);
576 nf_ct_netns_put(net
, NFPROTO_IPV4
);
/* Exported entry point for acquiring conntrack hooks in a netns for the
 * given nfproto.
 *
 * The visible calls form three groups: nf_ct_netns_inet_get() alone; a
 * bridge get followed by nf_ct_netns_inet_get() with a matching
 * nf_ct_netns_put(net, NFPROTO_BRIDGE); and a plain
 * nf_ct_netns_do_get(net, nfproto).
 * NOTE(review): the switch labels selecting between these groups and the
 * error checks are not visible in this listing - confirm which nfproto
 * values map to which group.
 */
582 int nf_ct_netns_get(struct net
*net
, u8 nfproto
)
588 err
= nf_ct_netns_inet_get(net
);
591 err
= nf_ct_netns_do_get(net
, NFPROTO_BRIDGE
);
595 err
= nf_ct_netns_inet_get(net
);
597 nf_ct_netns_put(net
, NFPROTO_BRIDGE
);
602 err
= nf_ct_netns_do_get(net
, nfproto
);
607 EXPORT_SYMBOL_GPL(nf_ct_netns_get
);
/* Exported counterpart of nf_ct_netns_get(): releases hooks through
 * nf_ct_netns_do_put().
 *
 * The visible calls are a bridge put, an IPv4 + IPv6 put pair, and a put of
 * the caller's nfproto.
 * NOTE(review): the switch labels and any fallthrough between these calls
 * are not visible in this listing - confirm against the full source.
 */
609 void nf_ct_netns_put(struct net
*net
, uint8_t nfproto
)
613 nf_ct_netns_do_put(net
, NFPROTO_BRIDGE
);
616 nf_ct_netns_do_put(net
, NFPROTO_IPV4
);
617 nf_ct_netns_do_put(net
, NFPROTO_IPV6
);
620 nf_ct_netns_do_put(net
, nfproto
);
624 EXPORT_SYMBOL_GPL(nf_ct_netns_put
);
/* Install info as the bridge conntrack descriptor.  Warns if a descriptor
 * is already set (that check runs before the mutex is taken), then stores
 * the pointer under nf_ct_proto_mutex.
 */
626 void nf_ct_bridge_register(struct nf_ct_bridge_info
*info
)
628 WARN_ON(nf_ct_bridge_info
);
629 mutex_lock(&nf_ct_proto_mutex
);
630 nf_ct_bridge_info
= info
;
631 mutex_unlock(&nf_ct_proto_mutex
);
633 EXPORT_SYMBOL_GPL(nf_ct_bridge_register
);
/* Clear the bridge conntrack descriptor.  Warns if none is set (that check
 * runs before the mutex is taken), then resets the pointer to NULL under
 * nf_ct_proto_mutex.  The info argument is not used by the visible code.
 */
635 void nf_ct_bridge_unregister(struct nf_ct_bridge_info
*info
)
637 WARN_ON(!nf_ct_bridge_info
);
638 mutex_lock(&nf_ct_proto_mutex
);
639 nf_ct_bridge_info
= NULL
;
640 mutex_unlock(&nf_ct_proto_mutex
);
642 EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister
);
/* Register the original-destination socket options: so_getorigdst always,
 * so_getorigdst6 when CONFIG_IPV6 is enabled.  The cleanup_sockopt path
 * unregisters so_getorigdst again.
 * NOTE(review): the error checks and return statements are not visible in
 * this listing - presumably the goto is taken when the IPv6 registration
 * fails; confirm.
 */
644 int nf_conntrack_proto_init(void)
648 ret
= nf_register_sockopt(&so_getorigdst
);
652 #if IS_ENABLED(CONFIG_IPV6)
653 ret
= nf_register_sockopt(&so_getorigdst6
);
655 goto cleanup_sockopt
;
660 #if IS_ENABLED(CONFIG_IPV6)
662 nf_unregister_sockopt(&so_getorigdst
);
/* Undo nf_conntrack_proto_init(): unregister so_getorigdst and, when
 * CONFIG_IPV6 is enabled, so_getorigdst6.
 */
667 void nf_conntrack_proto_fini(void)
669 nf_unregister_sockopt(&so_getorigdst
);
670 #if IS_ENABLED(CONFIG_IPV6)
671 nf_unregister_sockopt(&so_getorigdst6
);
/* Run the per-netns init routine of each protocol tracker: generic, UDP,
 * TCP and ICMP always; ICMPv6 with CONFIG_IPV6; DCCP, SCTP and GRE when
 * their CONFIG_NF_CT_PROTO_* option is set.
 */
675 void nf_conntrack_proto_pernet_init(struct net
*net
)
677 nf_conntrack_generic_init_net(net
);
678 nf_conntrack_udp_init_net(net
);
679 nf_conntrack_tcp_init_net(net
);
680 nf_conntrack_icmp_init_net(net
);
681 #if IS_ENABLED(CONFIG_IPV6)
682 nf_conntrack_icmpv6_init_net(net
);
684 #ifdef CONFIG_NF_CT_PROTO_DCCP
685 nf_conntrack_dccp_init_net(net
);
687 #ifdef CONFIG_NF_CT_PROTO_SCTP
688 nf_conntrack_sctp_init_net(net
);
690 #ifdef CONFIG_NF_CT_PROTO_GRE
691 nf_conntrack_gre_init_net(net
);
/* Module parameter and metadata.  "hashsize" is backed by
 * nf_conntrack_htable_size: writes go through nf_conntrack_set_hashsize(),
 * reads through param_get_uint(), with permissions 0600.  The aliases cover
 * the legacy "ip_conntrack" name and "nf_conntrack-<AF_INET|AF_INET6>".
 */
695 module_param_call(hashsize
, nf_conntrack_set_hashsize
, param_get_uint
,
696 &nf_conntrack_htable_size
, 0600);
698 MODULE_ALIAS("ip_conntrack");
699 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET
));
700 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6
));
701 MODULE_LICENSE("GPL");
702 MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking");