// SPDX-License-Identifier: GPL-2.0-only
/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 */
6 #include <linux/types.h>
7 #include <linux/export.h>
8 #include <linux/init.h>
10 #include <linux/tcp.h>
11 #include <linux/icmp.h>
12 #include <linux/icmpv6.h>
14 #include <linux/dccp.h>
15 #include <linux/sctp.h>
16 #include <net/sctp/checksum.h>
18 #include <linux/netfilter.h>
19 #include <net/netfilter/nf_nat.h>
21 #include <linux/ipv6.h>
22 #include <linux/netfilter_ipv6.h>
23 #include <net/checksum.h>
24 #include <net/ip6_checksum.h>
25 #include <net/ip6_route.h>
29 #include <net/netfilter/nf_conntrack_core.h>
30 #include <net/netfilter/nf_conntrack.h>
31 #include <linux/netfilter/nfnetlink_conntrack.h>
33 static void nf_csum_update(struct sk_buff
*skb
,
34 unsigned int iphdroff
, __sum16
*check
,
35 const struct nf_conntrack_tuple
*t
,
36 enum nf_nat_manip_type maniptype
);
39 __udp_manip_pkt(struct sk_buff
*skb
,
40 unsigned int iphdroff
, struct udphdr
*hdr
,
41 const struct nf_conntrack_tuple
*tuple
,
42 enum nf_nat_manip_type maniptype
, bool do_csum
)
44 __be16
*portptr
, newport
;
46 if (maniptype
== NF_NAT_MANIP_SRC
) {
47 /* Get rid of src port */
48 newport
= tuple
->src
.u
.udp
.port
;
49 portptr
= &hdr
->source
;
51 /* Get rid of dst port */
52 newport
= tuple
->dst
.u
.udp
.port
;
56 nf_csum_update(skb
, iphdroff
, &hdr
->check
, tuple
, maniptype
);
57 inet_proto_csum_replace2(&hdr
->check
, skb
, *portptr
, newport
,
60 hdr
->check
= CSUM_MANGLED_0
;
65 static bool udp_manip_pkt(struct sk_buff
*skb
,
66 unsigned int iphdroff
, unsigned int hdroff
,
67 const struct nf_conntrack_tuple
*tuple
,
68 enum nf_nat_manip_type maniptype
)
72 if (skb_ensure_writable(skb
, hdroff
+ sizeof(*hdr
)))
75 hdr
= (struct udphdr
*)(skb
->data
+ hdroff
);
76 __udp_manip_pkt(skb
, iphdroff
, hdr
, tuple
, maniptype
, !!hdr
->check
);
81 static bool udplite_manip_pkt(struct sk_buff
*skb
,
82 unsigned int iphdroff
, unsigned int hdroff
,
83 const struct nf_conntrack_tuple
*tuple
,
84 enum nf_nat_manip_type maniptype
)
86 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
89 if (skb_ensure_writable(skb
, hdroff
+ sizeof(*hdr
)))
92 hdr
= (struct udphdr
*)(skb
->data
+ hdroff
);
93 __udp_manip_pkt(skb
, iphdroff
, hdr
, tuple
, maniptype
, true);
99 sctp_manip_pkt(struct sk_buff
*skb
,
100 unsigned int iphdroff
, unsigned int hdroff
,
101 const struct nf_conntrack_tuple
*tuple
,
102 enum nf_nat_manip_type maniptype
)
104 #ifdef CONFIG_NF_CT_PROTO_SCTP
108 /* This could be an inner header returned in imcp packet; in such
109 * cases we cannot update the checksum field since it is outside
110 * of the 8 bytes of transport layer headers we are guaranteed.
112 if (skb
->len
>= hdroff
+ sizeof(*hdr
))
113 hdrsize
= sizeof(*hdr
);
115 if (skb_ensure_writable(skb
, hdroff
+ hdrsize
))
118 hdr
= (struct sctphdr
*)(skb
->data
+ hdroff
);
120 if (maniptype
== NF_NAT_MANIP_SRC
) {
121 /* Get rid of src port */
122 hdr
->source
= tuple
->src
.u
.sctp
.port
;
124 /* Get rid of dst port */
125 hdr
->dest
= tuple
->dst
.u
.sctp
.port
;
128 if (hdrsize
< sizeof(*hdr
))
131 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
132 hdr
->checksum
= sctp_compute_cksum(skb
, hdroff
);
133 skb
->ip_summed
= CHECKSUM_NONE
;
141 tcp_manip_pkt(struct sk_buff
*skb
,
142 unsigned int iphdroff
, unsigned int hdroff
,
143 const struct nf_conntrack_tuple
*tuple
,
144 enum nf_nat_manip_type maniptype
)
147 __be16
*portptr
, newport
, oldport
;
148 int hdrsize
= 8; /* TCP connection tracking guarantees this much */
150 /* this could be a inner header returned in icmp packet; in such
151 cases we cannot update the checksum field since it is outside of
152 the 8 bytes of transport layer headers we are guaranteed */
153 if (skb
->len
>= hdroff
+ sizeof(struct tcphdr
))
154 hdrsize
= sizeof(struct tcphdr
);
156 if (skb_ensure_writable(skb
, hdroff
+ hdrsize
))
159 hdr
= (struct tcphdr
*)(skb
->data
+ hdroff
);
161 if (maniptype
== NF_NAT_MANIP_SRC
) {
162 /* Get rid of src port */
163 newport
= tuple
->src
.u
.tcp
.port
;
164 portptr
= &hdr
->source
;
166 /* Get rid of dst port */
167 newport
= tuple
->dst
.u
.tcp
.port
;
168 portptr
= &hdr
->dest
;
174 if (hdrsize
< sizeof(*hdr
))
177 nf_csum_update(skb
, iphdroff
, &hdr
->check
, tuple
, maniptype
);
178 inet_proto_csum_replace2(&hdr
->check
, skb
, oldport
, newport
, false);
183 dccp_manip_pkt(struct sk_buff
*skb
,
184 unsigned int iphdroff
, unsigned int hdroff
,
185 const struct nf_conntrack_tuple
*tuple
,
186 enum nf_nat_manip_type maniptype
)
188 #ifdef CONFIG_NF_CT_PROTO_DCCP
189 struct dccp_hdr
*hdr
;
190 __be16
*portptr
, oldport
, newport
;
191 int hdrsize
= 8; /* DCCP connection tracking guarantees this much */
193 if (skb
->len
>= hdroff
+ sizeof(struct dccp_hdr
))
194 hdrsize
= sizeof(struct dccp_hdr
);
196 if (skb_ensure_writable(skb
, hdroff
+ hdrsize
))
199 hdr
= (struct dccp_hdr
*)(skb
->data
+ hdroff
);
201 if (maniptype
== NF_NAT_MANIP_SRC
) {
202 newport
= tuple
->src
.u
.dccp
.port
;
203 portptr
= &hdr
->dccph_sport
;
205 newport
= tuple
->dst
.u
.dccp
.port
;
206 portptr
= &hdr
->dccph_dport
;
212 if (hdrsize
< sizeof(*hdr
))
215 nf_csum_update(skb
, iphdroff
, &hdr
->dccph_checksum
, tuple
, maniptype
);
216 inet_proto_csum_replace2(&hdr
->dccph_checksum
, skb
, oldport
, newport
,
223 icmp_manip_pkt(struct sk_buff
*skb
,
224 unsigned int iphdroff
, unsigned int hdroff
,
225 const struct nf_conntrack_tuple
*tuple
,
226 enum nf_nat_manip_type maniptype
)
230 if (skb_ensure_writable(skb
, hdroff
+ sizeof(*hdr
)))
233 hdr
= (struct icmphdr
*)(skb
->data
+ hdroff
);
238 case ICMP_TIMESTAMPREPLY
:
239 case ICMP_INFO_REQUEST
:
240 case ICMP_INFO_REPLY
:
242 case ICMP_ADDRESSREPLY
:
247 inet_proto_csum_replace2(&hdr
->checksum
, skb
,
248 hdr
->un
.echo
.id
, tuple
->src
.u
.icmp
.id
, false);
249 hdr
->un
.echo
.id
= tuple
->src
.u
.icmp
.id
;
254 icmpv6_manip_pkt(struct sk_buff
*skb
,
255 unsigned int iphdroff
, unsigned int hdroff
,
256 const struct nf_conntrack_tuple
*tuple
,
257 enum nf_nat_manip_type maniptype
)
259 struct icmp6hdr
*hdr
;
261 if (skb_ensure_writable(skb
, hdroff
+ sizeof(*hdr
)))
264 hdr
= (struct icmp6hdr
*)(skb
->data
+ hdroff
);
265 nf_csum_update(skb
, iphdroff
, &hdr
->icmp6_cksum
, tuple
, maniptype
);
266 if (hdr
->icmp6_type
== ICMPV6_ECHO_REQUEST
||
267 hdr
->icmp6_type
== ICMPV6_ECHO_REPLY
) {
268 inet_proto_csum_replace2(&hdr
->icmp6_cksum
, skb
,
269 hdr
->icmp6_identifier
,
270 tuple
->src
.u
.icmp
.id
, false);
271 hdr
->icmp6_identifier
= tuple
->src
.u
.icmp
.id
;
276 /* manipulate a GRE packet according to maniptype */
278 gre_manip_pkt(struct sk_buff
*skb
,
279 unsigned int iphdroff
, unsigned int hdroff
,
280 const struct nf_conntrack_tuple
*tuple
,
281 enum nf_nat_manip_type maniptype
)
283 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
284 const struct gre_base_hdr
*greh
;
285 struct pptp_gre_header
*pgreh
;
287 /* pgreh includes two optional 32bit fields which are not required
288 * to be there. That's where the magic '8' comes from */
289 if (skb_ensure_writable(skb
, hdroff
+ sizeof(*pgreh
) - 8))
292 greh
= (void *)skb
->data
+ hdroff
;
293 pgreh
= (struct pptp_gre_header
*)greh
;
295 /* we only have destination manip of a packet, since 'source key'
296 * is not present in the packet itself */
297 if (maniptype
!= NF_NAT_MANIP_DST
)
300 switch (greh
->flags
& GRE_VERSION
) {
302 /* We do not currently NAT any GREv0 packets.
303 * Try to behave like "nf_nat_proto_unknown" */
306 pr_debug("call_id -> 0x%04x\n", ntohs(tuple
->dst
.u
.gre
.key
));
307 pgreh
->call_id
= tuple
->dst
.u
.gre
.key
;
310 pr_debug("can't nat unknown GRE version\n");
317 static bool l4proto_manip_pkt(struct sk_buff
*skb
,
318 unsigned int iphdroff
, unsigned int hdroff
,
319 const struct nf_conntrack_tuple
*tuple
,
320 enum nf_nat_manip_type maniptype
)
322 switch (tuple
->dst
.protonum
) {
324 return tcp_manip_pkt(skb
, iphdroff
, hdroff
,
327 return udp_manip_pkt(skb
, iphdroff
, hdroff
,
329 case IPPROTO_UDPLITE
:
330 return udplite_manip_pkt(skb
, iphdroff
, hdroff
,
333 return sctp_manip_pkt(skb
, iphdroff
, hdroff
,
336 return icmp_manip_pkt(skb
, iphdroff
, hdroff
,
339 return icmpv6_manip_pkt(skb
, iphdroff
, hdroff
,
342 return dccp_manip_pkt(skb
, iphdroff
, hdroff
,
345 return gre_manip_pkt(skb
, iphdroff
, hdroff
,
349 /* If we don't know protocol -- no error, pass it unmodified. */
353 static bool nf_nat_ipv4_manip_pkt(struct sk_buff
*skb
,
354 unsigned int iphdroff
,
355 const struct nf_conntrack_tuple
*target
,
356 enum nf_nat_manip_type maniptype
)
361 if (skb_ensure_writable(skb
, iphdroff
+ sizeof(*iph
)))
364 iph
= (void *)skb
->data
+ iphdroff
;
365 hdroff
= iphdroff
+ iph
->ihl
* 4;
367 if (!l4proto_manip_pkt(skb
, iphdroff
, hdroff
, target
, maniptype
))
369 iph
= (void *)skb
->data
+ iphdroff
;
371 if (maniptype
== NF_NAT_MANIP_SRC
) {
372 csum_replace4(&iph
->check
, iph
->saddr
, target
->src
.u3
.ip
);
373 iph
->saddr
= target
->src
.u3
.ip
;
375 csum_replace4(&iph
->check
, iph
->daddr
, target
->dst
.u3
.ip
);
376 iph
->daddr
= target
->dst
.u3
.ip
;
381 static bool nf_nat_ipv6_manip_pkt(struct sk_buff
*skb
,
382 unsigned int iphdroff
,
383 const struct nf_conntrack_tuple
*target
,
384 enum nf_nat_manip_type maniptype
)
386 #if IS_ENABLED(CONFIG_IPV6)
387 struct ipv6hdr
*ipv6h
;
392 if (skb_ensure_writable(skb
, iphdroff
+ sizeof(*ipv6h
)))
395 ipv6h
= (void *)skb
->data
+ iphdroff
;
396 nexthdr
= ipv6h
->nexthdr
;
397 hdroff
= ipv6_skip_exthdr(skb
, iphdroff
+ sizeof(*ipv6h
),
398 &nexthdr
, &frag_off
);
402 if ((frag_off
& htons(~0x7)) == 0 &&
403 !l4proto_manip_pkt(skb
, iphdroff
, hdroff
, target
, maniptype
))
406 /* must reload, offset might have changed */
407 ipv6h
= (void *)skb
->data
+ iphdroff
;
410 if (maniptype
== NF_NAT_MANIP_SRC
)
411 ipv6h
->saddr
= target
->src
.u3
.in6
;
413 ipv6h
->daddr
= target
->dst
.u3
.in6
;
419 unsigned int nf_nat_manip_pkt(struct sk_buff
*skb
, struct nf_conn
*ct
,
420 enum nf_nat_manip_type mtype
,
421 enum ip_conntrack_dir dir
)
423 struct nf_conntrack_tuple target
;
425 /* We are aiming to look like inverse of other direction. */
426 nf_ct_invert_tuple(&target
, &ct
->tuplehash
[!dir
].tuple
);
428 switch (target
.src
.l3num
) {
430 if (nf_nat_ipv6_manip_pkt(skb
, 0, &target
, mtype
))
434 if (nf_nat_ipv4_manip_pkt(skb
, 0, &target
, mtype
))
445 static void nf_nat_ipv4_csum_update(struct sk_buff
*skb
,
446 unsigned int iphdroff
, __sum16
*check
,
447 const struct nf_conntrack_tuple
*t
,
448 enum nf_nat_manip_type maniptype
)
450 struct iphdr
*iph
= (struct iphdr
*)(skb
->data
+ iphdroff
);
453 if (maniptype
== NF_NAT_MANIP_SRC
) {
455 newip
= t
->src
.u3
.ip
;
458 newip
= t
->dst
.u3
.ip
;
460 inet_proto_csum_replace4(check
, skb
, oldip
, newip
, true);
463 static void nf_nat_ipv6_csum_update(struct sk_buff
*skb
,
464 unsigned int iphdroff
, __sum16
*check
,
465 const struct nf_conntrack_tuple
*t
,
466 enum nf_nat_manip_type maniptype
)
468 #if IS_ENABLED(CONFIG_IPV6)
469 const struct ipv6hdr
*ipv6h
= (struct ipv6hdr
*)(skb
->data
+ iphdroff
);
470 const struct in6_addr
*oldip
, *newip
;
472 if (maniptype
== NF_NAT_MANIP_SRC
) {
473 oldip
= &ipv6h
->saddr
;
474 newip
= &t
->src
.u3
.in6
;
476 oldip
= &ipv6h
->daddr
;
477 newip
= &t
->dst
.u3
.in6
;
479 inet_proto_csum_replace16(check
, skb
, oldip
->s6_addr32
,
480 newip
->s6_addr32
, true);
484 static void nf_csum_update(struct sk_buff
*skb
,
485 unsigned int iphdroff
, __sum16
*check
,
486 const struct nf_conntrack_tuple
*t
,
487 enum nf_nat_manip_type maniptype
)
489 switch (t
->src
.l3num
) {
491 nf_nat_ipv4_csum_update(skb
, iphdroff
, check
, t
, maniptype
);
494 nf_nat_ipv6_csum_update(skb
, iphdroff
, check
, t
, maniptype
);
499 static void nf_nat_ipv4_csum_recalc(struct sk_buff
*skb
,
500 u8 proto
, void *data
, __sum16
*check
,
501 int datalen
, int oldlen
)
503 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
504 const struct iphdr
*iph
= ip_hdr(skb
);
506 skb
->ip_summed
= CHECKSUM_PARTIAL
;
507 skb
->csum_start
= skb_headroom(skb
) + skb_network_offset(skb
) +
509 skb
->csum_offset
= (void *)check
- data
;
510 *check
= ~csum_tcpudp_magic(iph
->saddr
, iph
->daddr
, datalen
,
513 inet_proto_csum_replace2(check
, skb
,
514 htons(oldlen
), htons(datalen
), true);
518 #if IS_ENABLED(CONFIG_IPV6)
519 static void nf_nat_ipv6_csum_recalc(struct sk_buff
*skb
,
520 u8 proto
, void *data
, __sum16
*check
,
521 int datalen
, int oldlen
)
523 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
524 const struct ipv6hdr
*ipv6h
= ipv6_hdr(skb
);
526 skb
->ip_summed
= CHECKSUM_PARTIAL
;
527 skb
->csum_start
= skb_headroom(skb
) + skb_network_offset(skb
) +
528 (data
- (void *)skb
->data
);
529 skb
->csum_offset
= (void *)check
- data
;
530 *check
= ~csum_ipv6_magic(&ipv6h
->saddr
, &ipv6h
->daddr
,
533 inet_proto_csum_replace2(check
, skb
,
534 htons(oldlen
), htons(datalen
), true);
539 void nf_nat_csum_recalc(struct sk_buff
*skb
,
540 u8 nfproto
, u8 proto
, void *data
, __sum16
*check
,
541 int datalen
, int oldlen
)
545 nf_nat_ipv4_csum_recalc(skb
, proto
, data
, check
,
548 #if IS_ENABLED(CONFIG_IPV6)
550 nf_nat_ipv6_csum_recalc(skb
, proto
, data
, check
,
559 int nf_nat_icmp_reply_translation(struct sk_buff
*skb
,
561 enum ip_conntrack_info ctinfo
,
562 unsigned int hooknum
)
568 enum ip_conntrack_dir dir
= CTINFO2DIR(ctinfo
);
569 enum nf_nat_manip_type manip
= HOOK2MANIP(hooknum
);
570 unsigned int hdrlen
= ip_hdrlen(skb
);
571 struct nf_conntrack_tuple target
;
572 unsigned long statusbit
;
574 WARN_ON(ctinfo
!= IP_CT_RELATED
&& ctinfo
!= IP_CT_RELATED_REPLY
);
576 if (skb_ensure_writable(skb
, hdrlen
+ sizeof(*inside
)))
578 if (nf_ip_checksum(skb
, hooknum
, hdrlen
, IPPROTO_ICMP
))
581 inside
= (void *)skb
->data
+ hdrlen
;
582 if (inside
->icmp
.type
== ICMP_REDIRECT
) {
583 if ((ct
->status
& IPS_NAT_DONE_MASK
) != IPS_NAT_DONE_MASK
)
585 if (ct
->status
& IPS_NAT_MASK
)
589 if (manip
== NF_NAT_MANIP_SRC
)
590 statusbit
= IPS_SRC_NAT
;
592 statusbit
= IPS_DST_NAT
;
594 /* Invert if this is reply direction */
595 if (dir
== IP_CT_DIR_REPLY
)
596 statusbit
^= IPS_NAT_MASK
;
598 if (!(ct
->status
& statusbit
))
601 if (!nf_nat_ipv4_manip_pkt(skb
, hdrlen
+ sizeof(inside
->icmp
),
602 &ct
->tuplehash
[!dir
].tuple
, !manip
))
605 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
606 /* Reloading "inside" here since manip_pkt may reallocate */
607 inside
= (void *)skb
->data
+ hdrlen
;
608 inside
->icmp
.checksum
= 0;
609 inside
->icmp
.checksum
=
610 csum_fold(skb_checksum(skb
, hdrlen
,
611 skb
->len
- hdrlen
, 0));
614 /* Change outer to look like the reply to an incoming packet */
615 nf_ct_invert_tuple(&target
, &ct
->tuplehash
[!dir
].tuple
);
616 target
.dst
.protonum
= IPPROTO_ICMP
;
617 if (!nf_nat_ipv4_manip_pkt(skb
, 0, &target
, manip
))
622 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation
);
625 nf_nat_ipv4_fn(void *priv
, struct sk_buff
*skb
,
626 const struct nf_hook_state
*state
)
629 enum ip_conntrack_info ctinfo
;
631 ct
= nf_ct_get(skb
, &ctinfo
);
635 if (ctinfo
== IP_CT_RELATED
|| ctinfo
== IP_CT_RELATED_REPLY
) {
636 if (ip_hdr(skb
)->protocol
== IPPROTO_ICMP
) {
637 if (!nf_nat_icmp_reply_translation(skb
, ct
, ctinfo
,
645 return nf_nat_inet_fn(priv
, skb
, state
);
649 nf_nat_ipv4_pre_routing(void *priv
, struct sk_buff
*skb
,
650 const struct nf_hook_state
*state
)
653 __be32 daddr
= ip_hdr(skb
)->daddr
;
655 ret
= nf_nat_ipv4_fn(priv
, skb
, state
);
656 if (ret
== NF_ACCEPT
&& daddr
!= ip_hdr(skb
)->daddr
)
#ifdef CONFIG_XFRM
/* Redo the xfrm (IPsec) policy lookup for @skb after NAT changed it.
 *
 * Decodes the flow from the packet, looks up a new dst starting from
 * the underlying route, attaches it to the skb and makes sure there is
 * enough headroom for the new output device.  Returns 0 on success or
 * a negative errno.
 */
static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
{
	struct sock *sk = skb->sk;
	struct dst_entry *dst;
	unsigned int hh_len;
	struct flowi fl;
	int err;

	err = xfrm_decode_session(net, skb, &fl, family);
	if (err < 0)
		return err;

	dst = skb_dst(skb);
	if (dst->xfrm)
		dst = ((struct xfrm_dst *)dst)->route;
	if (!dst_hold_safe(dst))
		return -EHOSTUNREACH;

	/* Do not pass a socket that belongs to a different netns. */
	if (sk && !net_eq(net, sock_net(sk)))
		sk = NULL;

	dst = xfrm_lookup(net, dst, &fl, sk, 0);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/* Change in oif may mean change in hh_len. */
	hh_len = skb_dst(skb)->dev->hard_header_len;
	if (skb_headroom(skb) < hh_len &&
	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
		return -ENOMEM;
	return 0;
}
#endif
700 static bool nf_nat_inet_port_was_mangled(const struct sk_buff
*skb
, __be16 sport
)
702 enum ip_conntrack_info ctinfo
;
703 enum ip_conntrack_dir dir
;
704 const struct nf_conn
*ct
;
706 ct
= nf_ct_get(skb
, &ctinfo
);
710 switch (nf_ct_protonum(ct
)) {
718 dir
= CTINFO2DIR(ctinfo
);
719 if (dir
!= IP_CT_DIR_ORIGINAL
)
722 return ct
->tuplehash
[!dir
].tuple
.dst
.u
.all
!= sport
;
726 nf_nat_ipv4_local_in(void *priv
, struct sk_buff
*skb
,
727 const struct nf_hook_state
*state
)
729 __be32 saddr
= ip_hdr(skb
)->saddr
;
730 struct sock
*sk
= skb
->sk
;
733 ret
= nf_nat_ipv4_fn(priv
, skb
, state
);
735 if (ret
!= NF_ACCEPT
|| !sk
|| inet_sk_transparent(sk
))
738 /* skb has a socket assigned via tcp edemux. We need to check
739 * if nf_nat_ipv4_fn() has mangled the packet in a way that
740 * edemux would not have found this socket.
742 * This includes both changes to the source address and changes
743 * to the source port, which are both handled by the
744 * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
745 * might have found a socket for the old (pre-snat) address.
747 if (saddr
!= ip_hdr(skb
)->saddr
||
748 nf_nat_inet_port_was_mangled(skb
, sk
->sk_dport
))
749 skb_orphan(skb
); /* TCP edemux obtained wrong socket */
/* POST_ROUTING hook: run NAT and, with CONFIG_XFRM, redo the xfrm
 * lookup when source NAT changed the address or (for non-ICMP) the
 * port of a packet that has not been xfrm-transformed yet.
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);

		if (ct->tuplehash[dir].tuple.src.u3.ip !=
		     ct->tuplehash[!dir].tuple.dst.u3.ip ||
		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
		     ct->tuplehash[dir].tuple.src.u.all !=
		     ct->tuplehash[!dir].tuple.dst.u.all)) {
			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
			if (err < 0)
				ret = NF_DROP_ERR(err);
		}
	}
#endif
	return ret;
}
792 nf_nat_ipv4_local_fn(void *priv
, struct sk_buff
*skb
,
793 const struct nf_hook_state
*state
)
795 const struct nf_conn
*ct
;
796 enum ip_conntrack_info ctinfo
;
800 ret
= nf_nat_ipv4_fn(priv
, skb
, state
);
801 if (ret
!= NF_ACCEPT
)
804 ct
= nf_ct_get(skb
, &ctinfo
);
806 enum ip_conntrack_dir dir
= CTINFO2DIR(ctinfo
);
808 if (ct
->tuplehash
[dir
].tuple
.dst
.u3
.ip
!=
809 ct
->tuplehash
[!dir
].tuple
.src
.u3
.ip
) {
810 err
= ip_route_me_harder(state
->net
, state
->sk
, skb
, RTN_UNSPEC
);
812 ret
= NF_DROP_ERR(err
);
815 else if (!(IPCB(skb
)->flags
& IPSKB_XFRM_TRANSFORMED
) &&
816 ct
->tuplehash
[dir
].tuple
.dst
.protonum
!= IPPROTO_ICMP
&&
817 ct
->tuplehash
[dir
].tuple
.dst
.u
.all
!=
818 ct
->tuplehash
[!dir
].tuple
.src
.u
.all
) {
819 err
= nf_xfrm_me_harder(state
->net
, skb
, AF_INET
);
821 ret
= NF_DROP_ERR(err
);
828 static const struct nf_hook_ops nf_nat_ipv4_ops
[] = {
829 /* Before packet filtering, change destination */
831 .hook
= nf_nat_ipv4_pre_routing
,
833 .hooknum
= NF_INET_PRE_ROUTING
,
834 .priority
= NF_IP_PRI_NAT_DST
,
836 /* After packet filtering, change source */
838 .hook
= nf_nat_ipv4_out
,
840 .hooknum
= NF_INET_POST_ROUTING
,
841 .priority
= NF_IP_PRI_NAT_SRC
,
843 /* Before packet filtering, change destination */
845 .hook
= nf_nat_ipv4_local_fn
,
847 .hooknum
= NF_INET_LOCAL_OUT
,
848 .priority
= NF_IP_PRI_NAT_DST
,
850 /* After packet filtering, change source */
852 .hook
= nf_nat_ipv4_local_in
,
854 .hooknum
= NF_INET_LOCAL_IN
,
855 .priority
= NF_IP_PRI_NAT_SRC
,
859 int nf_nat_ipv4_register_fn(struct net
*net
, const struct nf_hook_ops
*ops
)
861 return nf_nat_register_fn(net
, ops
->pf
, ops
, nf_nat_ipv4_ops
,
862 ARRAY_SIZE(nf_nat_ipv4_ops
));
864 EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn
);
866 void nf_nat_ipv4_unregister_fn(struct net
*net
, const struct nf_hook_ops
*ops
)
868 nf_nat_unregister_fn(net
, ops
->pf
, ops
, ARRAY_SIZE(nf_nat_ipv4_ops
));
870 EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn
);
872 #if IS_ENABLED(CONFIG_IPV6)
873 int nf_nat_icmpv6_reply_translation(struct sk_buff
*skb
,
875 enum ip_conntrack_info ctinfo
,
876 unsigned int hooknum
,
880 struct icmp6hdr icmp6
;
883 enum ip_conntrack_dir dir
= CTINFO2DIR(ctinfo
);
884 enum nf_nat_manip_type manip
= HOOK2MANIP(hooknum
);
885 struct nf_conntrack_tuple target
;
886 unsigned long statusbit
;
888 WARN_ON(ctinfo
!= IP_CT_RELATED
&& ctinfo
!= IP_CT_RELATED_REPLY
);
890 if (skb_ensure_writable(skb
, hdrlen
+ sizeof(*inside
)))
892 if (nf_ip6_checksum(skb
, hooknum
, hdrlen
, IPPROTO_ICMPV6
))
895 inside
= (void *)skb
->data
+ hdrlen
;
896 if (inside
->icmp6
.icmp6_type
== NDISC_REDIRECT
) {
897 if ((ct
->status
& IPS_NAT_DONE_MASK
) != IPS_NAT_DONE_MASK
)
899 if (ct
->status
& IPS_NAT_MASK
)
903 if (manip
== NF_NAT_MANIP_SRC
)
904 statusbit
= IPS_SRC_NAT
;
906 statusbit
= IPS_DST_NAT
;
908 /* Invert if this is reply direction */
909 if (dir
== IP_CT_DIR_REPLY
)
910 statusbit
^= IPS_NAT_MASK
;
912 if (!(ct
->status
& statusbit
))
915 if (!nf_nat_ipv6_manip_pkt(skb
, hdrlen
+ sizeof(inside
->icmp6
),
916 &ct
->tuplehash
[!dir
].tuple
, !manip
))
919 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
920 struct ipv6hdr
*ipv6h
= ipv6_hdr(skb
);
922 inside
= (void *)skb
->data
+ hdrlen
;
923 inside
->icmp6
.icmp6_cksum
= 0;
924 inside
->icmp6
.icmp6_cksum
=
925 csum_ipv6_magic(&ipv6h
->saddr
, &ipv6h
->daddr
,
926 skb
->len
- hdrlen
, IPPROTO_ICMPV6
,
927 skb_checksum(skb
, hdrlen
,
928 skb
->len
- hdrlen
, 0));
931 nf_ct_invert_tuple(&target
, &ct
->tuplehash
[!dir
].tuple
);
932 target
.dst
.protonum
= IPPROTO_ICMPV6
;
933 if (!nf_nat_ipv6_manip_pkt(skb
, 0, &target
, manip
))
938 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation
);
941 nf_nat_ipv6_fn(void *priv
, struct sk_buff
*skb
,
942 const struct nf_hook_state
*state
)
945 enum ip_conntrack_info ctinfo
;
950 ct
= nf_ct_get(skb
, &ctinfo
);
951 /* Can't track? It's not due to stress, or conntrack would
952 * have dropped it. Hence it's the user's responsibilty to
953 * packet filter it out, or implement conntrack/NAT for that
959 if (ctinfo
== IP_CT_RELATED
|| ctinfo
== IP_CT_RELATED_REPLY
) {
960 nexthdr
= ipv6_hdr(skb
)->nexthdr
;
961 hdrlen
= ipv6_skip_exthdr(skb
, sizeof(struct ipv6hdr
),
962 &nexthdr
, &frag_off
);
964 if (hdrlen
>= 0 && nexthdr
== IPPROTO_ICMPV6
) {
965 if (!nf_nat_icmpv6_reply_translation(skb
, ct
, ctinfo
,
974 return nf_nat_inet_fn(priv
, skb
, state
);
978 nf_nat_ipv6_local_in(void *priv
, struct sk_buff
*skb
,
979 const struct nf_hook_state
*state
)
981 struct in6_addr saddr
= ipv6_hdr(skb
)->saddr
;
982 struct sock
*sk
= skb
->sk
;
985 ret
= nf_nat_ipv6_fn(priv
, skb
, state
);
987 if (ret
!= NF_ACCEPT
|| !sk
|| inet_sk_transparent(sk
))
990 /* see nf_nat_ipv4_local_in */
991 if (ipv6_addr_cmp(&saddr
, &ipv6_hdr(skb
)->saddr
) ||
992 nf_nat_inet_port_was_mangled(skb
, sk
->sk_dport
))
999 nf_nat_ipv6_in(void *priv
, struct sk_buff
*skb
,
1000 const struct nf_hook_state
*state
)
1002 unsigned int ret
, verdict
;
1003 struct in6_addr daddr
= ipv6_hdr(skb
)->daddr
;
1005 ret
= nf_nat_ipv6_fn(priv
, skb
, state
);
1006 verdict
= ret
& NF_VERDICT_MASK
;
1007 if (verdict
!= NF_DROP
&& verdict
!= NF_STOLEN
&&
1008 ipv6_addr_cmp(&daddr
, &ipv6_hdr(skb
)->daddr
))
/* POST_ROUTING hook (IPv6): run NAT and, with CONFIG_XFRM, redo the
 * xfrm lookup when source NAT changed the address or (for non-ICMPv6)
 * the port of a packet that has not been xfrm-transformed yet.
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
		return ret;
	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);

		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
				      &ct->tuplehash[!dir].tuple.dst.u3) ||
		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
		     ct->tuplehash[dir].tuple.src.u.all !=
		     ct->tuplehash[!dir].tuple.dst.u.all)) {
			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
			if (err < 0)
				ret = NF_DROP_ERR(err);
		}
	}
#endif

	return ret;
}
1052 nf_nat_ipv6_local_fn(void *priv
, struct sk_buff
*skb
,
1053 const struct nf_hook_state
*state
)
1055 const struct nf_conn
*ct
;
1056 enum ip_conntrack_info ctinfo
;
1060 ret
= nf_nat_ipv6_fn(priv
, skb
, state
);
1061 if (ret
!= NF_ACCEPT
)
1064 ct
= nf_ct_get(skb
, &ctinfo
);
1066 enum ip_conntrack_dir dir
= CTINFO2DIR(ctinfo
);
1068 if (!nf_inet_addr_cmp(&ct
->tuplehash
[dir
].tuple
.dst
.u3
,
1069 &ct
->tuplehash
[!dir
].tuple
.src
.u3
)) {
1070 err
= nf_ip6_route_me_harder(state
->net
, state
->sk
, skb
);
1072 ret
= NF_DROP_ERR(err
);
1075 else if (!(IP6CB(skb
)->flags
& IP6SKB_XFRM_TRANSFORMED
) &&
1076 ct
->tuplehash
[dir
].tuple
.dst
.protonum
!= IPPROTO_ICMPV6
&&
1077 ct
->tuplehash
[dir
].tuple
.dst
.u
.all
!=
1078 ct
->tuplehash
[!dir
].tuple
.src
.u
.all
) {
1079 err
= nf_xfrm_me_harder(state
->net
, skb
, AF_INET6
);
1081 ret
= NF_DROP_ERR(err
);
1089 static const struct nf_hook_ops nf_nat_ipv6_ops
[] = {
1090 /* Before packet filtering, change destination */
1092 .hook
= nf_nat_ipv6_in
,
1094 .hooknum
= NF_INET_PRE_ROUTING
,
1095 .priority
= NF_IP6_PRI_NAT_DST
,
1097 /* After packet filtering, change source */
1099 .hook
= nf_nat_ipv6_out
,
1101 .hooknum
= NF_INET_POST_ROUTING
,
1102 .priority
= NF_IP6_PRI_NAT_SRC
,
1104 /* Before packet filtering, change destination */
1106 .hook
= nf_nat_ipv6_local_fn
,
1108 .hooknum
= NF_INET_LOCAL_OUT
,
1109 .priority
= NF_IP6_PRI_NAT_DST
,
1111 /* After packet filtering, change source */
1113 .hook
= nf_nat_ipv6_local_in
,
1115 .hooknum
= NF_INET_LOCAL_IN
,
1116 .priority
= NF_IP6_PRI_NAT_SRC
,
1120 int nf_nat_ipv6_register_fn(struct net
*net
, const struct nf_hook_ops
*ops
)
1122 return nf_nat_register_fn(net
, ops
->pf
, ops
, nf_nat_ipv6_ops
,
1123 ARRAY_SIZE(nf_nat_ipv6_ops
));
1125 EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn
);
1127 void nf_nat_ipv6_unregister_fn(struct net
*net
, const struct nf_hook_ops
*ops
)
1129 nf_nat_unregister_fn(net
, ops
->pf
, ops
, ARRAY_SIZE(nf_nat_ipv6_ops
));
1131 EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn
);
1132 #endif /* CONFIG_IPV6 */
1134 #if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
1135 int nf_nat_inet_register_fn(struct net
*net
, const struct nf_hook_ops
*ops
)
1139 if (WARN_ON_ONCE(ops
->pf
!= NFPROTO_INET
))
1142 ret
= nf_nat_register_fn(net
, NFPROTO_IPV6
, ops
, nf_nat_ipv6_ops
,
1143 ARRAY_SIZE(nf_nat_ipv6_ops
));
1147 ret
= nf_nat_register_fn(net
, NFPROTO_IPV4
, ops
, nf_nat_ipv4_ops
,
1148 ARRAY_SIZE(nf_nat_ipv4_ops
));
1150 nf_nat_unregister_fn(net
, NFPROTO_IPV6
, ops
,
1151 ARRAY_SIZE(nf_nat_ipv6_ops
));
1154 EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn
);
1156 void nf_nat_inet_unregister_fn(struct net
*net
, const struct nf_hook_ops
*ops
)
1158 nf_nat_unregister_fn(net
, NFPROTO_IPV4
, ops
, ARRAY_SIZE(nf_nat_ipv4_ops
));
1159 nf_nat_unregister_fn(net
, NFPROTO_IPV6
, ops
, ARRAY_SIZE(nf_nat_ipv6_ops
));
1161 EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn
);
1162 #endif /* NFT INET NAT */