4 * Copyright (C) 1995-2003 by Darren Reed.
6 * See the IPFILTER.LICENCE file for details on licencing.
8 * Copyright 2008 Sun Microsystems, Inc.
10 #if defined(KERNEL) || defined(_KERNEL)
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
21 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
22 (__NetBSD_Version__ >= 399002000)
23 # include <sys/kauth.h>
25 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
27 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
28 # include "opt_ipfilter_log.h"
30 # include "opt_ipfilter.h"
44 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
45 # include <sys/filio.h>
46 # include <sys/fcntl.h>
48 # include <sys/ioctl.h>
51 # include <sys/fcntl.h>
54 # include <sys/protosw.h>
56 #include <sys/socket.h>
58 # include <sys/systm.h>
59 # if !defined(__SVR4) && !defined(__svr4__)
60 # include <sys/mbuf.h>
63 #if defined(__SVR4) || defined(__svr4__)
64 # include <sys/filio.h>
65 # include <sys/byteorder.h>
67 # include <sys/dditypes.h>
69 # include <sys/stream.h>
70 # include <sys/kmem.h>
72 #if __FreeBSD_version >= 300000
73 # include <sys/queue.h>
76 #if __FreeBSD_version >= 300000
77 # include <net/if_var.h>
78 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
79 # include "opt_ipfilter.h"
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/ip.h>
91 # include <vpn/ipsec.h>
92 extern struct ifnet vpnif
;
96 # include <netinet/ip_var.h>
98 #include <netinet/tcp.h>
99 #include <netinet/udp.h>
100 #include <netinet/ip_icmp.h>
101 #include "netinet/ip_compat.h"
102 #include <netinet/tcpip.h>
103 #include "netinet/ip_fil.h"
104 #include "netinet/ip_nat.h"
105 #include "netinet/ip_frag.h"
106 #include "netinet/ip_state.h"
107 #include "netinet/ip_proxy.h"
109 #include "netinet/ip_sync.h"
111 #if (__FreeBSD_version >= 300000)
112 # include <sys/malloc.h>
114 /* END OF INCLUDES */
117 #define SOCKADDR_IN struct sockaddr_in
120 #if defined(__NetBSD__)
121 #include <sys/cdefs.h>
122 __KERNEL_RCSID(0, "$NetBSD$");
124 static const char sccsid
[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
125 static const char rcsid
[] = "@(#)Id: ip_nat.c,v 2.195.2.127 2009/07/21 09:40:55 darrenr Exp";
130 /* ======================================================================== */
131 /* How the NAT is organised and works. */
133 /* Inside (interface y) NAT Outside (interface x) */
134 /* -------------------- -+- ------------------------------------- */
135 /* Packet going | out, processed by fr_checknatout() for x */
136 /* ------------> | ------------> */
137 /* src=10.1.1.1 | src=192.1.1.1 */
139 /* | in, processed by fr_checknatin() for x */
140 /* <------------ | <------------ */
141 /* dst=10.1.1.1 | dst=192.1.1.1 */
142 /* -------------------- -+- ------------------------------------- */
143 /* fr_checknatout() - changes ip_src and if required, sport */
144 /* - creates a new mapping, if required. */
145 /* fr_checknatin() - changes ip_dst and if required, dport */
147 /* In the NAT table, internal source is recorded as "in" and externally */
149 /* ======================================================================== */
152 nat_t
**nat_table
[2] = { NULL
, NULL
},
153 *nat_instances
= NULL
;
154 ipnat_t
*nat_list
= NULL
;
155 u_int ipf_nattable_max
= NAT_TABLE_MAX
;
156 u_int ipf_nattable_sz
= NAT_TABLE_SZ
;
157 u_int ipf_natrules_sz
= NAT_SIZE
;
158 u_int ipf_rdrrules_sz
= RDR_SIZE
;
159 u_int ipf_hostmap_sz
= HOSTMAP_SIZE
;
160 u_int fr_nat_maxbucket
= 0,
161 fr_nat_maxbucket_reset
= 1;
162 u_32_t nat_masks
= 0;
163 u_32_t rdr_masks
= 0;
164 u_long nat_last_force_flush
= 0;
165 ipnat_t
**nat_rules
= NULL
;
166 ipnat_t
**rdr_rules
= NULL
;
167 hostmap_t
**ipf_hm_maptable
= NULL
;
168 hostmap_t
*ipf_hm_maplist
= NULL
;
169 ipftq_t nat_tqb
[IPF_TCP_NSTATES
];
173 ipftq_t
*nat_utqe
= NULL
;
174 int fr_nat_doflush
= 0;
181 u_long fr_defnatage
= DEF_NAT_AGE
,
182 fr_defnatipage
= 120, /* 60 seconds */
183 fr_defnaticmpage
= 6; /* 3 seconds */
187 #if SOLARIS && !defined(_INET_IP_STACK_H)
188 extern int pfil_delayed_copy
;
191 static int nat_flush_entry
__P((void *));
192 static int nat_flushtable
__P((void));
193 static int nat_clearlist
__P((void));
194 static void nat_addnat
__P((struct ipnat
*));
195 static void nat_addrdr
__P((struct ipnat
*));
196 static void nat_delrdr
__P((struct ipnat
*));
197 static void nat_delnat
__P((struct ipnat
*));
198 static int fr_natgetent
__P((void *, int));
199 static int fr_natgetsz
__P((void *, int));
200 static int fr_natputent
__P((void *, int));
201 static int nat_extraflush
__P((int));
202 static int nat_gettable
__P((char *));
203 static void nat_tabmove
__P((nat_t
*));
204 static int nat_match
__P((fr_info_t
*, ipnat_t
*));
205 static INLINE
int nat_newmap
__P((fr_info_t
*, nat_t
*, natinfo_t
*));
206 static INLINE
int nat_newrdr
__P((fr_info_t
*, nat_t
*, natinfo_t
*));
207 static hostmap_t
*nat_hostmap
__P((ipnat_t
*, struct in_addr
,
208 struct in_addr
, struct in_addr
, u_32_t
));
209 static int nat_icmpquerytype4
__P((int));
210 static int nat_siocaddnat
__P((ipnat_t
*, ipnat_t
**, int));
211 static void nat_siocdelnat
__P((ipnat_t
*, ipnat_t
**, int));
212 static int nat_finalise
__P((fr_info_t
*, nat_t
*, natinfo_t
*,
213 tcphdr_t
*, nat_t
**, int));
214 static int nat_resolverule
__P((ipnat_t
*));
215 static nat_t
*fr_natclone
__P((fr_info_t
*, nat_t
*));
216 static void nat_mssclamp
__P((tcphdr_t
*, u_32_t
, fr_info_t
*, u_short
*));
217 static int nat_wildok
__P((nat_t
*, int, int, int, int));
218 static int nat_getnext
__P((ipftoken_t
*, ipfgeniter_t
*));
219 static int nat_iterator
__P((ipftoken_t
*, ipfgeniter_t
*));
222 /* ------------------------------------------------------------------------ */
223 /* Function: fr_natinit */
224 /* Returns: int - 0 == success, -1 == failure */
225 /* Parameters: Nil */
227 /* Initialise all of the NAT locks, tables and other structures. */
228 /* ------------------------------------------------------------------------ */
233 KMALLOCS(nat_table
[0], nat_t
**, sizeof(nat_t
*) * ipf_nattable_sz
);
234 if (nat_table
[0] != NULL
)
235 bzero((char *)nat_table
[0], ipf_nattable_sz
* sizeof(nat_t
*));
239 KMALLOCS(nat_table
[1], nat_t
**, sizeof(nat_t
*) * ipf_nattable_sz
);
240 if (nat_table
[1] != NULL
)
241 bzero((char *)nat_table
[1], ipf_nattable_sz
* sizeof(nat_t
*));
245 KMALLOCS(nat_rules
, ipnat_t
**, sizeof(ipnat_t
*) * ipf_natrules_sz
);
246 if (nat_rules
!= NULL
)
247 bzero((char *)nat_rules
, ipf_natrules_sz
* sizeof(ipnat_t
*));
251 KMALLOCS(rdr_rules
, ipnat_t
**, sizeof(ipnat_t
*) * ipf_rdrrules_sz
);
252 if (rdr_rules
!= NULL
)
253 bzero((char *)rdr_rules
, ipf_rdrrules_sz
* sizeof(ipnat_t
*));
257 KMALLOCS(ipf_hm_maptable
, hostmap_t
**, \
258 sizeof(hostmap_t
*) * ipf_hostmap_sz
);
259 if (ipf_hm_maptable
!= NULL
)
260 bzero((char *)ipf_hm_maptable
,
261 sizeof(hostmap_t
*) * ipf_hostmap_sz
);
264 ipf_hm_maplist
= NULL
;
266 KMALLOCS(nat_stats
.ns_bucketlen
[0], u_long
*,
267 ipf_nattable_sz
* sizeof(u_long
));
268 if (nat_stats
.ns_bucketlen
[0] == NULL
)
270 bzero((char *)nat_stats
.ns_bucketlen
[0],
271 ipf_nattable_sz
* sizeof(u_long
));
273 KMALLOCS(nat_stats
.ns_bucketlen
[1], u_long
*,
274 ipf_nattable_sz
* sizeof(u_long
));
275 if (nat_stats
.ns_bucketlen
[1] == NULL
)
278 bzero((char *)nat_stats
.ns_bucketlen
[1],
279 ipf_nattable_sz
* sizeof(u_long
));
281 if (fr_nat_maxbucket
== 0) {
282 for (i
= ipf_nattable_sz
; i
> 0; i
>>= 1)
284 fr_nat_maxbucket
*= 2;
287 fr_sttab_init(nat_tqb
);
289 * Increase this because we may have "keep state" following this too
290 * and packet storms can occur if this is removed too quickly.
292 nat_tqb
[IPF_TCPS_CLOSED
].ifq_ttl
= fr_tcplastack
;
293 nat_tqb
[IPF_TCP_NSTATES
- 1].ifq_next
= &nat_udptq
;
294 nat_udptq
.ifq_ttl
= fr_defnatage
;
295 nat_udptq
.ifq_ref
= 1;
296 nat_udptq
.ifq_head
= NULL
;
297 nat_udptq
.ifq_tail
= &nat_udptq
.ifq_head
;
298 MUTEX_INIT(&nat_udptq
.ifq_lock
, "nat ipftq udp tab");
299 nat_udptq
.ifq_next
= &nat_icmptq
;
300 nat_icmptq
.ifq_ttl
= fr_defnaticmpage
;
301 nat_icmptq
.ifq_ref
= 1;
302 nat_icmptq
.ifq_head
= NULL
;
303 nat_icmptq
.ifq_tail
= &nat_icmptq
.ifq_head
;
304 MUTEX_INIT(&nat_icmptq
.ifq_lock
, "nat icmp ipftq tab");
305 nat_icmptq
.ifq_next
= &nat_iptq
;
306 nat_iptq
.ifq_ttl
= fr_defnatipage
;
307 nat_iptq
.ifq_ref
= 1;
308 nat_iptq
.ifq_head
= NULL
;
309 nat_iptq
.ifq_tail
= &nat_iptq
.ifq_head
;
310 MUTEX_INIT(&nat_iptq
.ifq_lock
, "nat ip ipftq tab");
311 nat_iptq
.ifq_next
= NULL
;
313 for (i
= 0; i
< IPF_TCP_NSTATES
; i
++) {
314 if (nat_tqb
[i
].ifq_ttl
< fr_defnaticmpage
)
315 nat_tqb
[i
].ifq_ttl
= fr_defnaticmpage
;
317 else if (nat_tqb
[i
].ifq_ttl
> fr_defnatage
)
318 nat_tqb
[i
].ifq_ttl
= fr_defnatage
;
323 * Increase this because we may have "keep state" following
324 * this too and packet storms can occur if this is removed
327 nat_tqb
[IPF_TCPS_CLOSED
].ifq_ttl
= nat_tqb
[IPF_TCPS_LAST_ACK
].ifq_ttl
;
329 RWLOCK_INIT(&ipf_nat
, "ipf IP NAT rwlock");
330 RWLOCK_INIT(&ipf_natfrag
, "ipf IP NAT-Frag rwlock");
331 MUTEX_INIT(&ipf_nat_new
, "ipf nat new mutex");
332 MUTEX_INIT(&ipf_natio
, "ipf nat io mutex");
340 /* ------------------------------------------------------------------------ */
341 /* Function: nat_addrdr */
343 /* Parameters: n(I) - pointer to NAT rule to add */
345 /* Adds a redirect rule to the hash table of redirect rules and the list of */
346 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */
347 /* use by redirect rules. */
348 /* ------------------------------------------------------------------------ */
349 static void nat_addrdr(n
)
357 k
= count4bits(n
->in_outmsk
);
358 if ((k
>= 0) && (k
!= 32))
360 j
= (n
->in_outip
& n
->in_outmsk
);
361 hv
= NAT_HASH_FN(j
, 0, ipf_rdrrules_sz
);
364 np
= &(*np
)->in_rnext
;
372 /* ------------------------------------------------------------------------ */
373 /* Function: nat_addnat */
375 /* Parameters: n(I) - pointer to NAT rule to add */
377 /* Adds a NAT map rule to the hash table of rules and the list of loaded */
378 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */
379 /* redirect rules. */
380 /* ------------------------------------------------------------------------ */
381 static void nat_addnat(n
)
389 k
= count4bits(n
->in_inmsk
);
390 if ((k
>= 0) && (k
!= 32))
392 j
= (n
->in_inip
& n
->in_inmsk
);
393 hv
= NAT_HASH_FN(j
, 0, ipf_natrules_sz
);
396 np
= &(*np
)->in_mnext
;
404 /* ------------------------------------------------------------------------ */
405 /* Function: nat_delrdr */
407 /* Parameters: n(I) - pointer to NAT rule to delete */
409 /* Removes a redirect rule from the hash table of redirect rules. */
410 /* ------------------------------------------------------------------------ */
411 static void nat_delrdr(n
)
415 n
->in_rnext
->in_prnext
= n
->in_prnext
;
416 *n
->in_prnext
= n
->in_rnext
;
420 /* ------------------------------------------------------------------------ */
421 /* Function: nat_delnat */
423 /* Parameters: n(I) - pointer to NAT rule to delete */
425 /* Removes a NAT map rule from the hash table of NAT map rules. */
426 /* ------------------------------------------------------------------------ */
427 static void nat_delnat(n
)
430 if (n
->in_mnext
!= NULL
)
431 n
->in_mnext
->in_pmnext
= n
->in_pmnext
;
432 *n
->in_pmnext
= n
->in_mnext
;
436 /* ------------------------------------------------------------------------ */
437 /* Function: nat_hostmap */
438 /* Returns: struct hostmap* - NULL if no hostmap could be created, */
439 /* else a pointer to the hostmapping to use */
440 /* Parameters: np(I) - pointer to NAT rule */
441 /* real(I) - real IP address */
442 /* map(I) - mapped IP address */
443 /* port(I) - destination port number */
444 /* Write Locks: ipf_nat */
446 /* Check if an ip address has already been allocated for a given mapping */
447 /* that is not doing port based translation. If it is not yet allocated, */
448 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */
449 /* ------------------------------------------------------------------------ */
450 static struct hostmap
*nat_hostmap(np
, src
, dst
, map
, port
)
460 hv
= (src
.s_addr
^ dst
.s_addr
);
464 for (hm
= ipf_hm_maptable
[hv
]; hm
; hm
= hm
->hm_hnext
)
465 if ((hm
->hm_srcip
.s_addr
== src
.s_addr
) &&
466 (hm
->hm_dstip
.s_addr
== dst
.s_addr
) &&
467 ((np
== NULL
) || (np
== hm
->hm_ipnat
)) &&
468 ((port
== 0) || (port
== hm
->hm_port
))) {
476 KMALLOC(hm
, hostmap_t
*);
478 hm
->hm_next
= ipf_hm_maplist
;
479 hm
->hm_pnext
= &ipf_hm_maplist
;
480 if (ipf_hm_maplist
!= NULL
)
481 ipf_hm_maplist
->hm_pnext
= &hm
->hm_next
;
483 hm
->hm_hnext
= ipf_hm_maptable
[hv
];
484 hm
->hm_phnext
= ipf_hm_maptable
+ hv
;
485 if (ipf_hm_maptable
[hv
] != NULL
)
486 ipf_hm_maptable
[hv
]->hm_phnext
= &hm
->hm_hnext
;
487 ipf_hm_maptable
[hv
] = hm
;
499 /* ------------------------------------------------------------------------ */
500 /* Function: fr_hostmapdel */
502 /* Parameters: hmp(I) - pointer to hostmap structure pointer */
503 /* Write Locks: ipf_nat */
505 /* Decrement the references to this hostmap structure by one. If this */
506 /* reaches zero then remove it and free it. */
507 /* ------------------------------------------------------------------------ */
508 void fr_hostmapdel(hmp
)
509 struct hostmap
**hmp
;
517 if (hm
->hm_ref
== 0) {
519 hm
->hm_hnext
->hm_phnext
= hm
->hm_phnext
;
520 *hm
->hm_phnext
= hm
->hm_hnext
;
522 hm
->hm_next
->hm_pnext
= hm
->hm_pnext
;
523 *hm
->hm_pnext
= hm
->hm_next
;
529 /* ------------------------------------------------------------------------ */
530 /* Function: fix_outcksum */
532 /* Parameters: fin(I) - pointer to packet information */
533 /* sp(I) - location of 16bit checksum to update */
534 /* n(I) - amount to adjust checksum by */
536 /* Adjusts the 16bit checksum by "n" for packets going out. */
537 /* ------------------------------------------------------------------------ */
538 void fix_outcksum(fin
, sp
, n
)
549 if (n
& NAT_HW_CKSUM
) {
552 n
= (n
& 0xffff) + (n
>> 16);
556 sum1
= (~ntohs(*sp
)) & 0xffff;
558 sum1
= (sum1
>> 16) + (sum1
& 0xffff);
560 sum1
= (sum1
>> 16) + (sum1
& 0xffff);
561 sumshort
= ~(u_short
)sum1
;
562 *(sp
) = htons(sumshort
);
566 /* ------------------------------------------------------------------------ */
567 /* Function: fix_incksum */
569 /* Parameters: fin(I) - pointer to packet information */
570 /* sp(I) - location of 16bit checksum to update */
571 /* n(I) - amount to adjust checksum by */
573 /* Adjusts the 16bit checksum by "n" for packets going in. */
574 /* ------------------------------------------------------------------------ */
575 void fix_incksum(fin
, sp
, n
)
586 if (n
& NAT_HW_CKSUM
) {
589 n
= (n
& 0xffff) + (n
>> 16);
593 sum1
= (~ntohs(*sp
)) & 0xffff;
594 sum1
+= ~(n
) & 0xffff;
595 sum1
= (sum1
>> 16) + (sum1
& 0xffff);
597 sum1
= (sum1
>> 16) + (sum1
& 0xffff);
598 sumshort
= ~(u_short
)sum1
;
599 *(sp
) = htons(sumshort
);
603 /* ------------------------------------------------------------------------ */
604 /* Function: fix_datacksum */
606 /* Parameters: sp(I) - location of 16bit checksum to update */
607 /* n(I) - amount to adjust checksum by */
609 /* Fix_datacksum is used *only* for the adjustments of checksums in the */
610 /* data section of an IP packet. */
612 /* The only situation in which you need to do this is when NAT'ing an */
613 /* ICMP error message. Such a message, contains in its body the IP header */
614 /* of the original IP packet, that causes the error. */
616 /* You can't use fix_incksum or fix_outcksum in that case, because for the */
617 /* kernel the data section of the ICMP error is just data, and no special */
618 /* processing like hardware cksum or ntohs processing have been done by the */
619 /* kernel on the data section. */
620 /* ------------------------------------------------------------------------ */
621 void fix_datacksum(sp
, n
)
631 sum1
= (~ntohs(*sp
)) & 0xffff;
633 sum1
= (sum1
>> 16) + (sum1
& 0xffff);
635 sum1
= (sum1
>> 16) + (sum1
& 0xffff);
636 sumshort
= ~(u_short
)sum1
;
637 *(sp
) = htons(sumshort
);
641 /* ------------------------------------------------------------------------ */
642 /* Function: fr_nat_ioctl */
643 /* Returns: int - 0 == success, != 0 == failure */
644 /* Parameters: data(I) - pointer to ioctl data */
645 /* cmd(I) - ioctl command integer */
646 /* mode(I) - file mode bits used with open */
648 /* Processes an ioctl call made to operate on the IP Filter NAT device. */
649 /* ------------------------------------------------------------------------ */
650 int fr_nat_ioctl(data
, cmd
, mode
, uid
, ctx
)
656 ipnat_t
*nat
, *nt
, *n
= NULL
, **np
= NULL
;
657 int error
= 0, ret
, arg
, getlock
;
661 #if defined(BSD) && (BSD >= 199306) && defined(_KERNEL)
662 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
663 if ((mode
& FWRITE
) &&
664 kauth_authorize_network(curlwp
->l_cred
, KAUTH_NETWORK_FIREWALL
,
665 KAUTH_REQ_NETWORK_FIREWALL_FW
,
670 if ((securelevel
>= 2) && (mode
& FWRITE
)) {
676 #if defined(__osf__) && defined(_KERNEL)
679 getlock
= (mode
& NAT_LOCKHELD
) ? 0 : 1;
682 nat
= NULL
; /* XXX gcc -Wuninitialized */
683 if (cmd
== (ioctlcmd_t
)SIOCADNAT
) {
684 KMALLOC(nt
, ipnat_t
*);
689 if ((cmd
== (ioctlcmd_t
)SIOCADNAT
) || (cmd
== (ioctlcmd_t
)SIOCRMNAT
)) {
690 if (mode
& NAT_SYSSPACE
) {
691 bcopy(data
, (char *)&natd
, sizeof(natd
));
694 error
= fr_inobj(data
, &natd
, IPFOBJ_IPNAT
);
702 * For add/delete, look to see if the NAT entry is already present
704 if ((cmd
== (ioctlcmd_t
)SIOCADNAT
) || (cmd
== (ioctlcmd_t
)SIOCRMNAT
)) {
706 if (nat
->in_v
== 0) /* For backward compat. */
708 nat
->in_flags
&= IPN_USERFLAGS
;
709 if ((nat
->in_redir
& NAT_MAPBLK
) == 0) {
710 if ((nat
->in_flags
& IPN_SPLIT
) == 0)
711 nat
->in_inip
&= nat
->in_inmsk
;
712 if ((nat
->in_flags
& IPN_IPRANGE
) == 0)
713 nat
->in_outip
&= nat
->in_outmsk
;
715 MUTEX_ENTER(&ipf_natio
);
716 for (np
= &nat_list
; ((n
= *np
) != NULL
); np
= &n
->in_next
)
717 if (bcmp((char *)&nat
->in_flags
, (char *)&n
->in_flags
,
719 if (nat
->in_redir
== NAT_REDIRECT
&&
720 nat
->in_pnext
!= n
->in_pnext
)
733 if (!(mode
& FWRITE
))
736 tmp
= ipflog_clear(IPL_LOGNAT
);
737 error
= BCOPYOUT((char *)&tmp
, (char *)data
,
746 if (!(mode
& FWRITE
))
749 error
= BCOPYIN((char *)data
, (char *)&nat_logging
,
750 sizeof(nat_logging
));
757 error
= BCOPYOUT((char *)&nat_logging
, (char *)data
,
758 sizeof(nat_logging
));
764 arg
= iplused
[IPL_LOGNAT
];
765 error
= BCOPYOUT(&arg
, data
, sizeof(arg
));
771 if (!(mode
& FWRITE
)) {
773 } else if (n
!= NULL
) {
775 } else if (nt
== NULL
) {
779 MUTEX_EXIT(&ipf_natio
);
782 bcopy((char *)nat
, (char *)nt
, sizeof(*n
));
783 error
= nat_siocaddnat(nt
, np
, getlock
);
784 MUTEX_EXIT(&ipf_natio
);
790 if (!(mode
& FWRITE
)) {
793 } else if (n
== NULL
) {
798 MUTEX_EXIT(&ipf_natio
);
801 nat_siocdelnat(n
, np
, getlock
);
803 MUTEX_EXIT(&ipf_natio
);
808 nat_stats
.ns_table
[0] = nat_table
[0];
809 nat_stats
.ns_table
[1] = nat_table
[1];
810 nat_stats
.ns_list
= nat_list
;
811 nat_stats
.ns_maptable
= ipf_hm_maptable
;
812 nat_stats
.ns_maplist
= ipf_hm_maplist
;
813 nat_stats
.ns_nattab_sz
= ipf_nattable_sz
;
814 nat_stats
.ns_nattab_max
= ipf_nattable_max
;
815 nat_stats
.ns_rultab_sz
= ipf_natrules_sz
;
816 nat_stats
.ns_rdrtab_sz
= ipf_rdrrules_sz
;
817 nat_stats
.ns_hostmap_sz
= ipf_hostmap_sz
;
818 nat_stats
.ns_instances
= nat_instances
;
819 nat_stats
.ns_apslist
= ap_sess_list
;
820 nat_stats
.ns_ticks
= fr_ticks
;
821 error
= fr_outobj(data
, &nat_stats
, IPFOBJ_NATSTAT
);
828 error
= fr_inobj(data
, &nl
, IPFOBJ_NATLOOKUP
);
833 READ_ENTER(&ipf_nat
);
835 ptr
= nat_lookupredir(&nl
);
837 RWLOCK_EXIT(&ipf_nat
);
840 error
= fr_outobj(data
, &nl
, IPFOBJ_NATLOOKUP
);
848 case SIOCIPFFL
: /* old SIOCFLNAT & SIOCCNATL */
849 if (!(mode
& FWRITE
)) {
854 WRITE_ENTER(&ipf_nat
);
857 error
= BCOPYIN(data
, &arg
, sizeof(arg
));
862 ret
= nat_flushtable();
864 ret
= nat_clearlist();
866 ret
= nat_extraflush(arg
);
870 RWLOCK_EXIT(&ipf_nat
);
873 error
= BCOPYOUT(&ret
, data
, sizeof(ret
));
878 error
= appr_ioctl(data
, cmd
, mode
, ctx
);
882 if (!(mode
& FWRITE
)) {
885 error
= fr_lock(data
, &fr_nat_lock
);
890 if ((mode
& FWRITE
) != 0) {
891 error
= fr_natputent(data
, getlock
);
899 error
= fr_natgetsz(data
, getlock
);
906 error
= fr_natgetent(data
, getlock
);
917 error
= fr_inobj(data
, &iter
, IPFOBJ_GENITER
);
919 token
= ipf_findtoken(iter
.igi_type
, uid
, ctx
);
921 error
= nat_iterator(token
, &iter
);
922 WRITE_ENTER(&ipf_tokens
);
923 if (token
->ipt_data
== NULL
)
924 ipf_freetoken(token
);
926 ipf_dereftoken(token
);
927 RWLOCK_EXIT(&ipf_tokens
);
935 error
= BCOPYIN((void *)data
, (void *)&arg
, sizeof(arg
));
938 error
= ipf_deltoken(arg
, uid
, ctx
);
946 error
= fr_outobj(data
, nat_tqb
, IPFOBJ_STATETQTAB
);
950 error
= nat_gettable(data
);
964 /* ------------------------------------------------------------------------ */
965 /* Function: nat_siocaddnat */
966 /* Returns: int - 0 == success, != 0 == failure */
967 /* Parameters: n(I) - pointer to new NAT rule */
968 /* np(I) - pointer to where to insert new NAT rule */
969 /* getlock(I) - flag indicating if lock on ipf_nat is held */
970 /* Mutex Locks: ipf_natio */
972 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
973 /* from information passed to the kernel, then add it to the appropriate */
974 /* NAT rule table(s). */
975 /* ------------------------------------------------------------------------ */
976 static int nat_siocaddnat(n
, np
, getlock
)
982 if (nat_resolverule(n
) != 0)
985 if ((n
->in_age
[0] == 0) && (n
->in_age
[1] != 0))
989 if (n
->in_redir
& NAT_MAPBLK
)
990 n
->in_space
= USABLE_PORTS
* ~ntohl(n
->in_outmsk
);
991 else if (n
->in_flags
& IPN_AUTOPORTMAP
)
992 n
->in_space
= USABLE_PORTS
* ~ntohl(n
->in_inmsk
);
993 else if (n
->in_flags
& IPN_IPRANGE
)
994 n
->in_space
= ntohl(n
->in_outmsk
) - ntohl(n
->in_outip
);
995 else if (n
->in_flags
& IPN_SPLIT
)
997 else if (n
->in_outmsk
!= 0)
998 n
->in_space
= ~ntohl(n
->in_outmsk
);
1003 * Calculate the number of valid IP addresses in the output
1004 * mapping range. In all cases, the range is inclusive of
1005 * the start and ending IP addresses.
1006 * If to a CIDR address, lose 2: broadcast + network address
1008 * If to a range, add one.
1009 * If to a single IP address, set to 1.
1012 if ((n
->in_flags
& IPN_IPRANGE
) != 0)
1019 if ((n
->in_outmsk
!= 0xffffffff) && (n
->in_outmsk
!= 0) &&
1020 ((n
->in_flags
& (IPN_IPRANGE
|IPN_SPLIT
)) == 0))
1021 n
->in_nip
= ntohl(n
->in_outip
) + 1;
1022 else if ((n
->in_flags
& IPN_SPLIT
) &&
1023 (n
->in_redir
& NAT_REDIRECT
))
1024 n
->in_nip
= ntohl(n
->in_inip
);
1026 n
->in_nip
= ntohl(n
->in_outip
);
1027 if (n
->in_redir
& NAT_MAP
) {
1028 n
->in_pnext
= ntohs(n
->in_pmin
);
1030 * Multiply by the number of ports made available.
1032 if (ntohs(n
->in_pmax
) >= ntohs(n
->in_pmin
)) {
1033 n
->in_space
*= (ntohs(n
->in_pmax
) -
1034 ntohs(n
->in_pmin
) + 1);
1036 * Because two different sources can map to
1037 * different destinations but use the same
1039 * If the result is smaller than in_space, then
1040 * we may have wrapped around 32bits.
1043 if ((i
!= 0) && (i
!= 0xffffffff)) {
1044 j
= n
->in_space
* (~ntohl(i
) + 1);
1045 if (j
>= n
->in_space
)
1048 n
->in_space
= 0xffffffff;
1052 * If no protocol is specified, multiply by 256 to allow for
1053 * at least one IP:IP mapping per protocol.
1055 if ((n
->in_flags
& IPN_TCPUDPICMP
) == 0) {
1056 j
= n
->in_space
* 256;
1057 if (j
>= n
->in_space
)
1060 n
->in_space
= 0xffffffff;
1064 /* Otherwise, these fields are preset */
1067 WRITE_ENTER(&ipf_nat
);
1072 if (n
->in_age
[0] != 0)
1073 n
->in_tqehead
[0] = fr_addtimeoutqueue(&nat_utqe
, n
->in_age
[0]);
1075 if (n
->in_age
[1] != 0)
1076 n
->in_tqehead
[1] = fr_addtimeoutqueue(&nat_utqe
, n
->in_age
[1]);
1078 if (n
->in_redir
& NAT_REDIRECT
) {
1079 n
->in_flags
&= ~IPN_NOTDST
;
1082 if (n
->in_redir
& (NAT_MAP
|NAT_MAPBLK
)) {
1083 n
->in_flags
&= ~IPN_NOTSRC
;
1086 MUTEX_INIT(&n
->in_lock
, "ipnat rule lock");
1089 nat_stats
.ns_rules
++;
1090 #if SOLARIS && !defined(_INET_IP_STACK_H)
1091 pfil_delayed_copy
= 0;
1094 RWLOCK_EXIT(&ipf_nat
); /* WRITE */
1101 /* ------------------------------------------------------------------------ */
1102 /* Function: nat_resolverule */
1104 /* Parameters: n(I) - pointer to NAT rule */
1106 /* Resolve the interface names stored in the NAT rule to interface */
1107 /* pointers (the second name defaults to the first when it is empty) and, */
1108 /* if a proxy label is set, look up the matching proxy with appr_lookup(). */
1109 /* ------------------------------------------------------------------------ */
1110 static int nat_resolverule(n
)
1113 n
->in_ifnames
[0][LIFNAMSIZ
- 1] = '\0';
1114 n
->in_ifps
[0] = fr_resolvenic(n
->in_ifnames
[0], 4);
1116 n
->in_ifnames
[1][LIFNAMSIZ
- 1] = '\0';
1117 if (n
->in_ifnames
[1][0] == '\0') {
1118 (void) strncpy(n
->in_ifnames
[1], n
->in_ifnames
[0], LIFNAMSIZ
);
1119 n
->in_ifps
[1] = n
->in_ifps
[0];
1121 n
->in_ifps
[1] = fr_resolvenic(n
->in_ifnames
[1], 4);
1124 if (n
->in_plabel
[0] != '\0') {
1125 n
->in_apr
= appr_lookup(n
->in_p
, n
->in_plabel
);
1126 if (n
->in_apr
== NULL
)
1133 /* ------------------------------------------------------------------------ */
1134 /* Function: nat_siocdelnat */
1135 /* Returns: int - 0 == success, != 0 == failure */
1136 /* Parameters: n(I) - pointer to new NAT rule */
1137 /* np(I) - pointer to where to insert new NAT rule */
1138 /* getlock(I) - flag indicating if lock on ipf_nat is held */
1139 /* Mutex Locks: ipf_natio */
1141 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
1142 /* from information passed to the kernel, then add it to the appropriate */
1143 /* NAT rule table(s). */
1144 /* ------------------------------------------------------------------------ */
1145 static void nat_siocdelnat(n
, np
, getlock
)
1150 WRITE_ENTER(&ipf_nat
);
1152 if (n
->in_redir
& NAT_REDIRECT
)
1154 if (n
->in_redir
& (NAT_MAPBLK
|NAT_MAP
))
1156 if (nat_list
== NULL
) {
1161 if (n
->in_tqehead
[0] != NULL
) {
1162 if (fr_deletetimeoutqueue(n
->in_tqehead
[0]) == 0) {
1163 fr_freetimeoutqueue(n
->in_tqehead
[1]);
1167 if (n
->in_tqehead
[1] != NULL
) {
1168 if (fr_deletetimeoutqueue(n
->in_tqehead
[1]) == 0) {
1169 fr_freetimeoutqueue(n
->in_tqehead
[1]);
1175 if (n
->in_use
== 0) {
1177 appr_free(n
->in_apr
);
1178 MUTEX_DESTROY(&n
->in_lock
);
1180 nat_stats
.ns_rules
--;
1181 #if SOLARIS && !defined(_INET_IP_STACK_H)
1182 if (nat_stats
.ns_rules
== 0)
1183 pfil_delayed_copy
= 1;
1186 n
->in_flags
|= IPN_DELETE
;
1190 RWLOCK_EXIT(&ipf_nat
); /* READ/WRITE */
1195 /* ------------------------------------------------------------------------ */
1196 /* Function: fr_natgetsz */
1197 /* Returns: int - 0 == success, != 0 is the error value. */
1198 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1199 /* get the size of. */
1201 /* Handle SIOCSTGSZ. */
1202 /* Return the size of the nat list entry to be copied back to user space. */
1203 /* The size of the entry is stored in the ng_sz field and the entire natget */
1204 /* structure is copied back to the user. */
1205 /* ------------------------------------------------------------------------ */
1206 static int fr_natgetsz(data
, getlock
)
1214 if (BCOPYIN(data
, &ng
, sizeof(ng
)) != 0)
1218 READ_ENTER(&ipf_nat
);
1223 nat
= nat_instances
;
1226 * Empty list so the size returned is 0. Simple.
1230 RWLOCK_EXIT(&ipf_nat
);
1232 if (BCOPYOUT(&ng
, data
, sizeof(ng
)) != 0)
1238 * Make sure the pointer we're copying from exists in the
1239 * current list of entries. Security precaution to prevent
1240 * copying of random kernel data.
1242 for (n
= nat_instances
; n
; n
= n
->nat_next
)
1247 RWLOCK_EXIT(&ipf_nat
);
1254 * Include any space required for proxy data structures.
1256 ng
.ng_sz
= sizeof(nat_save_t
);
1259 ng
.ng_sz
+= sizeof(ap_session_t
) - 4;
1260 if (aps
->aps_data
!= 0)
1261 ng
.ng_sz
+= aps
->aps_psiz
;
1264 RWLOCK_EXIT(&ipf_nat
);
1267 if (BCOPYOUT(&ng
, data
, sizeof(ng
)) != 0)
1273 /* ------------------------------------------------------------------------ */
1274 /* Function: fr_natgetent */
1275 /* Returns: int - 0 == success, != 0 is the error value. */
1276 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1277 /* to NAT structure to copy out. */
1279 /* Handle SIOCSTGET. */
1280 /* Copies out NAT entry to user space. Any additional data held for a */
1281 /* proxy is also copied, as is the NAT rule which was responsible for it */
1282 /* ------------------------------------------------------------------------ */
1283 static int fr_natgetent(data
, getlock
)
1289 nat_save_t
*ipn
, ipns
;
1292 error
= fr_inobj(data
, &ipns
, IPFOBJ_NATSAVE
);
1296 if ((ipns
.ipn_dsize
< sizeof(ipns
)) || (ipns
.ipn_dsize
> 81920))
1299 KMALLOCS(ipn
, nat_save_t
*, ipns
.ipn_dsize
);
1304 READ_ENTER(&ipf_nat
);
1307 ipn
->ipn_dsize
= ipns
.ipn_dsize
;
1308 nat
= ipns
.ipn_next
;
1310 nat
= nat_instances
;
1312 if (nat_instances
== NULL
)
1318 * Make sure the pointer we're copying from exists in the
1319 * current list of entries. Security precaution to prevent
1320 * copying of random kernel data.
1322 for (n
= nat_instances
; n
; n
= n
->nat_next
)
1330 ipn
->ipn_next
= nat
->nat_next
;
1333 * Copy the NAT structure.
1335 bcopy((char *)nat
, &ipn
->ipn_nat
, sizeof(*nat
));
1338 * If we have a pointer to the NAT rule it belongs to, save that too.
1340 if (nat
->nat_ptr
!= NULL
)
1341 bcopy((char *)nat
->nat_ptr
, (char *)&ipn
->ipn_ipnat
,
1342 sizeof(ipn
->ipn_ipnat
));
1345 * If we also know the NAT entry has an associated filter rule,
1348 if (nat
->nat_fr
!= NULL
)
1349 bcopy((char *)nat
->nat_fr
, (char *)&ipn
->ipn_fr
,
1350 sizeof(ipn
->ipn_fr
));
1353 * Last but not least, if there is an application proxy session set
1354 * up for this NAT entry, then copy that out too, including any
1355 * private data saved along side it by the proxy.
1358 outsize
= ipn
->ipn_dsize
- sizeof(*ipn
) + sizeof(ipn
->ipn_data
);
1362 if (outsize
< sizeof(*aps
)) {
1368 bcopy((char *)aps
, s
, sizeof(*aps
));
1370 outsize
-= sizeof(*aps
);
1371 if ((aps
->aps_data
!= NULL
) && (outsize
>= aps
->aps_psiz
))
1372 bcopy(aps
->aps_data
, s
, aps
->aps_psiz
);
1378 RWLOCK_EXIT(&ipf_nat
);
1381 error
= fr_outobjsz(data
, ipn
, IPFOBJ_NATSAVE
, ipns
.ipn_dsize
);
1386 RWLOCK_EXIT(&ipf_nat
);
1389 KFREES(ipn
, ipns
.ipn_dsize
);
1395 /* ------------------------------------------------------------------------ */
1396 /* Function: fr_natputent */
1397 /* Returns: int - 0 == success, != 0 is the error value. */
1398 /* Parameters: data(I) - pointer to natget structure with NAT */
1399 /* structure information to load into the kernel */
1400 /* getlock(I) - flag indicating whether or not a write lock */
1401 /* on ipf_nat is already held. */
1403 /* Handle SIOCSTPUT. */
1404 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1405 /* firewall rule data structures, if pointers to them indicate so. */
1406 /* ------------------------------------------------------------------------ */
1407 static int fr_natputent(data
, getlock
)
1411 nat_save_t
*ipn
, *ipnn
;
1420 * Initialise early because of code at junkput label.
1429 KMALLOC(ipn
, nat_save_t
*);
1432 error
= fr_inobj(data
, ipn
, IPFOBJ_NATSAVE
);
1437 * New entry, copy in the rest of the NAT entry if its size is more
1438 * than just the nat_t structure.
1440 if (ipn
->ipn_dsize
> sizeof(*ipn
)) {
1441 if (ipn
->ipn_dsize
> 81920) {
1446 KMALLOCS(ipnn
, nat_save_t
*, ipn
->ipn_dsize
);
1452 error
= fr_inobjsz(data
, ipnn
, IPFOBJ_NATSAVE
, ipn
->ipn_dsize
);
1460 KMALLOC(nat
, nat_t
*);
1466 bcopy((char *)&ipnn
->ipn_nat
, (char *)nat
, sizeof(*nat
));
1468 * Initialize all these so that nat_delete() doesn't cause a crash.
1470 bzero((char *)nat
, offsetof(struct nat
, nat_tqe
));
1471 nat
->nat_tqe
.tqe_pnext
= NULL
;
1472 nat
->nat_tqe
.tqe_next
= NULL
;
1473 nat
->nat_tqe
.tqe_ifq
= NULL
;
1474 nat
->nat_tqe
.tqe_parent
= nat
;
1477 * Restore the rule associated with this nat session
1479 in
= ipnn
->ipn_nat
.nat_ptr
;
1481 KMALLOC(in
, ipnat_t
*);
1487 bzero((char *)in
, offsetof(struct ipnat
, in_next6
));
1488 bcopy((char *)&ipnn
->ipn_ipnat
, (char *)in
, sizeof(*in
));
1490 in
->in_flags
|= IPN_DELETE
;
1492 ATOMIC_INC(nat_stats
.ns_rules
);
1494 if (nat_resolverule(in
) != 0) {
1501 * Check that the NAT entry doesn't already exist in the kernel.
1503 * For NAT_OUTBOUND, we're looking for a duplicate MAP entry. To do
1504 * this, we check to see if the inbound combination of addresses and
1505 * ports is already known. Similar logic is applied for NAT_INBOUND.
1508 KMALLOC(fin
, fr_info_t
*);
1513 bzero(fin
, sizeof(*fin
));
1514 fin
->fin_p
= nat
->nat_p
;
1515 fin
->fin_ifp
= nat
->nat_ifps
[0];
1516 if (nat
->nat_dir
== NAT_OUTBOUND
) {
1517 fin
->fin_data
[0] = ntohs(nat
->nat_oport
);
1518 fin
->fin_data
[1] = ntohs(nat
->nat_outport
);
1519 fin
->fin_ifp
= nat
->nat_ifps
[0];
1521 READ_ENTER(&ipf_nat
);
1523 n
= nat_inlookup(fin
, nat
->nat_flags
, fin
->fin_p
,
1524 nat
->nat_oip
, nat
->nat_inip
);
1526 RWLOCK_EXIT(&ipf_nat
);
1532 } else if (nat
->nat_dir
== NAT_INBOUND
) {
1533 fin
->fin_data
[0] = ntohs(nat
->nat_inport
);
1534 fin
->fin_data
[1] = ntohs(nat
->nat_oport
);
1535 fin
->fin_ifp
= nat
->nat_ifps
[0];
1537 READ_ENTER(&ipf_nat
);
1539 n
= nat_outlookup(fin
, nat
->nat_flags
, fin
->fin_p
,
1540 nat
->nat_outip
, nat
->nat_oip
);
1542 RWLOCK_EXIT(&ipf_nat
);
1554 * Restore ap_session_t structure. Include the private data allocated
1559 KMALLOC(aps
, ap_session_t
*);
1565 bcopy(ipnn
->ipn_data
, (char *)aps
, sizeof(*aps
));
1567 aps
->aps_apr
= in
->in_apr
;
1569 aps
->aps_apr
= NULL
;
1570 if (aps
->aps_psiz
!= 0) {
1571 if (aps
->aps_psiz
> 81920) {
1575 KMALLOCS(aps
->aps_data
, void *, aps
->aps_psiz
);
1576 if (aps
->aps_data
== NULL
) {
1580 bcopy(ipnn
->ipn_data
+ sizeof(*aps
), aps
->aps_data
,
1584 aps
->aps_data
= NULL
;
1589 * If there was a filtering rule associated with this entry then
1590 * build up a new one.
1594 if ((nat
->nat_flags
& SI_NEWFR
) != 0) {
1595 KMALLOC(fr
, frentry_t
*);
1601 ipnn
->ipn_nat
.nat_fr
= fr
;
1603 (void) fr_outobj(data
, ipnn
, IPFOBJ_NATSAVE
);
1604 bcopy((char *)&ipnn
->ipn_fr
, (char *)fr
, sizeof(*fr
));
1609 fr
->fr_type
= FR_T_NONE
;
1611 MUTEX_NUKE(&fr
->fr_lock
);
1612 MUTEX_INIT(&fr
->fr_lock
, "nat-filter rule lock");
1615 READ_ENTER(&ipf_nat
);
1617 for (n
= nat_instances
; n
; n
= n
->nat_next
)
1618 if (n
->nat_fr
== fr
)
1622 MUTEX_ENTER(&fr
->fr_lock
);
1624 MUTEX_EXIT(&fr
->fr_lock
);
1627 RWLOCK_EXIT(&ipf_nat
);
1638 KFREES(ipnn
, ipn
->ipn_dsize
);
1643 WRITE_ENTER(&ipf_nat
);
1645 error
= nat_insert(nat
, nat
->nat_rev
);
1646 if ((error
== 0) && (aps
!= NULL
)) {
1647 aps
->aps_next
= ap_sess_list
;
1651 RWLOCK_EXIT(&ipf_nat
);
1663 (void) fr_derefrule(&fr
);
1665 if ((ipnn
!= NULL
) && (ipnn
!= ipn
)) {
1666 KFREES(ipnn
, ipn
->ipn_dsize
);
1672 if (aps
->aps_data
!= NULL
) {
1673 KFREES(aps
->aps_data
, aps
->aps_psiz
);
1679 appr_free(in
->in_apr
);
1688 /* ------------------------------------------------------------------------ */
1689 /* Function: nat_delete */
1691 /* Parameters: natd(I) - pointer to NAT structure to delete */
1692 /* logtype(I) - type of LOG record to create before deleting */
1693 /* Write Lock: ipf_nat */
1695 /* Delete a nat entry from the various lists and table. If NAT logging is */
1696 /* enabled then generate a NAT log record for this event. */
1697 /* ------------------------------------------------------------------------ */
1698 void nat_delete(nat
, logtype
)
1705 if (logtype
!= 0 && nat_logging
!= 0)
1706 nat_log(nat
, logtype
);
1707 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
1708 ipf_rand_push(nat
, sizeof(*nat
));
1712 * Take it as a general indication that all the pointers are set if
1715 if (nat
->nat_pnext
!= NULL
) {
1718 nat_stats
.ns_bucketlen
[0][nat
->nat_hv
[0]]--;
1719 nat_stats
.ns_bucketlen
[1][nat
->nat_hv
[1]]--;
1721 *nat
->nat_pnext
= nat
->nat_next
;
1722 if (nat
->nat_next
!= NULL
) {
1723 nat
->nat_next
->nat_pnext
= nat
->nat_pnext
;
1724 nat
->nat_next
= NULL
;
1726 nat
->nat_pnext
= NULL
;
1728 *nat
->nat_phnext
[0] = nat
->nat_hnext
[0];
1729 if (nat
->nat_hnext
[0] != NULL
) {
1730 nat
->nat_hnext
[0]->nat_phnext
[0] = nat
->nat_phnext
[0];
1731 nat
->nat_hnext
[0] = NULL
;
1733 nat
->nat_phnext
[0] = NULL
;
1735 *nat
->nat_phnext
[1] = nat
->nat_hnext
[1];
1736 if (nat
->nat_hnext
[1] != NULL
) {
1737 nat
->nat_hnext
[1]->nat_phnext
[1] = nat
->nat_phnext
[1];
1738 nat
->nat_hnext
[1] = NULL
;
1740 nat
->nat_phnext
[1] = NULL
;
1742 if ((nat
->nat_flags
& SI_WILDP
) != 0)
1743 nat_stats
.ns_wilds
--;
1746 if (nat
->nat_me
!= NULL
) {
1747 *nat
->nat_me
= NULL
;
1751 if (nat
->nat_tqe
.tqe_ifq
!= NULL
)
1752 fr_deletequeueentry(&nat
->nat_tqe
);
1754 if (logtype
== NL_EXPIRE
)
1755 nat_stats
.ns_expire
++;
1757 MUTEX_ENTER(&nat
->nat_lock
);
1759 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1760 * This happens when a nat'd packet is blocked and we want to throw
1761 * away the NAT session.
1763 if (logtype
== NL_DESTROY
) {
1764 if (nat
->nat_ref
> 2) {
1766 MUTEX_EXIT(&nat
->nat_lock
);
1768 nat_stats
.ns_orphans
++;
1771 } else if (nat
->nat_ref
> 1) {
1773 MUTEX_EXIT(&nat
->nat_lock
);
1775 nat_stats
.ns_orphans
++;
1778 MUTEX_EXIT(&nat
->nat_lock
);
1781 * At this point, nat_ref is 1, doing "--" would make it 0..
1785 nat_stats
.ns_orphans
--;
1787 #ifdef IPFILTER_SYNC
1789 ipfsync_del(nat
->nat_sync
);
1792 if (nat
->nat_fr
!= NULL
)
1793 (void) fr_derefrule(&nat
->nat_fr
);
1795 if (nat
->nat_hm
!= NULL
)
1796 fr_hostmapdel(&nat
->nat_hm
);
1799 * If there is an active reference from the nat entry to its parent
1800 * rule, decrement the rule's reference count and free it too if no
1801 * longer being used.
1805 fr_ipnatderef(&ipn
);
1808 MUTEX_DESTROY(&nat
->nat_lock
);
1810 aps_free(nat
->nat_aps
);
1811 nat_stats
.ns_inuse
--;
1814 * If there's a fragment table entry too for this nat entry, then
1815 * dereference that as well. This is after nat_lock is released
1818 fr_forgetnat((void *)nat
);
1824 /* ------------------------------------------------------------------------ */
1825 /* Function: nat_flushtable */
1826 /* Returns: int - number of NAT rules deleted */
1827 /* Parameters: Nil */
1829 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */
1830 /* log record should be emitted in nat_delete() if NAT logging is enabled. */
1831 /* ------------------------------------------------------------------------ */
1833 * nat_flushtable - clear the NAT table of all mapping entries.
1835 static int nat_flushtable()
1841 * ALL NAT mappings deleted, so let's just make the deletions
1844 if (nat_table
[0] != NULL
)
1845 bzero((char *)nat_table
[0],
1846 sizeof(nat_table
[0]) * ipf_nattable_sz
);
1847 if (nat_table
[1] != NULL
)
1848 bzero((char *)nat_table
[1],
1849 sizeof(nat_table
[1]) * ipf_nattable_sz
);
1851 while ((nat
= nat_instances
) != NULL
) {
1852 nat_delete(nat
, NL_FLUSH
);
1860 /* ------------------------------------------------------------------------ */
1861 /* Function: nat_clearlist */
1862 /* Returns: int - number of NAT/RDR rules deleted */
1863 /* Parameters: Nil */
1865 /* Delete all rules in the current list of rules. There is nothing elegant */
1866 /* about this cleanup: simply free all entries on the list of rules and */
1867 /* clear out the tables used for hashed NAT rule lookups. */
1868 /* ------------------------------------------------------------------------ */
1869 static int nat_clearlist()
1871 ipnat_t
*n
, **np
= &nat_list
;
1874 if (nat_rules
!= NULL
)
1875 bzero((char *)nat_rules
, sizeof(*nat_rules
) * ipf_natrules_sz
);
1876 if (rdr_rules
!= NULL
)
1877 bzero((char *)rdr_rules
, sizeof(*rdr_rules
) * ipf_rdrrules_sz
);
1879 while ((n
= *np
) != NULL
) {
1881 if (n
->in_use
== 0) {
1882 if (n
->in_apr
!= NULL
)
1883 appr_free(n
->in_apr
);
1884 MUTEX_DESTROY(&n
->in_lock
);
1886 nat_stats
.ns_rules
--;
1888 n
->in_flags
|= IPN_DELETE
;
1893 #if SOLARIS && !defined(_INET_IP_STACK_H)
1894 pfil_delayed_copy
= 1;
1902 /* ------------------------------------------------------------------------ */
1903 /* Function: nat_newmap */
1904 /* Returns: int - -1 == error, 0 == success */
1905 /* Parameters: fin(I) - pointer to packet information */
1906 /* nat(I) - pointer to NAT entry */
1907 /* ni(I) - pointer to structure with misc. information needed */
1908 /* to create new NAT entry. */
1910 /* Given an empty NAT structure, populate it with new information about a */
1911 /* new NAT session, as defined by the matching NAT rule. */
1912 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1913 /* to the new IP address for the translation. */
1914 /* ------------------------------------------------------------------------ */
1915 static INLINE
int nat_newmap(fin
, nat
, ni
)
1920 u_short st_port
, dport
, sport
, port
, sp
, dp
;
1921 struct in_addr in
, inb
;
1930 * If it's an outbound packet which doesn't match any existing
1931 * record, then create a new port
1937 st_port
= np
->in_pnext
;
1938 flags
= ni
->nai_flags
;
1939 sport
= ni
->nai_sport
;
1940 dport
= ni
->nai_dport
;
1943 * Do a loop until we either run out of entries to try or we find
1944 * a NAT mapping that isn't currently being used. This is done
1945 * because the change to the source is not (usually) being fixed.
1949 in
.s_addr
= htonl(np
->in_nip
);
1952 * Check to see if there is an existing NAT
1953 * setup for this IP address pair.
1955 hm
= nat_hostmap(np
, fin
->fin_src
, fin
->fin_dst
,
1958 in
.s_addr
= hm
->hm_mapip
.s_addr
;
1959 } else if ((l
== 1) && (hm
!= NULL
)) {
1962 in
.s_addr
= ntohl(in
.s_addr
);
1966 if ((np
->in_outmsk
== 0xffffffff) && (np
->in_pnext
== 0)) {
1971 if (np
->in_redir
== NAT_BIMAP
&&
1972 np
->in_inmsk
== np
->in_outmsk
) {
1974 * map the address block in a 1:1 fashion
1976 in
.s_addr
= np
->in_outip
;
1977 in
.s_addr
|= fin
->fin_saddr
& ~np
->in_inmsk
;
1978 in
.s_addr
= ntohl(in
.s_addr
);
1980 } else if (np
->in_redir
& NAT_MAPBLK
) {
1981 if ((l
>= np
->in_ppip
) || ((l
> 0) &&
1982 !(flags
& IPN_TCPUDP
)))
1985 * map-block - Calculate destination address.
1987 in
.s_addr
= ntohl(fin
->fin_saddr
);
1988 in
.s_addr
&= ntohl(~np
->in_inmsk
);
1989 inb
.s_addr
= in
.s_addr
;
1990 in
.s_addr
/= np
->in_ippip
;
1991 in
.s_addr
&= ntohl(~np
->in_outmsk
);
1992 in
.s_addr
+= ntohl(np
->in_outip
);
1994 * Calculate destination port.
1996 if ((flags
& IPN_TCPUDP
) &&
1997 (np
->in_ppip
!= 0)) {
1998 port
= ntohs(sport
) + l
;
1999 port
%= np
->in_ppip
;
2000 port
+= np
->in_ppip
*
2001 (inb
.s_addr
% np
->in_ippip
);
2002 port
+= MAPBLK_MINPORT
;
2006 } else if ((np
->in_outip
== 0) &&
2007 (np
->in_outmsk
== 0xffffffff)) {
2009 * 0/32 - use the interface's IP address.
2012 fr_ifpaddr(4, FRI_NORMAL
, fin
->fin_ifp
,
2015 in
.s_addr
= ntohl(in
.s_addr
);
2017 } else if ((np
->in_outip
== 0) && (np
->in_outmsk
== 0)) {
2019 * 0/0 - use the original source address/port.
2023 in
.s_addr
= ntohl(fin
->fin_saddr
);
2025 } else if ((np
->in_outmsk
!= 0xffffffff) &&
2026 (np
->in_pnext
== 0) && ((l
> 0) || (hm
== NULL
)))
2031 if ((flags
& IPN_TCPUDP
) &&
2032 ((np
->in_redir
& NAT_MAPBLK
) == 0) &&
2033 (np
->in_flags
& IPN_AUTOPORTMAP
)) {
2035 * "ports auto" (without map-block)
2037 if ((l
> 0) && (l
% np
->in_ppip
== 0)) {
2038 if (l
> np
->in_space
) {
2040 } else if ((l
> np
->in_ppip
) &&
2041 np
->in_outmsk
!= 0xffffffff)
2044 if (np
->in_ppip
!= 0) {
2045 port
= ntohs(sport
);
2046 port
+= (l
% np
->in_ppip
);
2047 port
%= np
->in_ppip
;
2048 port
+= np
->in_ppip
*
2049 (ntohl(fin
->fin_saddr
) %
2051 port
+= MAPBLK_MINPORT
;
2055 } else if (((np
->in_redir
& NAT_MAPBLK
) == 0) &&
2056 (flags
& IPN_TCPUDPICMP
) && (np
->in_pnext
!= 0)) {
2058 * Standard port translation. Select next port.
2060 if (np
->in_flags
& IPN_SEQUENTIAL
) {
2061 port
= np
->in_pnext
;
2063 port
= ipf_random() % (ntohs(np
->in_pmax
) -
2064 ntohs(np
->in_pmin
));
2065 port
+= ntohs(np
->in_pmin
);
2070 if (np
->in_pnext
> ntohs(np
->in_pmax
)) {
2071 np
->in_pnext
= ntohs(np
->in_pmin
);
2072 if (np
->in_outmsk
!= 0xffffffff)
2077 if (np
->in_flags
& IPN_IPRANGE
) {
2078 if (np
->in_nip
> ntohl(np
->in_outmsk
))
2079 np
->in_nip
= ntohl(np
->in_outip
);
2081 if ((np
->in_outmsk
!= 0xffffffff) &&
2082 ((np
->in_nip
+ 1) & ntohl(np
->in_outmsk
)) >
2083 ntohl(np
->in_outip
))
2084 np
->in_nip
= ntohl(np
->in_outip
) + 1;
2087 if ((port
== 0) && (flags
& (IPN_TCPUDPICMP
|IPN_ICMPQUERY
)))
2091 * Here we do a lookup of the connection as seen from
2092 * the outside. If an IP# pair already exists, try
2093 * again. So if you have A->B becomes C->B, you can
2094 * also have D->E become C->E but not D->B causing
2095 * another C->B. Also take protocol and ports into
2096 * account when determining whether a pre-existing
2097 * NAT setup will cause an external conflict where
2098 * this is appropriate.
2100 inb
.s_addr
= htonl(in
.s_addr
);
2101 sp
= fin
->fin_data
[0];
2102 dp
= fin
->fin_data
[1];
2103 fin
->fin_data
[0] = fin
->fin_data
[1];
2104 fin
->fin_data
[1] = htons(port
);
2105 natl
= nat_inlookup(fin
, flags
& ~(SI_WILDP
|NAT_SEARCH
),
2106 (u_int
)fin
->fin_p
, fin
->fin_dst
, inb
);
2107 fin
->fin_data
[0] = sp
;
2108 fin
->fin_data
[1] = dp
;
2111 * Has the search wrapped around and come back to the
2114 if ((natl
!= NULL
) &&
2115 (np
->in_pnext
!= 0) && (st_port
== np
->in_pnext
) &&
2116 (np
->in_nip
!= 0) && (st_ip
== np
->in_nip
))
2119 } while (natl
!= NULL
);
2121 if (np
->in_space
> 0)
2124 /* Setup the NAT table */
2125 nat
->nat_inip
= fin
->fin_src
;
2126 nat
->nat_outip
.s_addr
= htonl(in
.s_addr
);
2127 nat
->nat_oip
= fin
->fin_dst
;
2128 if (nat
->nat_hm
== NULL
)
2129 nat
->nat_hm
= nat_hostmap(np
, fin
->fin_src
, fin
->fin_dst
,
2133 * The ICMP checksum does not have a pseudo header containing
2136 ni
->nai_sum1
= LONG_SUM(ntohl(fin
->fin_saddr
));
2137 ni
->nai_sum2
= LONG_SUM(in
.s_addr
);
2138 if ((flags
& IPN_TCPUDP
)) {
2139 ni
->nai_sum1
+= ntohs(sport
);
2140 ni
->nai_sum2
+= ntohs(port
);
2143 if (flags
& IPN_TCPUDP
) {
2144 nat
->nat_inport
= sport
;
2145 nat
->nat_outport
= port
; /* sport */
2146 nat
->nat_oport
= dport
;
2147 ((tcphdr_t
*)fin
->fin_dp
)->th_sport
= port
;
2148 } else if (flags
& IPN_ICMPQUERY
) {
2149 ((icmphdr_t
*)fin
->fin_dp
)->icmp_id
= port
;
2150 nat
->nat_inport
= port
;
2151 nat
->nat_outport
= port
;
2153 } else if (fin
->fin_p
== IPPROTO_GRE
) {
2154 nat
->nat_gre
.gs_flags
= ((grehdr_t
*)fin
->fin_dp
)->gr_flags
;
2155 if (GRE_REV(nat
->nat_gre
.gs_flags
) == 1) {
2156 nat
->nat_oport
= 0;/*fin->fin_data[1];*/
2157 nat
->nat_inport
= 0;/*fin->fin_data[0];*/
2158 nat
->nat_outport
= 0;/*fin->fin_data[0];*/
2159 nat
->nat_call
[0] = fin
->fin_data
[0];
2160 nat
->nat_call
[1] = fin
->fin_data
[0];
2164 ni
->nai_ip
.s_addr
= in
.s_addr
;
2165 ni
->nai_port
= port
;
2166 ni
->nai_nport
= dport
;
2171 /* ------------------------------------------------------------------------ */
2172 /* Function: nat_newrdr */
2173 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */
2174 /* allow rule to be moved if IPN_ROUNDR is set. */
2175 /* Parameters: fin(I) - pointer to packet information */
2176 /* nat(I) - pointer to NAT entry */
2177 /* ni(I) - pointer to structure with misc. information needed */
2178 /* to create new NAT entry. */
2180 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2181 /* to the new IP address for the translation. */
2182 /* ------------------------------------------------------------------------ */
2183 static INLINE
int nat_newrdr(fin
, nat
, ni
)
2188 u_short nport
, dport
, sport
;
2189 struct in_addr in
, inb
;
2201 flags
= ni
->nai_flags
;
2202 sport
= ni
->nai_sport
;
2203 dport
= ni
->nai_dport
;
2206 * If the matching rule has IPN_STICKY set, then we want to have the
2207 * same rule kick in as before. Why would this happen? If you have
2208 * a collection of rdr rules with "round-robin sticky", the current
2209 * packet might match a different one to the previous connection but
2210 * we want the same destination to be used.
2212 if (((np
->in_flags
& (IPN_ROUNDR
|IPN_SPLIT
)) != 0) &&
2213 ((np
->in_flags
& IPN_STICKY
) != 0)) {
2214 hm
= nat_hostmap(NULL
, fin
->fin_src
, fin
->fin_dst
, in
,
2217 in
.s_addr
= ntohl(hm
->hm_mapip
.s_addr
);
2225 * Otherwise, it's an inbound packet. Most likely, we don't
2226 * want to rewrite source ports and source addresses. Instead,
2227 * we want to rewrite to a fixed internal address and fixed
2230 if (np
->in_flags
& IPN_SPLIT
) {
2231 in
.s_addr
= np
->in_nip
;
2233 if ((np
->in_flags
& (IPN_ROUNDR
|IPN_STICKY
)) == IPN_STICKY
) {
2234 hm
= nat_hostmap(NULL
, fin
->fin_src
, fin
->fin_dst
,
2237 in
.s_addr
= hm
->hm_mapip
.s_addr
;
2242 if (hm
== NULL
|| hm
->hm_ref
== 1) {
2243 if (np
->in_inip
== htonl(in
.s_addr
)) {
2244 np
->in_nip
= ntohl(np
->in_inmsk
);
2247 np
->in_nip
= ntohl(np
->in_inip
);
2251 } else if ((np
->in_inip
== 0) && (np
->in_inmsk
== 0xffffffff)) {
2253 * 0/32 - use the interface's IP address.
2255 if (fr_ifpaddr(4, FRI_NORMAL
, fin
->fin_ifp
, &in
, NULL
) == -1)
2257 in
.s_addr
= ntohl(in
.s_addr
);
2259 } else if ((np
->in_inip
== 0) && (np
->in_inmsk
== 0)) {
2261 * 0/0 - use the original destination address/port.
2263 in
.s_addr
= ntohl(fin
->fin_daddr
);
2265 } else if (np
->in_redir
== NAT_BIMAP
&&
2266 np
->in_inmsk
== np
->in_outmsk
) {
2268 * map the address block in a 1:1 fashion
2270 in
.s_addr
= np
->in_inip
;
2271 in
.s_addr
|= fin
->fin_daddr
& ~np
->in_inmsk
;
2272 in
.s_addr
= ntohl(in
.s_addr
);
2274 in
.s_addr
= ntohl(np
->in_inip
);
2277 if ((np
->in_pnext
== 0) || ((flags
& NAT_NOTRULEPORT
) != 0))
2281 * Whilst not optimized for the case where
2282 * pmin == pmax, the gain is not significant.
2284 if (((np
->in_flags
& IPN_FIXEDDPORT
) == 0) &&
2285 (np
->in_pmin
!= np
->in_pmax
)) {
2286 nport
= ntohs(dport
) - ntohs(np
->in_pmin
) +
2287 ntohs(np
->in_pnext
);
2288 nport
= htons(nport
);
2290 nport
= np
->in_pnext
;
2294 * When the redirect-to address is set to 0.0.0.0, just
2295 * assume a blank `forwarding' of the packet. We don't
2296 * setup any translation for this either.
2298 if (in
.s_addr
== 0) {
2301 in
.s_addr
= ntohl(fin
->fin_daddr
);
2305 * Check to see if this redirect mapping already exists and if
2306 * it does, return "failure" (allowing it to be created will just
2307 * cause one or both of these "connections" to stop working.)
2309 inb
.s_addr
= htonl(in
.s_addr
);
2310 sp
= fin
->fin_data
[0];
2311 dp
= fin
->fin_data
[1];
2312 fin
->fin_data
[1] = fin
->fin_data
[0];
2313 fin
->fin_data
[0] = ntohs(nport
);
2314 natl
= nat_outlookup(fin
, flags
& ~(SI_WILDP
|NAT_SEARCH
),
2315 (u_int
)fin
->fin_p
, inb
, fin
->fin_src
);
2316 fin
->fin_data
[0] = sp
;
2317 fin
->fin_data
[1] = dp
;
2321 nat
->nat_inip
.s_addr
= htonl(in
.s_addr
);
2322 nat
->nat_outip
= fin
->fin_dst
;
2323 nat
->nat_oip
= fin
->fin_src
;
2324 if ((nat
->nat_hm
== NULL
) && ((np
->in_flags
& IPN_STICKY
) != 0))
2325 nat
->nat_hm
= nat_hostmap(np
, fin
->fin_src
, fin
->fin_dst
, in
,
2328 ni
->nai_sum1
= LONG_SUM(ntohl(fin
->fin_daddr
)) + ntohs(dport
);
2329 ni
->nai_sum2
= LONG_SUM(in
.s_addr
) + ntohs(nport
);
2331 ni
->nai_ip
.s_addr
= in
.s_addr
;
2332 ni
->nai_nport
= nport
;
2333 ni
->nai_port
= sport
;
2335 if (flags
& IPN_TCPUDP
) {
2336 nat
->nat_inport
= nport
;
2337 nat
->nat_outport
= dport
;
2338 nat
->nat_oport
= sport
;
2339 ((tcphdr_t
*)fin
->fin_dp
)->th_dport
= nport
;
2340 } else if (flags
& IPN_ICMPQUERY
) {
2341 ((icmphdr_t
*)fin
->fin_dp
)->icmp_id
= nport
;
2342 nat
->nat_inport
= nport
;
2343 nat
->nat_outport
= nport
;
2345 } else if (fin
->fin_p
== IPPROTO_GRE
) {
2346 nat
->nat_gre
.gs_flags
= ((grehdr_t
*)fin
->fin_dp
)->gr_flags
;
2347 if (GRE_REV(nat
->nat_gre
.gs_flags
) == 1) {
2348 nat
->nat_call
[0] = fin
->fin_data
[0];
2349 nat
->nat_call
[1] = fin
->fin_data
[1];
2350 nat
->nat_oport
= 0; /*fin->fin_data[0];*/
2351 nat
->nat_inport
= 0; /*fin->fin_data[1];*/
2352 nat
->nat_outport
= 0; /*fin->fin_data[1];*/
2360 /* ------------------------------------------------------------------------ */
2361 /* Function: nat_new */
2362 /* Returns: nat_t* - NULL == failure to create new NAT structure, */
2363 /* else pointer to new NAT structure */
2364 /* Parameters: fin(I) - pointer to packet information */
2365 /* np(I) - pointer to NAT rule */
2366 /* natsave(I) - pointer to where to store NAT struct pointer */
2367 /* flags(I) - flags describing the current packet */
2368 /* direction(I) - direction of packet (in/out) */
2369 /* Write Lock: ipf_nat */
2371 /* Attempts to create a new NAT entry. Does not actually change the packet */
2374 /* This function is in three main parts: (1) deal with creating a new NAT */
2375 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */
2376 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2377 /* and (3) building that structure and putting it into the NAT table(s). */
2379 /* NOTE: natsave should NOT be used to point back to an ipstate_t struct */
2380 /* as it can result in memory being corrupted. */
2381 /* ------------------------------------------------------------------------ */
2382 nat_t
*nat_new(fin
, np
, natsave
, flags
, direction
)
2389 u_short port
= 0, sport
= 0, dport
= 0, nport
= 0;
2390 tcphdr_t
*tcp
= NULL
;
2391 hostmap_t
*hm
= NULL
;
2398 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2399 qpktinfo_t
*qpi
= fin
->fin_qpi
;
2402 memset(&ni
, 0, sizeof ni
); /* XXX gcc */
2404 if (nat_stats
.ns_inuse
>= ipf_nattable_max
) {
2405 nat_stats
.ns_memfail
++;
2411 nflags
= np
->in_flags
& flags
;
2412 nflags
&= NAT_FROMRULE
;
2415 ni
.nai_nflags
= nflags
;
2416 ni
.nai_flags
= flags
;
2420 /* Give me a new nat */
2421 KMALLOC(nat
, nat_t
*);
2423 nat_stats
.ns_memfail
++;
2425 * Try to automatically tune the max # of entries in the
2426 * table allowed to be less than what will cause kmem_alloc()
2427 * to fail and try to eliminate panics due to out of memory
2428 * conditions arising.
2430 if (ipf_nattable_max
> ipf_nattable_sz
) {
2431 ipf_nattable_max
= nat_stats
.ns_inuse
- 100;
2432 printf("ipf_nattable_max reduced to %d\n",
2438 if (flags
& IPN_TCPUDP
) {
2440 ni
.nai_sport
= htons(fin
->fin_sport
);
2441 ni
.nai_dport
= htons(fin
->fin_dport
);
2442 } else if (flags
& IPN_ICMPQUERY
) {
2444 * In the ICMP query NAT code, we translate the ICMP id fields
2445 * to make them unique. This is independent of the ICMP type
2446 * (e.g. in the unlikely event that a host sends an echo and
2447 * a tstamp request with the same id, both packets will have
2448 * their ip address/id field changed in the same way).
2450 /* The icmp_id field is used by the sender to identify the
2451 * process making the icmp request. (the receiver just
2452 * copies it back in its response). So, it closely matches
2453 * the concept of source port. We overlay sport, so we can
2454 * maximally reuse the existing code.
2456 ni
.nai_sport
= ((icmphdr_t
*)fin
->fin_dp
)->icmp_id
;
2457 ni
.nai_dport
= ni
.nai_sport
;
2460 bzero((char *)nat
, sizeof(*nat
));
2461 nat
->nat_flags
= flags
;
2462 nat
->nat_redir
= np
->in_redir
;
2465 * Search the current table for a match.
2467 if (direction
== NAT_OUTBOUND
) {
2469 * We can now arrange to call this for the same connection
2470 * because ipf_nat_new doesn't protect the code path into
2473 natl
= nat_outlookup(fin
, nflags
, (u_int
)fin
->fin_p
,
2474 fin
->fin_src
, fin
->fin_dst
);
2481 move
= nat_newmap(fin
, nat
, &ni
);
2489 * NAT_INBOUND is used only for redirect rules
2491 natl
= nat_inlookup(fin
, nflags
, (u_int
)fin
->fin_p
,
2492 fin
->fin_src
, fin
->fin_dst
);
2499 move
= nat_newrdr(fin
, nat
, &ni
);
2507 nport
= ni
.nai_nport
;
2509 if ((move
== 1) && (np
->in_flags
& IPN_ROUNDR
)) {
2510 if (np
->in_redir
== NAT_REDIRECT
) {
2513 } else if (np
->in_redir
== NAT_MAP
) {
2519 if (flags
& IPN_TCPUDP
) {
2520 sport
= ni
.nai_sport
;
2521 dport
= ni
.nai_dport
;
2522 } else if (flags
& IPN_ICMPQUERY
) {
2523 sport
= ni
.nai_sport
;
2527 CALC_SUMD(ni
.nai_sum1
, ni
.nai_sum2
, sumd
);
2528 nat
->nat_sumd
[0] = (sumd
& 0xffff) + (sumd
>> 16);
2529 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2530 if ((flags
& IPN_TCP
) && dohwcksum
&&
2531 (((ill_t
*)qpi
->qpi_ill
)->ill_ick
.ick_magic
== ICK_M_CTL_MAGIC
)) {
2532 if (direction
== NAT_OUTBOUND
)
2533 ni
.nai_sum1
= LONG_SUM(in
.s_addr
);
2535 ni
.nai_sum1
= LONG_SUM(ntohl(fin
->fin_saddr
));
2536 ni
.nai_sum1
+= LONG_SUM(ntohl(fin
->fin_daddr
));
2538 ni
.nai_sum1
= (ni
.nai_sum1
& 0xffff) + (ni
.nai_sum1
>> 16);
2539 nat
->nat_sumd
[1] = NAT_HW_CKSUM
|(ni
.nai_sum1
& 0xffff);
2542 nat
->nat_sumd
[1] = nat
->nat_sumd
[0];
2544 if ((flags
& IPN_TCPUDPICMP
) && ((sport
!= port
) || (dport
!= nport
))) {
2545 if (direction
== NAT_OUTBOUND
)
2546 ni
.nai_sum1
= LONG_SUM(ntohl(fin
->fin_saddr
));
2548 ni
.nai_sum1
= LONG_SUM(ntohl(fin
->fin_daddr
));
2550 ni
.nai_sum2
= LONG_SUM(in
.s_addr
);
2552 CALC_SUMD(ni
.nai_sum1
, ni
.nai_sum2
, sumd
);
2553 nat
->nat_ipsumd
= (sumd
& 0xffff) + (sumd
>> 16);
2555 nat
->nat_ipsumd
= nat
->nat_sumd
[0];
2556 if (!(flags
& IPN_TCPUDPICMP
)) {
2557 nat
->nat_sumd
[0] = 0;
2558 nat
->nat_sumd
[1] = 0;
2562 if (nat_finalise(fin
, nat
, &ni
, tcp
, natsave
, direction
) == -1) {
2566 if (flags
& SI_WILDP
)
2567 nat_stats
.ns_wilds
++;
2568 fin
->fin_flx
|= FI_NEWNAT
;
2571 nat_stats
.ns_badnat
++;
2572 if ((hm
= nat
->nat_hm
) != NULL
)
2577 if (nat
!= NULL
&& np
!= NULL
)
2583 /* ------------------------------------------------------------------------ */
2584 /* Function: nat_finalise */
2585 /* Returns: int - 0 == sucess, -1 == failure */
2586 /* Parameters: fin(I) - pointer to packet information */
2587 /* nat(I) - pointer to NAT entry */
2588 /* ni(I) - pointer to structure with misc. information needed */
2589 /* to create new NAT entry. */
2590 /* Write Lock: ipf_nat */
2592 /* This is the tail end of constructing a new NAT entry and is the same */
2593 /* for both IPv4 and IPv6. */
2594 /* ------------------------------------------------------------------------ */
2596 static int nat_finalise(
2610 if (np
->in_ifps
[0] != NULL
) {
2611 COPYIFNAME(4, np
->in_ifps
[0], nat
->nat_ifnames
[0]);
2613 if (np
->in_ifps
[1] != NULL
) {
2614 COPYIFNAME(4, np
->in_ifps
[1], nat
->nat_ifnames
[1]);
2616 #ifdef IPFILTER_SYNC
2617 if ((nat
->nat_flags
& SI_CLONE
) == 0)
2618 nat
->nat_sync
= ipfsync_new(SMC_NAT
, fin
, nat
);
2621 nat
->nat_me
= natsave
;
2622 nat
->nat_dir
= direction
;
2623 nat
->nat_ifps
[0] = np
->in_ifps
[0];
2624 nat
->nat_ifps
[1] = np
->in_ifps
[1];
2626 nat
->nat_p
= fin
->fin_p
;
2627 nat
->nat_mssclamp
= np
->in_mssclamp
;
2628 if (nat
->nat_flags
& IPN_TCP
)
2629 nat
->nat_seqnext
[0] = ntohl(tcp
->th_seq
);
2631 if ((np
->in_apr
!= NULL
) && ((ni
->nai_flags
& NAT_SLAVE
) == 0))
2632 if (appr_new(fin
, nat
) == -1)
2635 if (nat_insert(nat
, fin
->fin_rev
) == 0) {
2637 nat_log(nat
, (u_int
)np
->in_redir
);
2642 MUTEX_ENTER(&fr
->fr_lock
);
2644 MUTEX_EXIT(&fr
->fr_lock
);
2650 * nat_insert failed, so cleanup time...
2656 /* ------------------------------------------------------------------------ */
2657 /* Function: nat_insert */
2658 /* Returns: int - 0 == success, -1 == failure */
2659 /* Parameters: nat(I) - pointer to NAT structure */
2660 /* rev(I) - flag indicating forward/reverse direction of packet */
2661 /* Write Lock: ipf_nat */
2663 /* Insert a NAT entry into the hash tables for searching and add it to the */
2664 /* list of active NAT entries. Adjust global counters when complete. */
2665 /* ------------------------------------------------------------------------ */
2666 int nat_insert(nat
, rev
)
2674 * Try and return an error as early as possible, so calculate the hash
2675 * entry numbers first and then proceed.
2677 if ((nat
->nat_flags
& (SI_W_SPORT
|SI_W_DPORT
)) == 0) {
2678 hv1
= NAT_HASH_FN(nat
->nat_inip
.s_addr
, nat
->nat_inport
,
2680 hv1
= NAT_HASH_FN(nat
->nat_oip
.s_addr
, hv1
+ nat
->nat_oport
,
2682 hv2
= NAT_HASH_FN(nat
->nat_outip
.s_addr
, nat
->nat_outport
,
2684 hv2
= NAT_HASH_FN(nat
->nat_oip
.s_addr
, hv2
+ nat
->nat_oport
,
2687 hv1
= NAT_HASH_FN(nat
->nat_inip
.s_addr
, 0, 0xffffffff);
2688 hv1
= NAT_HASH_FN(nat
->nat_oip
.s_addr
, hv1
, ipf_nattable_sz
);
2689 hv2
= NAT_HASH_FN(nat
->nat_outip
.s_addr
, 0, 0xffffffff);
2690 hv2
= NAT_HASH_FN(nat
->nat_oip
.s_addr
, hv2
, ipf_nattable_sz
);
2693 if (nat_stats
.ns_bucketlen
[0][hv1
] >= fr_nat_maxbucket
||
2694 nat_stats
.ns_bucketlen
[1][hv2
] >= fr_nat_maxbucket
) {
2698 nat
->nat_hv
[0] = hv1
;
2699 nat
->nat_hv
[1] = hv2
;
2701 MUTEX_INIT(&nat
->nat_lock
, "nat entry lock");
2706 nat
->nat_ifnames
[0][LIFNAMSIZ
- 1] = '\0';
2707 nat
->nat_ifps
[0] = fr_resolvenic(nat
->nat_ifnames
[0], 4);
2709 if (nat
->nat_ifnames
[1][0] != '\0') {
2710 nat
->nat_ifnames
[1][LIFNAMSIZ
- 1] = '\0';
2711 nat
->nat_ifps
[1] = fr_resolvenic(nat
->nat_ifnames
[1], 4);
2713 (void) strncpy(nat
->nat_ifnames
[1], nat
->nat_ifnames
[0],
2715 nat
->nat_ifnames
[1][LIFNAMSIZ
- 1] = '\0';
2716 nat
->nat_ifps
[1] = nat
->nat_ifps
[0];
2719 nat
->nat_next
= nat_instances
;
2720 nat
->nat_pnext
= &nat_instances
;
2722 nat_instances
->nat_pnext
= &nat
->nat_next
;
2723 nat_instances
= nat
;
2726 * Bump this before the hash table inserts.
2728 nat_stats
.ns_added
++;
2730 natp
= &nat_table
[0][hv1
];
2731 nat
->nat_phnext
[0] = natp
;
2732 nat
->nat_hnext
[0] = *natp
;
2734 (*natp
)->nat_phnext
[0] = &nat
->nat_hnext
[0];
2736 nat_stats
.ns_bucketlen
[0][hv1
]++;
2738 natp
= &nat_table
[1][hv2
];
2739 nat
->nat_phnext
[1] = natp
;
2740 nat
->nat_hnext
[1] = *natp
;
2742 (*natp
)->nat_phnext
[1] = &nat
->nat_hnext
[1];
2744 nat_stats
.ns_bucketlen
[1][hv2
]++;
2746 fr_setnatqueue(nat
, rev
);
2748 nat_stats
.ns_inuse
++;
2753 /* ------------------------------------------------------------------------ */
2754 /* Function: nat_icmperrorlookup */
2755 /* Returns: nat_t* - pointer to matching NAT structure */
2756 /* Parameters: fin(I) - pointer to packet information */
2757 /* dir(I) - direction of packet (in/out) */
2759 /* Check if the ICMP error message is related to an existing TCP, UDP or */
2760 /* ICMP query nat entry. It is assumed that the packet is already of the */
2761 /* the required length. */
2762 /* ------------------------------------------------------------------------ */
2763 nat_t
*nat_icmperrorlookup(fin
, dir
)
2767 int flags
= 0, type
, minlen
;
2768 icmphdr_t
*icmp
, *orgicmp
;
2769 tcphdr_t
*tcp
= NULL
;
2776 type
= icmp
->icmp_type
;
2778 * Does it at least have the return (basic) IP header ?
2779 * Only a basic IP header (no options) should be with an ICMP error
2780 * header. Also, if it's not an error type, then return.
2782 if ((fin
->fin_hlen
!= sizeof(ip_t
)) || !(fin
->fin_flx
& FI_ICMPERR
))
2788 oip
= (ip_t
*)((char *)fin
->fin_dp
+ 8);
2789 minlen
= IP_HL(oip
) << 2;
2790 if ((minlen
< sizeof(ip_t
)) ||
2791 (fin
->fin_plen
< ICMPERR_IPICMPHLEN
+ minlen
))
2794 * Is the buffer big enough for all of it ? It's the size of the IP
2795 * header claimed in the encapsulated part which is of concern. It
2796 * may be too big to be in this buffer but not so big that it's
2797 * outside the ICMP packet, leading to TCP deref's causing problems.
2798 * This is possible because we don't know how big oip_hl is when we
2799 * do the pullup early in fr_check() and thus can't guarantee it is
2807 # if defined(MENTAT)
2808 if ((char *)oip
+ fin
->fin_dlen
- ICMPERR_ICMPHLEN
> (char *)m
->b_wptr
)
2811 if ((char *)oip
+ fin
->fin_dlen
- ICMPERR_ICMPHLEN
>
2812 (char *)fin
->fin_ip
+ M_LEN(m
))
2818 if (fin
->fin_daddr
!= oip
->ip_src
.s_addr
)
2822 if (p
== IPPROTO_TCP
)
2824 else if (p
== IPPROTO_UDP
)
2826 else if (p
== IPPROTO_ICMP
) {
2827 orgicmp
= (icmphdr_t
*)((char *)oip
+ (IP_HL(oip
) << 2));
2829 /* see if this is related to an ICMP query */
2830 if (nat_icmpquerytype4(orgicmp
->icmp_type
)) {
2831 data
[0] = fin
->fin_data
[0];
2832 data
[1] = fin
->fin_data
[1];
2833 fin
->fin_data
[0] = 0;
2834 fin
->fin_data
[1] = orgicmp
->icmp_id
;
2836 flags
= IPN_ICMPERR
|IPN_ICMPQUERY
;
2838 * NOTE : dir refers to the direction of the original
2839 * ip packet. By definition the icmp error
2840 * message flows in the opposite direction.
2842 if (dir
== NAT_INBOUND
)
2843 nat
= nat_inlookup(fin
, flags
, p
, oip
->ip_dst
,
2846 nat
= nat_outlookup(fin
, flags
, p
, oip
->ip_dst
,
2848 fin
->fin_data
[0] = data
[0];
2849 fin
->fin_data
[1] = data
[1];
2854 if (flags
& IPN_TCPUDP
) {
2855 minlen
+= 8; /* + 64bits of data to get ports */
2856 if (fin
->fin_plen
< ICMPERR_IPICMPHLEN
+ minlen
)
2859 data
[0] = fin
->fin_data
[0];
2860 data
[1] = fin
->fin_data
[1];
2861 tcp
= (tcphdr_t
*)((char *)oip
+ (IP_HL(oip
) << 2));
2862 fin
->fin_data
[0] = ntohs(tcp
->th_dport
);
2863 fin
->fin_data
[1] = ntohs(tcp
->th_sport
);
2865 if (dir
== NAT_INBOUND
) {
2866 nat
= nat_inlookup(fin
, flags
, p
, oip
->ip_dst
,
2869 nat
= nat_outlookup(fin
, flags
, p
, oip
->ip_dst
,
2872 fin
->fin_data
[0] = data
[0];
2873 fin
->fin_data
[1] = data
[1];
2876 if (dir
== NAT_INBOUND
)
2877 return nat_inlookup(fin
, 0, p
, oip
->ip_dst
, oip
->ip_src
);
2879 return nat_outlookup(fin
, 0, p
, oip
->ip_dst
, oip
->ip_src
);
2883 /* ------------------------------------------------------------------------ */
2884 /* Function: nat_icmperror */
2885 /* Returns: nat_t* - point to matching NAT structure */
2886 /* Parameters: fin(I) - pointer to packet information */
2887 /* nflags(I) - NAT flags for this packet */
2888 /* dir(I) - direction of packet (in/out) */
2890 /* Fix up an ICMP packet which is an error message for an existing NAT */
2891 /* session. This will correct both packet header data and checksums. */
2893 /* This should *ONLY* be used for incoming ICMP error packets to make sure */
2894 /* a NAT'd ICMP packet gets correctly recognised. */
2895 /* ------------------------------------------------------------------------ */
2896 nat_t
*nat_icmperror(fin
, nflags
, dir
)
2901 u_32_t sum1
, sum2
, sumd
, sumd2
;
2902 struct in_addr a1
, a2
;
2903 int flags
, dlen
, odst
;
2911 if ((fin
->fin_flx
& (FI_SHORT
|FI_FRAGBODY
)))
2914 * nat_icmperrorlookup() will return NULL for `defective' packets.
2916 if ((fin
->fin_v
!= 4) || !(nat
= nat_icmperrorlookup(fin
, dir
)))
2923 *nflags
= IPN_ICMPERR
;
2925 oip
= (ip_t
*)&icmp
->icmp_ip
;
2926 dp
= (((char *)oip
) + (IP_HL(oip
) << 2));
2927 if (oip
->ip_p
== IPPROTO_TCP
) {
2928 tcp
= (tcphdr_t
*)dp
;
2929 csump
= (u_short
*)&tcp
->th_sum
;
2931 } else if (oip
->ip_p
== IPPROTO_UDP
) {
2934 udp
= (udphdr_t
*)dp
;
2935 tcp
= (tcphdr_t
*)dp
;
2936 csump
= (u_short
*)&udp
->uh_sum
;
2938 } else if (oip
->ip_p
== IPPROTO_ICMP
)
2939 flags
= IPN_ICMPQUERY
;
2940 dlen
= fin
->fin_plen
- ((char *)dp
- (char *)fin
->fin_ip
);
2943 * Need to adjust ICMP header to include the real IP#'s and
2944 * port #'s. Only apply a checksum change relative to the
2945 * IP address change as it will be modified again in fr_checknatout
2946 * for both address and port. Two checksum changes are
2947 * necessary for the two header address changes. Be careful
2948 * to only modify the checksum once for the port # and twice
2954 * Fix the IP addresses in the offending IP packet. You also need
2955 * to adjust the IP header checksum of that offending IP packet.
2957 * Normally, you would expect that the ICMP checksum of the
2958 * ICMP error message needs to be adjusted as well for the
2959 * IP address change in oip.
2960 * However, this is a NOP, because the ICMP checksum is
2961 * calculated over the complete ICMP packet, which includes the
2962 * changed oip IP addresses and oip->ip_sum. However, these
2963 * two changes cancel each other out (if the delta for
2964 * the IP address is x, then the delta for ip_sum is minus x),
2965 * so no change in the icmp_cksum is necessary.
2969 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2970 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2971 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2973 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2974 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2975 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2979 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2980 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2981 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2983 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2984 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2985 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2988 odst
= (oip
->ip_dst
.s_addr
== nat
->nat_oip
.s_addr
) ? 1 : 0;
2990 a1
.s_addr
= ntohl(nat
->nat_inip
.s_addr
);
2991 a2
.s_addr
= ntohl(oip
->ip_src
.s_addr
);
2992 oip
->ip_src
.s_addr
= htonl(a1
.s_addr
);
2994 a1
.s_addr
= ntohl(nat
->nat_outip
.s_addr
);
2995 a2
.s_addr
= ntohl(oip
->ip_dst
.s_addr
);
2996 oip
->ip_dst
.s_addr
= htonl(a1
.s_addr
);
2999 sumd
= a2
.s_addr
- a1
.s_addr
;
3001 if (a1
.s_addr
> a2
.s_addr
)
3005 fix_datacksum(&oip
->ip_sum
, sumd
);
3013 * Fix UDP pseudo header checksum to compensate for the
3014 * IP address change.
3016 if (((flags
& IPN_TCPUDP
) != 0) && (dlen
>= 4)) {
3019 * For offending TCP/UDP IP packets, translate the ports as
3020 * well, based on the NAT specification. Of course such
3021 * a change may be reflected in the ICMP checksum as well.
3023 * Since the port fields are part of the TCP/UDP checksum
3024 * of the offending IP packet, you need to adjust that checksum
3025 * as well... except that the change in the port numbers should
3026 * be offset by the checksum change. However, the TCP/UDP
3027 * checksum will also need to change if there has been an
3028 * IP address change.
3031 sum1
= ntohs(nat
->nat_inport
);
3032 sum2
= ntohs(tcp
->th_sport
);
3034 tcp
->th_sport
= htons(sum1
);
3036 sum1
= ntohs(nat
->nat_outport
);
3037 sum2
= ntohs(tcp
->th_dport
);
3039 tcp
->th_dport
= htons(sum1
);
3042 sumd
+= sum1
- sum2
;
3043 if (sumd
!= 0 || sumd2
!= 0) {
3045 * At this point, sumd is the delta to apply to the
3046 * TCP/UDP header, given the changes in both the IP
3047 * address and the ports and sumd2 is the delta to
3048 * apply to the ICMP header, given the IP address
3049 * change delta that may need to be applied to the
3050 * TCP/UDP checksum instead.
3052 * If we change both the IP and TCP/UDP checksums
3053 * then the ICMP checksum changes by the address
3054 * delta applied to the TCP/UDP checksum. If we
3055 * do not change the TCP/UDP checksum then we
3056 * apply the delta in ports to the ICMP checksum.
3058 if (oip
->ip_p
== IPPROTO_UDP
) {
3059 if ((dlen
>= 8) && (*csump
!= 0)) {
3060 fix_datacksum(csump
, sumd
);
3062 sumd2
= sum1
- sum2
;
3066 } else if (oip
->ip_p
== IPPROTO_TCP
) {
3068 fix_datacksum(csump
, sumd
);
3070 sumd2
= sum2
- sum1
;
3080 sumd2
= (sumd2
& 0xffff) + (sumd2
>> 16);
3081 sumd2
= (sumd2
& 0xffff) + (sumd2
>> 16);
3082 sumd2
= (sumd2
& 0xffff) + (sumd2
>> 16);
3084 if ((odst
== 0) && (dir
== NAT_OUTBOUND
) &&
3085 (fin
->fin_rev
== 0) && (np
!= NULL
) &&
3086 (np
->in_redir
& NAT_REDIRECT
)) {
3087 fix_outcksum(fin
, &icmp
->icmp_cksum
,
3090 fix_incksum(fin
, &icmp
->icmp_cksum
,
3095 } else if (((flags
& IPN_ICMPQUERY
) != 0) && (dlen
>= 8)) {
3099 * XXX - what if this is bogus hl and we go off the end ?
3100 * In this case, nat_icmperrorlookup() will have returned NULL.
3102 orgicmp
= (icmphdr_t
*)dp
;
3105 if (orgicmp
->icmp_id
!= nat
->nat_inport
) {
3108 * Fix ICMP checksum (of the offending ICMP
3109 * query packet) to compensate for the change
3110 * in the ICMP id of the offending ICMP
3113 * Since you modify orgicmp->icmp_id with
3114 * a delta (say x) and you compensate that
3115 * in orgicmp->icmp_cksum with a delta
3116 * minus x, you don't have to adjust the
3117 * overall icmp->icmp_cksum
3119 sum1
= ntohs(orgicmp
->icmp_id
);
3120 sum2
= ntohs(nat
->nat_inport
);
3121 CALC_SUMD(sum1
, sum2
, sumd
);
3122 orgicmp
->icmp_id
= nat
->nat_inport
;
3123 fix_datacksum(&orgicmp
->icmp_cksum
, sumd
);
3125 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3132 * NB: these lookups don't lock access to the list, it is assumed that it has
3133 * already been done!
3136 /* ------------------------------------------------------------------------ */
3137 /* Function: nat_inlookup */
3138 /* Returns: nat_t* - NULL == no match, */
3139 /* else pointer to matching NAT entry */
3140 /* Parameters: fin(I) - pointer to packet information */
3141 /* flags(I) - NAT flags for this packet */
3142 /* p(I) - protocol for this packet */
3143 /* src(I) - source IP address */
3144 /* mapdst(I) - destination IP address */
3146 /* Lookup a nat entry based on the mapped destination ip address/port and */
3147 /* real source address/port. We use this lookup when receiving a packet, */
3148 /* we're looking for a table entry, based on the destination address. */
3150 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3152 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3153 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3155 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3156 /* the packet is of said protocol */
3157 /* ------------------------------------------------------------------------ */
3158 nat_t
*nat_inlookup(fin
, flags
, p
, src
, mapdst
)
3161 struct in_addr src
, mapdst
;
3163 u_short sport
, dport
;
3177 dst
= mapdst
.s_addr
;
3178 sflags
= flags
& NAT_TCPUDPICMP
;
3184 sport
= htons(fin
->fin_data
[0]);
3185 dport
= htons(fin
->fin_data
[1]);
3188 if (flags
& IPN_ICMPERR
)
3189 sport
= fin
->fin_data
[1];
3191 dport
= fin
->fin_data
[1];
3198 if ((flags
& SI_WILDP
) != 0)
3199 goto find_in_wild_ports
;
3201 hv
= NAT_HASH_FN(dst
, dport
, 0xffffffff);
3202 hv
= NAT_HASH_FN(src
.s_addr
, hv
+ sport
, ipf_nattable_sz
);
3203 nat
= nat_table
[1][hv
];
3204 for (; nat
; nat
= nat
->nat_hnext
[1]) {
3205 if (nat
->nat_ifps
[0] != NULL
) {
3206 if ((ifp
!= NULL
) && (ifp
!= nat
->nat_ifps
[0]))
3210 nflags
= nat
->nat_flags
;
3212 if (nat
->nat_oip
.s_addr
== src
.s_addr
&&
3213 nat
->nat_outip
.s_addr
== dst
&&
3215 (sflags
== (nat
->nat_flags
& IPN_TCPUDPICMP
)))
3216 || (p
== nat
->nat_p
))) {
3221 if (nat
->nat_call
[1] != fin
->fin_data
[0])
3226 if ((flags
& IPN_ICMPERR
) != 0) {
3227 if (nat
->nat_outport
!= sport
)
3230 if (nat
->nat_outport
!= dport
)
3236 if (nat
->nat_oport
!= sport
)
3238 if (nat
->nat_outport
!= dport
)
3246 if ((ipn
!= NULL
) && (nat
->nat_aps
!= NULL
))
3247 if (appr_match(fin
, nat
) != 0)
3249 if ((nat
->nat_ifps
[0] == NULL
) && (ifp
!= NULL
))
3250 nat
->nat_ifps
[0] = ifp
;
3256 * So if we didn't find it but there are wildcard members in the hash
3257 * table, go back and look for them. We do this search and update here
3258 * because it is modifying the NAT table and we want to do this only
3259 * for the first packet that matches. The exception, of course, is
3260 * for "dummy" (FI_IGNORE) lookups.
3263 if (!(flags
& NAT_TCPUDP
) || !(flags
& NAT_SEARCH
))
3265 if (nat_stats
.ns_wilds
== 0)
3268 RWLOCK_EXIT(&ipf_nat
);
3270 hv
= NAT_HASH_FN(dst
, 0, 0xffffffff);
3271 hv
= NAT_HASH_FN(src
.s_addr
, hv
, ipf_nattable_sz
);
3273 WRITE_ENTER(&ipf_nat
);
3275 nat
= nat_table
[1][hv
];
3276 for (; nat
; nat
= nat
->nat_hnext
[1]) {
3277 if (nat
->nat_ifps
[0] != NULL
) {
3278 if ((ifp
!= NULL
) && (ifp
!= nat
->nat_ifps
[0]))
3282 if (nat
->nat_p
!= fin
->fin_p
)
3284 if (nat
->nat_oip
.s_addr
!= src
.s_addr
||
3285 nat
->nat_outip
.s_addr
!= dst
)
3288 nflags
= nat
->nat_flags
;
3289 if (!(nflags
& (NAT_TCPUDP
|SI_WILDP
)))
3292 if (nat_wildok(nat
, (int)sport
, (int)dport
, nflags
,
3293 NAT_INBOUND
) == 1) {
3294 if ((fin
->fin_flx
& FI_IGNORE
) != 0)
3296 if ((nflags
& SI_CLONE
) != 0) {
3297 nat
= fr_natclone(fin
, nat
);
3301 MUTEX_ENTER(&ipf_nat_new
);
3302 nat_stats
.ns_wilds
--;
3303 MUTEX_EXIT(&ipf_nat_new
);
3305 if ((nat
->nat_ifps
[0] == NULL
) && (ifp
!= NULL
))
3306 nat
->nat_ifps
[0] = ifp
;
3307 nat
->nat_oport
= sport
;
3308 nat
->nat_outport
= dport
;
3309 nat
->nat_flags
&= ~(SI_W_DPORT
|SI_W_SPORT
);
3315 MUTEX_DOWNGRADE(&ipf_nat
);
3321 /* ------------------------------------------------------------------------ */
3322 /* Function: nat_tabmove */
3324 /* Parameters: nat(I) - pointer to NAT structure */
3325 /* Write Lock: ipf_nat */
3327 /* This function is only called for TCP/UDP NAT table entries where the */
3328 /* original was placed in the table without hashing on the ports and we now */
3329 /* want to include hashing on port numbers. */
3330 /* ------------------------------------------------------------------------ */
3331 static void nat_tabmove(nat
)
3337 if (nat
->nat_flags
& SI_CLONE
)
3341 * Remove the NAT entry from the old location
3343 if (nat
->nat_hnext
[0])
3344 nat
->nat_hnext
[0]->nat_phnext
[0] = nat
->nat_phnext
[0];
3345 *nat
->nat_phnext
[0] = nat
->nat_hnext
[0];
3346 nat_stats
.ns_bucketlen
[0][nat
->nat_hv
[0]]--;
3348 if (nat
->nat_hnext
[1])
3349 nat
->nat_hnext
[1]->nat_phnext
[1] = nat
->nat_phnext
[1];
3350 *nat
->nat_phnext
[1] = nat
->nat_hnext
[1];
3351 nat_stats
.ns_bucketlen
[1][nat
->nat_hv
[1]]--;
3354 * Add into the NAT table in the new position
3356 hv
= NAT_HASH_FN(nat
->nat_inip
.s_addr
, nat
->nat_inport
, 0xffffffff);
3357 hv
= NAT_HASH_FN(nat
->nat_oip
.s_addr
, hv
+ nat
->nat_oport
,
3359 nat
->nat_hv
[0] = hv
;
3360 natp
= &nat_table
[0][hv
];
3362 (*natp
)->nat_phnext
[0] = &nat
->nat_hnext
[0];
3363 nat
->nat_phnext
[0] = natp
;
3364 nat
->nat_hnext
[0] = *natp
;
3366 nat_stats
.ns_bucketlen
[0][hv
]++;
3368 hv
= NAT_HASH_FN(nat
->nat_outip
.s_addr
, nat
->nat_outport
, 0xffffffff);
3369 hv
= NAT_HASH_FN(nat
->nat_oip
.s_addr
, hv
+ nat
->nat_oport
,
3371 nat
->nat_hv
[1] = hv
;
3372 natp
= &nat_table
[1][hv
];
3374 (*natp
)->nat_phnext
[1] = &nat
->nat_hnext
[1];
3375 nat
->nat_phnext
[1] = natp
;
3376 nat
->nat_hnext
[1] = *natp
;
3378 nat_stats
.ns_bucketlen
[1][hv
]++;
3382 /* ------------------------------------------------------------------------ */
3383 /* Function: nat_outlookup */
3384 /* Returns: nat_t* - NULL == no match, */
3385 /* else pointer to matching NAT entry */
3386 /* Parameters: fin(I) - pointer to packet information */
3387 /* flags(I) - NAT flags for this packet */
3388 /* p(I) - protocol for this packet */
3389 /* src(I) - source IP address */
3390 /* dst(I) - destination IP address */
3391 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */
3393 /* Lookup a nat entry based on the source 'real' ip address/port and */
3394 /* destination address/port. We use this lookup when sending a packet out, */
3395 /* we're looking for a table entry, based on the source address. */
3397 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3399 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3400 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3402 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3403 /* the packet is of said protocol */
3404 /* ------------------------------------------------------------------------ */
3405 nat_t
*nat_outlookup(fin
, flags
, p
, src
, dst
)
3408 struct in_addr src
, dst
;
3410 u_short sport
, dport
;
3421 sflags
= flags
& IPN_TCPUDPICMP
;
3429 sport
= htons(fin
->fin_data
[0]);
3430 dport
= htons(fin
->fin_data
[1]);
3433 if (flags
& IPN_ICMPERR
)
3434 sport
= fin
->fin_data
[1];
3436 dport
= fin
->fin_data
[1];
3442 if ((flags
& SI_WILDP
) != 0)
3443 goto find_out_wild_ports
;
3445 hv
= NAT_HASH_FN(srcip
, sport
, 0xffffffff);
3446 hv
= NAT_HASH_FN(dst
.s_addr
, hv
+ dport
, ipf_nattable_sz
);
3447 nat
= nat_table
[0][hv
];
3448 for (; nat
; nat
= nat
->nat_hnext
[0]) {
3449 if (nat
->nat_ifps
[1] != NULL
) {
3450 if ((ifp
!= NULL
) && (ifp
!= nat
->nat_ifps
[1]))
3454 nflags
= nat
->nat_flags
;
3456 if (nat
->nat_inip
.s_addr
== srcip
&&
3457 nat
->nat_oip
.s_addr
== dst
.s_addr
&&
3458 (((p
== 0) && (sflags
== (nflags
& NAT_TCPUDPICMP
)))
3459 || (p
== nat
->nat_p
))) {
3464 if (nat
->nat_call
[1] != fin
->fin_data
[0])
3470 if (nat
->nat_oport
!= dport
)
3472 if (nat
->nat_inport
!= sport
)
3480 if ((ipn
!= NULL
) && (nat
->nat_aps
!= NULL
))
3481 if (appr_match(fin
, nat
) != 0)
3483 if ((nat
->nat_ifps
[1] == NULL
) && (ifp
!= NULL
))
3484 nat
->nat_ifps
[1] = ifp
;
3490 * So if we didn't find it but there are wildcard members in the hash
3491 * table, go back and look for them. We do this search and update here
3492 * because it is modifying the NAT table and we want to do this only
3493 * for the first packet that matches. The exception, of course, is
3494 * for "dummy" (FI_IGNORE) lookups.
3496 find_out_wild_ports
:
3497 if (!(flags
& NAT_TCPUDP
) || !(flags
& NAT_SEARCH
))
3499 if (nat_stats
.ns_wilds
== 0)
3502 RWLOCK_EXIT(&ipf_nat
);
3504 hv
= NAT_HASH_FN(srcip
, 0, 0xffffffff);
3505 hv
= NAT_HASH_FN(dst
.s_addr
, hv
, ipf_nattable_sz
);
3507 WRITE_ENTER(&ipf_nat
);
3509 nat
= nat_table
[0][hv
];
3510 for (; nat
; nat
= nat
->nat_hnext
[0]) {
3511 if (nat
->nat_ifps
[1] != NULL
) {
3512 if ((ifp
!= NULL
) && (ifp
!= nat
->nat_ifps
[1]))
3516 if (nat
->nat_p
!= fin
->fin_p
)
3518 if ((nat
->nat_inip
.s_addr
!= srcip
) ||
3519 (nat
->nat_oip
.s_addr
!= dst
.s_addr
))
3522 nflags
= nat
->nat_flags
;
3523 if (!(nflags
& (NAT_TCPUDP
|SI_WILDP
)))
3526 if (nat_wildok(nat
, (int)sport
, (int)dport
, nflags
,
3527 NAT_OUTBOUND
) == 1) {
3528 if ((fin
->fin_flx
& FI_IGNORE
) != 0)
3530 if ((nflags
& SI_CLONE
) != 0) {
3531 nat
= fr_natclone(fin
, nat
);
3535 MUTEX_ENTER(&ipf_nat_new
);
3536 nat_stats
.ns_wilds
--;
3537 MUTEX_EXIT(&ipf_nat_new
);
3539 if ((nat
->nat_ifps
[1] == NULL
) && (ifp
!= NULL
))
3540 nat
->nat_ifps
[1] = ifp
;
3541 nat
->nat_inport
= sport
;
3542 nat
->nat_oport
= dport
;
3543 if (nat
->nat_outport
== 0)
3544 nat
->nat_outport
= sport
;
3545 nat
->nat_flags
&= ~(SI_W_DPORT
|SI_W_SPORT
);
3551 MUTEX_DOWNGRADE(&ipf_nat
);
3557 /* ------------------------------------------------------------------------ */
3558 /* Function: nat_lookupredir */
3559 /* Returns: nat_t* - NULL == no match, */
3560 /* else pointer to matching NAT entry */
3561 /* Parameters: np(I) - pointer to description of packet to find NAT table */
3564 /* Lookup the NAT tables to search for a matching redirect */
3565 /* The contents of natlookup_t should imitate those found in a packet that */
3566 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3567 /* We can do the lookup in one of two ways, imitating an inbound or */
3568 /* outbound packet. By default we assume outbound, unless IPN_IN is set. */
3569 /* For IN, the fields are set as follows: */
3570 /* nl_real* = source information */
3571 /* nl_out* = destination information (translated) */
3572 /* For an out packet, the fields are set like this: */
3573 /* nl_in* = source information (untranslated) */
3574 /* nl_out* = destination information (translated) */
3575 /* ------------------------------------------------------------------------ */
3576 nat_t
*nat_lookupredir(np
)
3582 bzero((char *)&fi
, sizeof(fi
));
3583 if (np
->nl_flags
& IPN_IN
) {
3584 fi
.fin_data
[0] = ntohs(np
->nl_realport
);
3585 fi
.fin_data
[1] = ntohs(np
->nl_outport
);
3587 fi
.fin_data
[0] = ntohs(np
->nl_inport
);
3588 fi
.fin_data
[1] = ntohs(np
->nl_outport
);
3590 if (np
->nl_flags
& IPN_TCP
)
3591 fi
.fin_p
= IPPROTO_TCP
;
3592 else if (np
->nl_flags
& IPN_UDP
)
3593 fi
.fin_p
= IPPROTO_UDP
;
3594 else if (np
->nl_flags
& (IPN_ICMPERR
|IPN_ICMPQUERY
))
3595 fi
.fin_p
= IPPROTO_ICMP
;
3598 * We can do two sorts of lookups:
3599 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3600 * - default: we have the `in' and `out' address, look for `real'.
3602 if (np
->nl_flags
& IPN_IN
) {
3603 if ((nat
= nat_inlookup(&fi
, np
->nl_flags
, fi
.fin_p
,
3604 np
->nl_realip
, np
->nl_outip
))) {
3605 np
->nl_inip
= nat
->nat_inip
;
3606 np
->nl_inport
= nat
->nat_inport
;
3610 * If nl_inip is non null, this is a lookup based on the real
3611 * ip address. Else, we use the fake.
3613 if ((nat
= nat_outlookup(&fi
, np
->nl_flags
, fi
.fin_p
,
3614 np
->nl_inip
, np
->nl_outip
))) {
3616 if ((np
->nl_flags
& IPN_FINDFORWARD
) != 0) {
3618 bzero((char *)&fin
, sizeof(fin
));
3619 fin
.fin_p
= nat
->nat_p
;
3620 fin
.fin_data
[0] = ntohs(nat
->nat_outport
);
3621 fin
.fin_data
[1] = ntohs(nat
->nat_oport
);
3622 if (nat_inlookup(&fin
, np
->nl_flags
, fin
.fin_p
,
3624 nat
->nat_oip
) != NULL
) {
3625 np
->nl_flags
&= ~IPN_FINDFORWARD
;
3629 np
->nl_realip
= nat
->nat_outip
;
3630 np
->nl_realport
= nat
->nat_outport
;
3638 /* ------------------------------------------------------------------------ */
3639 /* Function: nat_match */
3640 /* Returns: int - 0 == no match, 1 == match */
3641 /* Parameters: fin(I) - pointer to packet information */
3642 /* np(I) - pointer to NAT rule */
3644 /* Pull the matching of a packet against a NAT rule out of that complex */
3645 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3646 /* ------------------------------------------------------------------------ */
3647 static int nat_match(fin
, np
)
3653 if (fin
->fin_v
!= 4)
3656 if (np
->in_p
&& fin
->fin_p
!= np
->in_p
)
3660 if (!(np
->in_redir
& (NAT_MAP
|NAT_MAPBLK
)))
3662 if (((fin
->fin_fi
.fi_saddr
& np
->in_inmsk
) != np
->in_inip
)
3663 ^ ((np
->in_flags
& IPN_NOTSRC
) != 0))
3665 if (((fin
->fin_fi
.fi_daddr
& np
->in_srcmsk
) != np
->in_srcip
)
3666 ^ ((np
->in_flags
& IPN_NOTDST
) != 0))
3669 if (!(np
->in_redir
& NAT_REDIRECT
))
3671 if (((fin
->fin_fi
.fi_saddr
& np
->in_srcmsk
) != np
->in_srcip
)
3672 ^ ((np
->in_flags
& IPN_NOTSRC
) != 0))
3674 if (((fin
->fin_fi
.fi_daddr
& np
->in_outmsk
) != np
->in_outip
)
3675 ^ ((np
->in_flags
& IPN_NOTDST
) != 0))
3680 if (!(fin
->fin_flx
& FI_TCPUDP
) ||
3681 (fin
->fin_flx
& (FI_SHORT
|FI_FRAGBODY
))) {
3682 if (ft
->ftu_scmp
|| ft
->ftu_dcmp
)
3687 return fr_tcpudpchk(fin
, ft
);
3691 /* ------------------------------------------------------------------------ */
3692 /* Function: nat_update */
3694 /* Parameters: fin(I) - pointer to packet information */
3695 /* nat(I) - pointer to NAT structure */
3696 /* Locks: nat_lock */
3698 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */
3699 /* called with fin_rev updated - i.e. after calling nat_proto(). */
3701 /* This must be called *after* nat_proto() because we need fin_rev set. */
3702 /* ------------------------------------------------------------------------ */
3703 void nat_update(fin
, nat
)
3707 ipftq_t
*ifq
, *ifq2
;
3709 ipnat_t
*np
= nat
->nat_ptr
;
3711 tqe
= &nat
->nat_tqe
;
3715 * We allow over-riding of NAT timeouts from NAT rules, even for
3716 * TCP, however, if it is TCP and there is no rule timeout set,
3717 * then do not update the timeout here.
3720 ifq2
= np
->in_tqehead
[fin
->fin_rev
];
3724 if (nat
->nat_p
== IPPROTO_TCP
&& ifq2
== NULL
) {
3731 tcpflags
= tcp
->th_flags
;
3732 dsize
= fin
->fin_dlen
- (TCP_OFF(tcp
) << 2) +
3733 ((tcpflags
& TH_SYN
) ? 1 : 0) +
3734 ((tcpflags
& TH_FIN
) ? 1 : 0);
3736 ack
= ntohl(tcp
->th_ack
);
3737 end
= ntohl(tcp
->th_seq
) + dsize
;
3739 if (SEQ_GT(ack
, nat
->nat_seqnext
[1 - fin
->fin_rev
]))
3740 nat
->nat_seqnext
[1 - fin
->fin_rev
] = ack
;
3742 if (nat
->nat_seqnext
[fin
->fin_rev
] == 0)
3743 nat
->nat_seqnext
[fin
->fin_rev
] = end
;
3745 (void) fr_tcp_age(&nat
->nat_tqe
, fin
, nat_tqb
, 0);
3748 if (nat
->nat_p
== IPPROTO_UDP
)
3750 else if (nat
->nat_p
== IPPROTO_ICMP
)
3756 fr_movequeue(tqe
, ifq
, ifq2
);
3761 /* ------------------------------------------------------------------------ */
3762 /* Function: fr_checknatout */
3763 /* Returns: int - -1 == packet failed NAT checks so block it, */
3764 /* 0 == no packet translation occurred, */
3765 /* 1 == packet was successfully translated. */
3766 /* Parameters: fin(I) - pointer to packet information */
3767 /* passp(I) - pointer to filtering result flags */
3769 /* Check to see if an outgoing packet should be changed. ICMP packets are */
3770 /* first checked to see if they match an existing entry (if an error), */
3771 /* otherwise a search of the current NAT table is made. If neither results */
3772 /* in a match then a search for a matching NAT rule is made. Create a new */
3773 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
3774 /* packet header(s) as required. */
3775 /* ------------------------------------------------------------------------ */
3776 int fr_checknatout(fin
, passp
)
3780 ipnat_t
*np
= NULL
, *npnext
;
3781 struct ifnet
*ifp
, *sifp
;
3782 icmphdr_t
*icmp
= NULL
;
3783 tcphdr_t
*tcp
= NULL
;
3784 int rval
, natfailed
;
3791 if (fr_nat_lock
!= 0)
3793 if (nat_stats
.ns_rules
== 0 && nat_instances
== NULL
)
3798 sifp
= fin
->fin_ifp
;
3800 ifp
= fr
->fr_tifs
[fin
->fin_rev
].fd_ifp
;
3801 if ((ifp
!= NULL
) && (ifp
!= (void *)-1))
3806 if (!(fin
->fin_flx
& FI_SHORT
) && (fin
->fin_off
== 0)) {
3819 * This is an incoming packet, so the destination is
3820 * the icmp_id and the source port equals 0
3822 if (nat_icmpquerytype4(icmp
->icmp_type
))
3823 nflags
= IPN_ICMPQUERY
;
3829 if ((nflags
& IPN_TCPUDP
))
3833 ipa
= fin
->fin_saddr
;
3835 READ_ENTER(&ipf_nat
);
3837 if (((fin
->fin_flx
& FI_ICMPERR
) != 0) &&
3838 (nat
= nat_icmperror(fin
, &nflags
, NAT_OUTBOUND
)))
3840 else if ((fin
->fin_flx
& FI_FRAG
) && (nat
= fr_nat_knownfrag(fin
)))
3842 else if ((nat
= nat_outlookup(fin
, nflags
|NAT_SEARCH
, (u_int
)fin
->fin_p
,
3843 fin
->fin_src
, fin
->fin_dst
))) {
3844 nflags
= nat
->nat_flags
;
3845 } else if (fin
->fin_off
== 0) {
3846 u_32_t hv
, msk
, nmsk
;
3851 * If there is no current entry in the nat table for this IP#,
3852 * create one for it (if there is a matching rule).
3855 iph
= ipa
& htonl(msk
);
3856 hv
= NAT_HASH_FN(iph
, 0, ipf_natrules_sz
);
3857 for (np
= nat_rules
[hv
]; np
; np
= npnext
) {
3858 npnext
= np
->in_mnext
;
3859 if (np
->in_ifps
[1] && (np
->in_ifps
[1] != ifp
))
3861 if (np
->in_v
!= fin
->fin_v
)
3863 if (np
->in_p
&& (np
->in_p
!= fin
->fin_p
))
3865 if ((np
->in_flags
& IPN_RF
) && !(np
->in_flags
& nflags
))
3867 if (np
->in_flags
& IPN_FILTER
) {
3868 if (!nat_match(fin
, np
))
3870 } else if ((ipa
& np
->in_inmsk
) != np
->in_inip
)
3874 !fr_matchtag(&np
->in_tag
, &fr
->fr_nattag
))
3877 if (*np
->in_plabel
!= '\0') {
3878 if (((np
->in_flags
& IPN_FILTER
) == 0) &&
3879 (np
->in_dport
!= tcp
->th_dport
))
3881 if (appr_ok(fin
, tcp
, np
) == 0)
3885 MUTEX_ENTER(&ipf_nat_new
);
3886 nat
= nat_new(fin
, np
, NULL
, nflags
, NAT_OUTBOUND
);
3887 MUTEX_EXIT(&ipf_nat_new
);
3894 if ((np
== NULL
) && (nmsk
!= 0)) {
3897 if (nmsk
& 0x80000000)
3909 rval
= fr_natout(fin
, nat
, natadd
, nflags
);
3911 MUTEX_ENTER(&nat
->nat_lock
);
3912 nat_update(fin
, nat
);
3913 nat
->nat_bytes
[1] += fin
->fin_plen
;
3915 fin
->fin_pktnum
= nat
->nat_pkts
[1];
3916 MUTEX_EXIT(&nat
->nat_lock
);
3920 RWLOCK_EXIT(&ipf_nat
);
3925 fin
->fin_flx
|= FI_BADNAT
;
3927 fin
->fin_ifp
= sifp
;
3931 /* ------------------------------------------------------------------------ */
3932 /* Function: fr_natout */
3933 /* Returns: int - -1 == packet failed NAT checks so block it, */
3934 /* 1 == packet was successfully translated. */
3935 /* Parameters: fin(I) - pointer to packet information */
3936 /* nat(I) - pointer to NAT structure */
3937 /* natadd(I) - flag indicating if it is safe to add frag cache */
3938 /* nflags(I) - NAT flags set for this packet */
3940 /* Translate a packet coming "out" on an interface. */
3941 /* ------------------------------------------------------------------------ */
3942 int fr_natout(fin
, nat
, natadd
, nflags
)
3957 if ((natadd
!= 0) && (fin
->fin_flx
& FI_FRAG
) && (np
!= NULL
))
3958 (void) fr_nat_newfrag(fin
, 0, nat
);
3961 * Fix up checksums, not by recalculating them, but
3962 * simply computing adjustments.
3963 * This is only done for STREAMS based IP implementations where the
3964 * checksum has already been calculated by IP. In all other cases,
3965 * IPFilter is called before the checksum needs calculating so there
3966 * is no call to modify whatever is in the header now.
3968 if (fin
->fin_v
== 4) {
3969 if (nflags
== IPN_ICMPERR
) {
3970 u_32_t s1
, s2
, sumd
;
3972 s1
= LONG_SUM(ntohl(fin
->fin_saddr
));
3973 s2
= LONG_SUM(ntohl(nat
->nat_outip
.s_addr
));
3974 CALC_SUMD(s1
, s2
, sumd
);
3975 fix_outcksum(fin
, &fin
->fin_ip
->ip_sum
, sumd
);
3977 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3978 defined(linux) || defined(BRIDGE_IPF)
3981 * Strictly speaking, this isn't necessary on BSD
3982 * kernels because they do checksum calculation after
3983 * this code has run BUT if ipfilter is being used
3984 * to do NAT as a bridge, that code doesn't exist.
3986 if (nat
->nat_dir
== NAT_OUTBOUND
)
3987 fix_outcksum(fin
, &fin
->fin_ip
->ip_sum
,
3990 fix_incksum(fin
, &fin
->fin_ip
->ip_sum
,
3996 if (!(fin
->fin_flx
& FI_SHORT
) && (fin
->fin_off
== 0)) {
3999 if ((nat
->nat_outport
!= 0) && (nflags
& IPN_TCPUDP
)) {
4002 tcp
->th_sport
= nat
->nat_outport
;
4003 fin
->fin_data
[0] = ntohs(nat
->nat_outport
);
4006 if ((nat
->nat_outport
!= 0) && (nflags
& IPN_ICMPQUERY
)) {
4008 icmp
->icmp_id
= nat
->nat_outport
;
4011 csump
= nat_proto(fin
, nat
, nflags
);
4014 * The above comments do not hold for layer 4 (or higher)
4017 if (csump
!= NULL
) {
4018 if (nat
->nat_dir
== NAT_OUTBOUND
)
4019 fix_outcksum(fin
, csump
, nat
->nat_sumd
[1]);
4021 fix_incksum(fin
, csump
, nat
->nat_sumd
[1]);
4025 fin
->fin_ip
->ip_src
= nat
->nat_outip
;
4026 #ifdef IPFILTER_SYNC
4027 ipfsync_update(SMC_NAT
, fin
, nat
->nat_sync
);
4029 /* ------------------------------------------------------------- */
4030 /* A few quick notes: */
4031 /* Following are test conditions prior to calling the */
4032 /* appr_check routine. */
4034 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4035 /* with a redirect rule, we attempt to match the packet's */
4036 /* source port against in_dport, otherwise we'd compare the */
4037 /* packet's destination. */
4038 /* ------------------------------------------------------------- */
4039 if ((np
!= NULL
) && (np
->in_apr
!= NULL
)) {
4040 i
= appr_check(fin
, nat
);
4045 ATOMIC_INCL(nat_stats
.ns_mapped
[1]);
4046 fin
->fin_flx
|= FI_NATED
;
4051 /* ------------------------------------------------------------------------ */
4052 /* Function: fr_checknatin */
4053 /* Returns: int - -1 == packet failed NAT checks so block it, */
4054 /* 0 == no packet translation occurred, */
4055 /* 1 == packet was successfully translated. */
4056 /* Parameters: fin(I) - pointer to packet information */
4057 /* passp(I) - pointer to filtering result flags */
4059 /* Check to see if an incoming packet should be changed. ICMP packets are */
4060 /* first checked to see if they match an existing entry (if an error), */
4061 /* otherwise a search of the current NAT table is made. If neither results */
4062 /* in a match then a search for a matching NAT rule is made. Create a new */
4063 /* NAT entry if we matched a NAT rule. Lastly, actually change the */
4064 /* packet header(s) as required. */
4065 /* ------------------------------------------------------------------------ */
4066 int fr_checknatin(fin
, passp
)
4070 ipnat_t
*np
, *npnext
;
4071 u_int nflags
, natadd
;
4072 int rval
, natfailed
;
4081 if (fr_nat_lock
!= 0)
4083 if (nat_stats
.ns_rules
== 0 && nat_instances
== NULL
)
4094 if (!(fin
->fin_flx
& FI_SHORT
) && (fin
->fin_off
== 0)) {
4107 * This is an incoming packet, so the destination is
4108 * the icmp_id and the source port equals 0
4110 if (nat_icmpquerytype4(icmp
->icmp_type
)) {
4111 nflags
= IPN_ICMPQUERY
;
4112 dport
= icmp
->icmp_id
;
4118 if ((nflags
& IPN_TCPUDP
)) {
4120 dport
= tcp
->th_dport
;
4126 READ_ENTER(&ipf_nat
);
4128 if (((fin
->fin_flx
& FI_ICMPERR
) != 0) &&
4129 (nat
= nat_icmperror(fin
, &nflags
, NAT_INBOUND
)))
4131 else if ((fin
->fin_flx
& FI_FRAG
) &&
4132 (nat
= fr_nat_knownfrag(fin
)))
4134 else if ((nat
= nat_inlookup(fin
, nflags
|NAT_SEARCH
, (u_int
)fin
->fin_p
,
4135 fin
->fin_src
, in
))) {
4136 nflags
= nat
->nat_flags
;
4137 } else if (fin
->fin_off
== 0) {
4138 u_32_t hv
, msk
, rmsk
;
4143 * If there is no current entry in the nat table for this IP#,
4144 * create one for it (if there is a matching rule).
4147 iph
= in
.s_addr
& htonl(msk
);
4148 hv
= NAT_HASH_FN(iph
, 0, ipf_rdrrules_sz
);
4149 for (np
= rdr_rules
[hv
]; np
; np
= npnext
) {
4150 npnext
= np
->in_rnext
;
4151 if (np
->in_ifps
[0] && (np
->in_ifps
[0] != ifp
))
4153 if (np
->in_v
!= fin
->fin_v
)
4155 if (np
->in_p
&& (np
->in_p
!= fin
->fin_p
))
4157 if ((np
->in_flags
& IPN_RF
) && !(np
->in_flags
& nflags
))
4159 if (np
->in_flags
& IPN_FILTER
) {
4160 if (!nat_match(fin
, np
))
4163 if ((in
.s_addr
& np
->in_outmsk
) != np
->in_outip
)
4166 ((ntohs(np
->in_pmax
) < ntohs(dport
)) ||
4167 (ntohs(dport
) < ntohs(np
->in_pmin
))))
4171 if (*np
->in_plabel
!= '\0') {
4172 if (!appr_ok(fin
, tcp
, np
)) {
4178 * If we've matched a round-robin rule but it has
4179 * moved in the list since we got it, start over as
4180 * this is now no longer correct.
4182 MUTEX_ENTER(&ipf_nat_new
);
4183 if ((npnext
!= np
->in_rnext
) &&
4184 (np
->in_flags
& IPN_ROUNDR
)) {
4185 MUTEX_EXIT(&ipf_nat_new
);
4188 nat
= nat_new(fin
, np
, NULL
, nflags
, NAT_INBOUND
);
4189 MUTEX_EXIT(&ipf_nat_new
);
4197 if ((np
== NULL
) && (rmsk
!= 0)) {
4200 if (rmsk
& 0x80000000)
4212 rval
= fr_natin(fin
, nat
, natadd
, nflags
);
4214 MUTEX_ENTER(&nat
->nat_lock
);
4215 nat_update(fin
, nat
);
4216 nat
->nat_bytes
[0] += fin
->fin_plen
;
4218 fin
->fin_pktnum
= nat
->nat_pkts
[0];
4219 MUTEX_EXIT(&nat
->nat_lock
);
4223 RWLOCK_EXIT(&ipf_nat
);
4228 fin
->fin_flx
|= FI_BADNAT
;
4234 /* ------------------------------------------------------------------------ */
4235 /* Function: fr_natin */
4236 /* Returns: int - -1 == packet failed NAT checks so block it, */
4237 /* 1 == packet was successfully translated. */
4238 /* Parameters: fin(I) - pointer to packet information */
4239 /* nat(I) - pointer to NAT structure */
4240 /* natadd(I) - flag indicating if it is safe to add frag cache */
4241 /* nflags(I) - NAT flags set for this packet */
4242 /* Locks Held: ipf_nat (READ) */
4244 /* Translate a packet coming "in" on an interface. */
4245 /* ------------------------------------------------------------------------ */
4246 int fr_natin(fin
, nat
, natadd
, nflags
)
4259 fin
->fin_fr
= nat
->nat_fr
;
4262 if ((natadd
!= 0) && (fin
->fin_flx
& FI_FRAG
))
4263 (void) fr_nat_newfrag(fin
, 0, nat
);
4265 /* ------------------------------------------------------------- */
4266 /* A few quick notes: */
4267 /* Following are test conditions prior to calling the */
4268 /* appr_check routine. */
4270 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4271 /* with a map rule, we attempt to match the packet's */
4272 /* source port against in_dport, otherwise we'd compare the */
4273 /* packet's destination. */
4274 /* ------------------------------------------------------------- */
4275 if (np
->in_apr
!= NULL
) {
4276 i
= appr_check(fin
, nat
);
4283 #ifdef IPFILTER_SYNC
4284 ipfsync_update(SMC_NAT
, fin
, nat
->nat_sync
);
4287 fin
->fin_ip
->ip_dst
= nat
->nat_inip
;
4288 fin
->fin_fi
.fi_daddr
= nat
->nat_inip
.s_addr
;
4289 if (nflags
& IPN_TCPUDP
)
4293 * Fix up checksums, not by recalculating them, but
4294 * simply computing adjustments.
4295 * Why only do this for some platforms on inbound packets ?
4296 * Because for those that it is done, IP processing is yet to happen
4297 * and so the IPv4 header checksum has not yet been evaluated.
4298 * Perhaps it should always be done for the benefit of things like
4299 * fast forwarding (so that it doesn't need to be recomputed) but with
4300 * header checksum offloading, perhaps it is a moot point.
4302 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4303 defined(__osf__) || defined(linux)
4304 if (nat
->nat_dir
== NAT_OUTBOUND
)
4305 fix_incksum(fin
, &fin
->fin_ip
->ip_sum
, nat
->nat_ipsumd
);
4307 fix_outcksum(fin
, &fin
->fin_ip
->ip_sum
, nat
->nat_ipsumd
);
4310 if (!(fin
->fin_flx
& FI_SHORT
) && (fin
->fin_off
== 0)) {
4313 if ((nat
->nat_inport
!= 0) && (nflags
& IPN_TCPUDP
)) {
4314 tcp
->th_dport
= nat
->nat_inport
;
4315 fin
->fin_data
[1] = ntohs(nat
->nat_inport
);
4319 if ((nat
->nat_inport
!= 0) && (nflags
& IPN_ICMPQUERY
)) {
4322 icmp
->icmp_id
= nat
->nat_inport
;
4325 csump
= nat_proto(fin
, nat
, nflags
);
4328 * The above comments do not hold for layer 4 (or higher)
4331 if (csump
!= NULL
) {
4332 if (nat
->nat_dir
== NAT_OUTBOUND
)
4333 fix_incksum(fin
, csump
, nat
->nat_sumd
[0]);
4335 fix_outcksum(fin
, csump
, nat
->nat_sumd
[0]);
4338 ATOMIC_INCL(nat_stats
.ns_mapped
[0]);
4339 fin
->fin_flx
|= FI_NATED
;
4340 if (np
!= NULL
&& np
->in_tag
.ipt_num
[0] != 0)
4341 fin
->fin_nattag
= &np
->in_tag
;
4346 /* ------------------------------------------------------------------------ */
4347 /* Function: nat_proto */
4348 /* Returns: u_short* - pointer to transport header checksum to update, */
4349 /* NULL if the transport protocol is not recognised */
4350 /* as needing a checksum update. */
4351 /* Parameters: fin(I) - pointer to packet information */
4352 /* nat(I) - pointer to NAT structure */
4353 /* nflags(I) - NAT flags set for this packet */
4355 /* Return the pointer to the checksum field for each protocol so understood.*/
4356 /* If support for making other changes to a protocol header is required, */
4357 /* that is not strictly 'address' translation, such as clamping the MSS in */
4358 /* TCP down to a specific value, then do it from here. */
4359 /* ------------------------------------------------------------------------ */
4360 u_short
*nat_proto(fin
, nat
, nflags
)
4371 if (fin
->fin_out
== 0) {
4372 fin
->fin_rev
= (nat
->nat_dir
== NAT_OUTBOUND
);
4374 fin
->fin_rev
= (nat
->nat_dir
== NAT_INBOUND
);
4382 csump
= &tcp
->th_sum
;
4385 * Do a MSS CLAMPING on a SYN packet,
4386 * only deal IPv4 for now.
4388 if ((nat
->nat_mssclamp
!= 0) && (tcp
->th_flags
& TH_SYN
) != 0)
4389 nat_mssclamp(tcp
, nat
->nat_mssclamp
, fin
, csump
);
4397 csump
= &udp
->uh_sum
;
4403 if ((nflags
& IPN_ICMPQUERY
) != 0) {
4404 if (icmp
->icmp_cksum
!= 0)
4405 csump
= &icmp
->icmp_cksum
;
4413 /* ------------------------------------------------------------------------ */
4414 /* Function: fr_natunload */
4416 /* Parameters: Nil */
4418 /* Free all memory used by NAT structures allocated at runtime. */
4419 /* ------------------------------------------------------------------------ */
4422 ipftq_t
*ifq
, *ifqnext
;
4424 (void) nat_clearlist();
4425 (void) nat_flushtable();
4428 * Proxy timeout queues are not cleaned here because although they
4429 * exist on the NAT list, appr_unload is called after fr_natunload
4430 * and the proxies actually are responsible for them being created.
4431 * Should the proxy timeouts have their own list? There's no real
4432 * justification as this is the only complication.
4434 for (ifq
= nat_utqe
; ifq
!= NULL
; ifq
= ifqnext
) {
4435 ifqnext
= ifq
->ifq_next
;
4436 if (((ifq
->ifq_flags
& IFQF_PROXY
) == 0) &&
4437 (fr_deletetimeoutqueue(ifq
) == 0))
4438 fr_freetimeoutqueue(ifq
);
4441 if (nat_table
[0] != NULL
) {
4442 KFREES(nat_table
[0], sizeof(nat_t
*) * ipf_nattable_sz
);
4443 nat_table
[0] = NULL
;
4445 if (nat_table
[1] != NULL
) {
4446 KFREES(nat_table
[1], sizeof(nat_t
*) * ipf_nattable_sz
);
4447 nat_table
[1] = NULL
;
4449 if (nat_rules
!= NULL
) {
4450 KFREES(nat_rules
, sizeof(ipnat_t
*) * ipf_natrules_sz
);
4453 if (rdr_rules
!= NULL
) {
4454 KFREES(rdr_rules
, sizeof(ipnat_t
*) * ipf_rdrrules_sz
);
4457 if (ipf_hm_maptable
!= NULL
) {
4458 KFREES(ipf_hm_maptable
, sizeof(hostmap_t
*) * ipf_hostmap_sz
);
4459 ipf_hm_maptable
= NULL
;
4461 if (nat_stats
.ns_bucketlen
[0] != NULL
) {
4462 KFREES(nat_stats
.ns_bucketlen
[0],
4463 sizeof(u_long
*) * ipf_nattable_sz
);
4464 nat_stats
.ns_bucketlen
[0] = NULL
;
4466 if (nat_stats
.ns_bucketlen
[1] != NULL
) {
4467 KFREES(nat_stats
.ns_bucketlen
[1],
4468 sizeof(u_long
*) * ipf_nattable_sz
);
4469 nat_stats
.ns_bucketlen
[1] = NULL
;
4472 if (fr_nat_maxbucket_reset
== 1)
4473 fr_nat_maxbucket
= 0;
4475 if (fr_nat_init
== 1) {
4477 fr_sttab_destroy(nat_tqb
);
4479 RW_DESTROY(&ipf_natfrag
);
4480 RW_DESTROY(&ipf_nat
);
4482 MUTEX_DESTROY(&ipf_nat_new
);
4483 MUTEX_DESTROY(&ipf_natio
);
4485 MUTEX_DESTROY(&nat_udptq
.ifq_lock
);
4486 MUTEX_DESTROY(&nat_icmptq
.ifq_lock
);
4487 MUTEX_DESTROY(&nat_iptq
.ifq_lock
);
4492 /* ------------------------------------------------------------------------ */
4493 /* Function: fr_natexpire */
4495 /* Parameters: Nil */
4497 /* Check all of the timeout queues for entries at the top which need to be */
4499 /* ------------------------------------------------------------------------ */
4502 ipftq_t
*ifq
, *ifqnext
;
4503 ipftqent_t
*tqe
, *tqn
;
4508 WRITE_ENTER(&ipf_nat
);
4509 for (ifq
= nat_tqb
, i
= 0; ifq
!= NULL
; ifq
= ifq
->ifq_next
) {
4510 for (tqn
= ifq
->ifq_head
; ((tqe
= tqn
) != NULL
); i
++) {
4511 if (tqe
->tqe_die
> fr_ticks
)
4513 tqn
= tqe
->tqe_next
;
4514 nat_delete(tqe
->tqe_parent
, NL_EXPIRE
);
4518 for (ifq
= nat_utqe
; ifq
!= NULL
; ifq
= ifqnext
) {
4519 ifqnext
= ifq
->ifq_next
;
4521 for (tqn
= ifq
->ifq_head
; ((tqe
= tqn
) != NULL
); i
++) {
4522 if (tqe
->tqe_die
> fr_ticks
)
4524 tqn
= tqe
->tqe_next
;
4525 nat_delete(tqe
->tqe_parent
, NL_EXPIRE
);
4529 for (ifq
= nat_utqe
; ifq
!= NULL
; ifq
= ifqnext
) {
4530 ifqnext
= ifq
->ifq_next
;
4532 if (((ifq
->ifq_flags
& IFQF_DELETE
) != 0) &&
4533 (ifq
->ifq_ref
== 0)) {
4534 fr_freetimeoutqueue(ifq
);
4538 if (fr_nat_doflush
!= 0) {
4543 RWLOCK_EXIT(&ipf_nat
);
4548 /* ------------------------------------------------------------------------ */
4549 /* Function: fr_natsync */
4551 /* Parameters: ifp(I) - pointer to network interface */
4553 /* Walk through all of the currently active NAT sessions, looking for those */
4554 /* which need to have their translated address updated. */
4555 /* ------------------------------------------------------------------------ */
4556 void fr_natsync(ifp
)
4559 u_32_t sum1
, sum2
, sumd
;
4566 if (fr_running
<= 0)
4570 * Change IP addresses for NAT sessions for any protocol except TCP
4571 * since it will break the TCP connection anyway. The only rules
4572 * which will get changed are those which are "map ... -> 0/32",
4573 * where the rule specifies the address is taken from the interface.
4576 WRITE_ENTER(&ipf_nat
);
4578 if (fr_running
<= 0) {
4579 RWLOCK_EXIT(&ipf_nat
);
4583 for (nat
= nat_instances
; nat
; nat
= nat
->nat_next
) {
4584 if ((nat
->nat_flags
& IPN_TCP
) != 0)
4588 (n
->in_outip
!= 0) || (n
->in_outmsk
!= 0xffffffff))
4590 if (((ifp
== NULL
) || (ifp
== nat
->nat_ifps
[0]) ||
4591 (ifp
== nat
->nat_ifps
[1]))) {
4592 nat
->nat_ifps
[0] = GETIFP(nat
->nat_ifnames
[0], 4);
4593 if (nat
->nat_ifnames
[1][0] != '\0') {
4594 nat
->nat_ifps
[1] = GETIFP(nat
->nat_ifnames
[1],
4597 nat
->nat_ifps
[1] = nat
->nat_ifps
[0];
4598 ifp2
= nat
->nat_ifps
[0];
4603 * Change the map-to address to be the same as the
4606 sum1
= nat
->nat_outip
.s_addr
;
4607 if (fr_ifpaddr(4, FRI_NORMAL
, ifp2
, &in
, NULL
) != -1)
4608 nat
->nat_outip
= in
;
4609 sum2
= nat
->nat_outip
.s_addr
;
4614 * Readjust the checksum adjustment to take into
4615 * account the new IP#.
4617 CALC_SUMD(sum1
, sum2
, sumd
);
4618 /* XXX - dont change for TCP when solaris does
4619 * hardware checksumming.
4621 sumd
+= nat
->nat_sumd
[0];
4622 nat
->nat_sumd
[0] = (sumd
& 0xffff) + (sumd
>> 16);
4623 nat
->nat_sumd
[1] = nat
->nat_sumd
[0];
4627 for (n
= nat_list
; (n
!= NULL
); n
= n
->in_next
) {
4628 if ((ifp
== NULL
) || (n
->in_ifps
[0] == ifp
))
4629 n
->in_ifps
[0] = fr_resolvenic(n
->in_ifnames
[0], 4);
4630 if ((ifp
== NULL
) || (n
->in_ifps
[1] == ifp
))
4631 n
->in_ifps
[1] = fr_resolvenic(n
->in_ifnames
[1], 4);
4633 RWLOCK_EXIT(&ipf_nat
);
4638 /* ------------------------------------------------------------------------ */
4639 /* Function: nat_icmpquerytype4 */
4640 /* Returns: int - 1 == success, 0 == failure */
4641 /* Parameters: icmptype(I) - ICMP type number */
4643 /* Tests to see if the ICMP type number passed is a query/response type or */
4645 /* ------------------------------------------------------------------------ */
4646 static int nat_icmpquerytype4(icmptype
)
4651 * For the ICMP query NAT code, it is essential that both the query
4652 * and the reply match on the NAT rule. Because the NAT structure
4653 * does not keep track of the icmptype, and a single NAT structure
4654 * is used for all icmp types with the same src, dest and id, we
4655 * simply define the replies as queries as well. The funny thing is,
4656 * although it seems silly to call a reply a query, this is exactly
4657 * as it is defined in the IPv4 specification
4663 case ICMP_ECHOREPLY
:
4665 /* router advertisement/solicitation is currently unsupported: */
4666 /* it would require rewriting the ICMP data section */
4668 case ICMP_TSTAMPREPLY
:
4670 case ICMP_IREQREPLY
:
4672 case ICMP_MASKREPLY
:
4680 /* ------------------------------------------------------------------------ */
4681 /* Function: nat_log */
4683 /* Parameters: nat(I) - pointer to NAT structure */
4684 /* type(I) - type of log entry to create */
4686 /* Creates a NAT log entry. */
4687 /* ------------------------------------------------------------------------ */
4688 void nat_log(nat
, type
)
4702 natl
.nl_inip
= nat
->nat_inip
;
4703 natl
.nl_outip
= nat
->nat_outip
;
4704 natl
.nl_origip
= nat
->nat_oip
;
4705 natl
.nl_bytes
[0] = nat
->nat_bytes
[0];
4706 natl
.nl_bytes
[1] = nat
->nat_bytes
[1];
4707 natl
.nl_pkts
[0] = nat
->nat_pkts
[0];
4708 natl
.nl_pkts
[1] = nat
->nat_pkts
[1];
4709 natl
.nl_origport
= nat
->nat_oport
;
4710 natl
.nl_inport
= nat
->nat_inport
;
4711 natl
.nl_outport
= nat
->nat_outport
;
4712 natl
.nl_p
= nat
->nat_p
;
4713 natl
.nl_type
= type
;
4716 if (nat
->nat_ptr
!= NULL
) {
4717 for (rulen
= 0, np
= nat_list
; np
; np
= np
->in_next
, rulen
++)
4718 if (np
== nat
->nat_ptr
) {
4719 natl
.nl_rule
= rulen
;
4725 sizes
[0] = sizeof(natl
);
4728 (void) ipllog(IPL_LOGNAT
, NULL
, items
, sizes
, types
, 1);
4733 #if defined(__OpenBSD__)
4734 /* ------------------------------------------------------------------------ */
4735 /* Function: nat_ifdetach */
4737 /* Parameters: ifp(I) - pointer to network interface */
4739 /* Compatibility interface for OpenBSD to trigger the correct updating of */
4740 /* interface references within IPFilter. */
4741 /* ------------------------------------------------------------------------ */
4742 void nat_ifdetach(ifp
)
4751 /* ------------------------------------------------------------------------ */
4752 /* Function: fr_ipnatderef */
4754 /* Parameters: inp(I) - pointer to pointer to NAT rule */
4755 /* Write Locks: ipf_nat */
4757 /* ------------------------------------------------------------------------ */
4758 void fr_ipnatderef(inp
)
4767 if (in
->in_use
== 0 && (in
->in_flags
& IPN_DELETE
)) {
4769 appr_free(in
->in_apr
);
4770 MUTEX_DESTROY(&in
->in_lock
);
4772 nat_stats
.ns_rules
--;
4773 #if SOLARIS && !defined(_INET_IP_STACK_H)
4774 if (nat_stats
.ns_rules
== 0)
4775 pfil_delayed_copy
= 1;
4781 /* ------------------------------------------------------------------------ */
4782 /* Function: fr_natderef */
4784 /* Parameters: natp(I) - pointer to pointer to NAT table entry */
4786 /* Decrement the reference counter for this NAT table entry and free it if */
4787 /* there are no more things using it. */
4789 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4790 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4791 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */
4792 /* because nat_delete() will do that and send nat_ref to -1. */
4794 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4795 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4796 /* ------------------------------------------------------------------------ */
4797 void fr_natderef(natp
)
4805 MUTEX_ENTER(&nat
->nat_lock
);
4806 if (nat
->nat_ref
> 1) {
4808 MUTEX_EXIT(&nat
->nat_lock
);
4811 MUTEX_EXIT(&nat
->nat_lock
);
4813 WRITE_ENTER(&ipf_nat
);
4814 nat_delete(nat
, NL_EXPIRE
);
4815 RWLOCK_EXIT(&ipf_nat
);
4819 /* ------------------------------------------------------------------------ */
4820 /* Function: fr_natclone */
4821 /* Returns: nat_t* - NULL == cloning failed, */
4822 /* else pointer to new NAT structure */
4823 /* Parameters: fin(I) - pointer to packet information */
4824 /* nat(I) - pointer to master NAT structure */
4825 /* Write Lock: ipf_nat */
4827 /* Create a "duplicate" NAT table entry from the master. */
4828 /* ------------------------------------------------------------------------ */
4829 static nat_t
*fr_natclone(fin
, nat
)
4837 KMALLOC(clone
, nat_t
*);
4840 bcopy((char *)nat
, (char *)clone
, sizeof(*clone
));
4842 MUTEX_NUKE(&clone
->nat_lock
);
4844 clone
->nat_aps
= NULL
;
4846 * Initialize all these so that nat_delete() doesn't cause a crash.
4848 clone
->nat_tqe
.tqe_pnext
= NULL
;
4849 clone
->nat_tqe
.tqe_next
= NULL
;
4850 clone
->nat_tqe
.tqe_ifq
= NULL
;
4851 clone
->nat_tqe
.tqe_parent
= clone
;
4853 clone
->nat_flags
&= ~SI_CLONE
;
4854 clone
->nat_flags
|= SI_CLONED
;
4857 clone
->nat_hm
->hm_ref
++;
4859 if (nat_insert(clone
, fin
->fin_rev
) == -1) {
4863 np
= clone
->nat_ptr
;
4866 nat_log(clone
, (u_int
)np
->in_redir
);
4871 MUTEX_ENTER(&fr
->fr_lock
);
4873 MUTEX_EXIT(&fr
->fr_lock
);
4877 * Because the clone is created outside the normal loop of things and
4878 * TCP has special needs in terms of state, initialise the timeout
4879 * state of the new NAT from here.
4881 if (clone
->nat_p
== IPPROTO_TCP
) {
4882 (void) fr_tcp_age(&clone
->nat_tqe
, fin
, nat_tqb
,
4885 #ifdef IPFILTER_SYNC
4886 clone
->nat_sync
= ipfsync_new(SMC_NAT
, fin
, clone
);
4889 nat_log(clone
, NL_CLONE
);
4894 /* ------------------------------------------------------------------------ */
4895 /* Function: nat_wildok */
4896 /* Returns: int - 1 == packet's ports match wildcards */
4897 /* 0 == packet's ports don't match wildcards */
4898 /* Parameters: nat(I) - NAT entry */
4899 /* sport(I) - source port */
4900 /* dport(I) - destination port */
4901 /* flags(I) - wildcard flags */
4902 /* dir(I) - packet direction */
4904 /* Use NAT entry and packet direction to determine which combination of */
4905 /* wildcard flags should be used. */
4906 /* ------------------------------------------------------------------------ */
4907 static int nat_wildok(nat
, sport
, dport
, flags
, dir
)
4915 * When called by dir is set to
4916 * nat_inlookup NAT_INBOUND (0)
4917 * nat_outlookup NAT_OUTBOUND (1)
4919 * We simply combine the packet's direction in dir with the original
4920 * "intended" direction of that NAT entry in nat->nat_dir to decide
4921 * which combination of wildcard flags to allow.
4924 switch ((dir
<< 1) | nat
->nat_dir
)
4926 case 3: /* outbound packet / outbound entry */
4927 if (((nat
->nat_inport
== sport
) ||
4928 (flags
& SI_W_SPORT
)) &&
4929 ((nat
->nat_oport
== dport
) ||
4930 (flags
& SI_W_DPORT
)))
4933 case 2: /* outbound packet / inbound entry */
4934 if (((nat
->nat_outport
== sport
) ||
4935 (flags
& SI_W_DPORT
)) &&
4936 ((nat
->nat_oport
== dport
) ||
4937 (flags
& SI_W_SPORT
)))
4940 case 1: /* inbound packet / outbound entry */
4941 if (((nat
->nat_oport
== sport
) ||
4942 (flags
& SI_W_DPORT
)) &&
4943 ((nat
->nat_outport
== dport
) ||
4944 (flags
& SI_W_SPORT
)))
4947 case 0: /* inbound packet / inbound entry */
4948 if (((nat
->nat_oport
== sport
) ||
4949 (flags
& SI_W_SPORT
)) &&
4950 ((nat
->nat_outport
== dport
) ||
4951 (flags
& SI_W_DPORT
)))
4962 /* ------------------------------------------------------------------------ */
4963 /* Function: nat_mssclamp */
4965 /* Parameters: tcp(I) - pointer to TCP header */
4966 /* maxmss(I) - value to clamp the TCP MSS to */
4967 /* fin(I) - pointer to packet information */
4968 /* csump(I) - pointer to TCP checksum */
4970 /* Check for MSS option and clamp it if necessary. If found and changed, */
4971 /* then the TCP header checksum will be updated to reflect the change in */
4973 /* ------------------------------------------------------------------------ */
4974 static void nat_mssclamp(tcp
, maxmss
, fin
, csump
)
4980 u_char
*cp
, *ep
, opt
;
4984 hlen
= TCP_OFF(tcp
) << 2;
4985 if (hlen
> sizeof(*tcp
)) {
4986 cp
= (u_char
*)tcp
+ sizeof(*tcp
);
4987 ep
= (u_char
*)tcp
+ hlen
;
4991 if (opt
== TCPOPT_EOL
)
4993 else if (opt
== TCPOPT_NOP
) {
5001 if ((cp
+ advance
> ep
) || (advance
<= 0))
5008 mss
= cp
[2] * 256 + cp
[3];
5010 cp
[2] = maxmss
/ 256;
5011 cp
[3] = maxmss
& 0xff;
5012 CALC_SUMD(mss
, maxmss
, sumd
);
5013 fix_outcksum(fin
, csump
, sumd
);
5017 /* ignore unknown options */
5027 /* ------------------------------------------------------------------------ */
5028 /* Function: fr_setnatqueue */
5030 /* Parameters: nat(I)- pointer to NAT structure */
5031 /* rev(I) - forward(0) or reverse(1) direction */
5032 /* Locks: ipf_nat (read or write) */
5034 /* Put the NAT entry on its default queue entry, using rev as a helper in */
5035 /* determining which queue it should be placed on. */
5036 /* ------------------------------------------------------------------------ */
5037 void fr_setnatqueue(nat
, rev
)
5041 ipftq_t
*oifq
, *nifq
;
5043 if (nat
->nat_ptr
!= NULL
)
5044 nifq
= nat
->nat_ptr
->in_tqehead
[rev
];
5058 nifq
= nat_tqb
+ nat
->nat_tqe
.tqe_state
[rev
];
5066 oifq
= nat
->nat_tqe
.tqe_ifq
;
5068 * If it's currently on a timeout queue, move it from one queue to
5069 * another, else put it on the end of the newly determined queue.
5072 fr_movequeue(&nat
->nat_tqe
, oifq
, nifq
);
5074 fr_queueappend(&nat
->nat_tqe
, nifq
, nat
);
5079 /* ------------------------------------------------------------------------ */
5080 /* Function: nat_getnext */
5081 /* Returns: int - 0 == ok, else error */
5082 /* Parameters: t(I) - pointer to ipftoken structure */
5083 /* itp(I) - pointer to ipfgeniter_t structure */
5085 /* Fetch the next nat/ipnat structure pointer from the linked list and */
5086 /* copy it out to the storage space pointed to by itp. The next item */
5087 /* in the list to look at is put back in the ipftoken structure. */
5088 /* ------------------------------------------------------------------------ */
5089 static int nat_getnext(t
, itp
)
5093 hostmap_t
*hm
= NULL
, *nexthm
= NULL
, zerohm
;
5094 ipnat_t
*ipn
= NULL
, *nextipnat
= NULL
, zeroipn
;
5095 nat_t
*nat
= NULL
, *nextnat
= NULL
, zeronat
;
5096 int error
= 0, count
;
5099 if (itp
->igi_nitems
< 1)
5102 READ_ENTER(&ipf_nat
);
5105 * Get "previous" entry from the token and find the next entry.
5107 switch (itp
->igi_type
)
5109 case IPFGENITER_HOSTMAP
:
5112 nexthm
= ipf_hm_maplist
;
5114 nexthm
= hm
->hm_next
;
5118 case IPFGENITER_IPNAT
:
5121 nextipnat
= nat_list
;
5123 nextipnat
= ipn
->in_next
;
5127 case IPFGENITER_NAT
:
5130 nextnat
= nat_instances
;
5132 nextnat
= nat
->nat_next
;
5137 RWLOCK_EXIT(&ipf_nat
);
5141 dst
= itp
->igi_data
;
5142 for (count
= itp
->igi_nitems
; count
> 0; count
--) {
5144 * If we found an entry, add a reference and update the token.
5145 * Otherwise, zero out data to be returned and NULL out token.
5147 switch (itp
->igi_type
)
5149 case IPFGENITER_HOSTMAP
:
5150 if (nexthm
!= NULL
) {
5151 ATOMIC_INC32(nexthm
->hm_ref
);
5152 t
->ipt_data
= nexthm
;
5154 bzero(&zerohm
, sizeof(zerohm
));
5160 case IPFGENITER_IPNAT
:
5161 if (nextipnat
!= NULL
) {
5162 ATOMIC_INC32(nextipnat
->in_use
);
5163 t
->ipt_data
= nextipnat
;
5165 bzero(&zeroipn
, sizeof(zeroipn
));
5166 nextipnat
= &zeroipn
;
5171 case IPFGENITER_NAT
:
5172 if (nextnat
!= NULL
) {
5173 MUTEX_ENTER(&nextnat
->nat_lock
);
5175 MUTEX_EXIT(&nextnat
->nat_lock
);
5176 t
->ipt_data
= nextnat
;
5178 bzero(&zeronat
, sizeof(zeronat
));
5186 * Now that we have ref, it's safe to give up lock.
5188 RWLOCK_EXIT(&ipf_nat
);
5191 * Copy out data and clean up references and token as needed.
5193 switch (itp
->igi_type
)
5195 case IPFGENITER_HOSTMAP
:
5196 error
= COPYOUT(nexthm
, dst
, sizeof(*nexthm
));
5200 WRITE_ENTER(&ipf_nat
);
5202 RWLOCK_EXIT(&ipf_nat
);
5204 if (t
->ipt_data
!= NULL
) {
5205 if (nexthm
->hm_next
== NULL
) {
5209 dst
+= sizeof(*nexthm
);
5211 nexthm
= nexthm
->hm_next
;
5215 case IPFGENITER_IPNAT
:
5216 error
= COPYOUT(nextipnat
, dst
, sizeof(*nextipnat
));
5220 WRITE_ENTER(&ipf_nat
);
5221 fr_ipnatderef(&ipn
);
5222 RWLOCK_EXIT(&ipf_nat
);
5224 if (t
->ipt_data
!= NULL
) {
5225 if (nextipnat
->in_next
== NULL
) {
5229 dst
+= sizeof(*nextipnat
);
5231 nextipnat
= nextipnat
->in_next
;
5235 case IPFGENITER_NAT
:
5236 error
= COPYOUT(nextnat
, dst
, sizeof(*nextnat
));
5242 if (t
->ipt_data
!= NULL
) {
5243 if (nextnat
->nat_next
== NULL
) {
5247 dst
+= sizeof(*nextnat
);
5249 nextnat
= nextnat
->nat_next
;
5254 if ((count
== 1) || (error
!= 0))
5257 READ_ENTER(&ipf_nat
);
5264 /* ------------------------------------------------------------------------ */
5265 /* Function: nat_iterator */
5266 /* Returns: int - 0 == ok, else error */
5267 /* Parameters: token(I) - pointer to ipftoken structure */
5268 /* itp(I) - pointer to ipfgeniter_t structure */
5270 /* This function acts as a handler for the SIOCGENITER ioctls that use a */
5271 /* generic structure to iterate through a list. There are three different */
5272 /* linked lists of NAT related information to go through: NAT rules, active */
5273 /* NAT mappings and the NAT fragment cache. */
5274 /* ------------------------------------------------------------------------ */
5275 static int nat_iterator(token
, itp
)
5281 if (itp
->igi_data
== NULL
)
5284 token
->ipt_subtype
= itp
->igi_type
;
5286 switch (itp
->igi_type
)
5288 case IPFGENITER_HOSTMAP
:
5289 case IPFGENITER_IPNAT
:
5290 case IPFGENITER_NAT
:
5291 error
= nat_getnext(token
, itp
);
5294 case IPFGENITER_NATFRAG
:
5296 error
= fr_nextfrag(token
, itp
, &ipfr_natlist
,
5297 &ipfr_nattail
, &ipf_natfrag
);
5299 error
= fr_nextfrag(token
, itp
, &ipfr_natlist
, &ipfr_nattail
);
5311 /* ------------------------------------------------------------------------ */
5312 /* Function: nat_extraflush */
5313 /* Returns: int - 0 == success, -1 == failure */
5314 /* Parameters: which(I) - how to flush the active NAT table */
5315 /* Write Locks: ipf_nat */
5317 /* Flush nat tables. Three actions currently defined: */
5318 /* which == 0 : flush all nat table entries */
5319 /* which == 1 : flush TCP connections which have started to close but are */
5320 /* stuck for some reason. */
5321 /* which == 2 : flush TCP connections which have been idle for a long time, */
5322 /* starting at > 4 days idle and working back in successive half-*/
5323 /* days to at most 12 hours old. If this fails to free enough */
5324 /* slots then work backwards in half hour slots to 30 minutes. */
5325 /* If that too fails, then work backwards in 30 second intervals */
5326 /* for the last 30 minutes to at worst 30 seconds idle. */
5327 /* ------------------------------------------------------------------------ */
5328 static int nat_extraflush(which
)
5331 ipftq_t
*ifq
, *ifqnext
;
5345 * Style 0 flush removes everything...
5347 for (natp
= &nat_instances
; ((nat
= *natp
) != NULL
); ) {
5348 nat_delete(nat
, NL_FLUSH
);
5355 * Since we're only interested in things that are closing,
5356 * we can start with the appropriate timeout queue.
5358 for (ifq
= nat_tqb
+ IPF_TCPS_CLOSE_WAIT
; ifq
!= NULL
;
5359 ifq
= ifq
->ifq_next
) {
5361 for (tqn
= ifq
->ifq_head
; tqn
!= NULL
; ) {
5362 nat
= tqn
->tqe_parent
;
5363 tqn
= tqn
->tqe_next
;
5364 if (nat
->nat_p
!= IPPROTO_TCP
)
5366 nat_delete(nat
, NL_EXPIRE
);
5372 * Also need to look through the user defined queues.
5374 for (ifq
= nat_utqe
; ifq
!= NULL
; ifq
= ifqnext
) {
5375 ifqnext
= ifq
->ifq_next
;
5376 for (tqn
= ifq
->ifq_head
; tqn
!= NULL
; ) {
5377 nat
= tqn
->tqe_parent
;
5378 tqn
= tqn
->tqe_next
;
5379 if (nat
->nat_p
!= IPPROTO_TCP
)
5382 if ((nat
->nat_tcpstate
[0] >
5383 IPF_TCPS_ESTABLISHED
) &&
5384 (nat
->nat_tcpstate
[1] >
5385 IPF_TCPS_ESTABLISHED
)) {
5386 nat_delete(nat
, NL_EXPIRE
);
5394 * Args 5-11 correspond to flushing those particular states
5395 * for TCP connections.
5397 case IPF_TCPS_CLOSE_WAIT
:
5398 case IPF_TCPS_FIN_WAIT_1
:
5399 case IPF_TCPS_CLOSING
:
5400 case IPF_TCPS_LAST_ACK
:
5401 case IPF_TCPS_FIN_WAIT_2
:
5402 case IPF_TCPS_TIME_WAIT
:
5403 case IPF_TCPS_CLOSED
:
5404 tqn
= nat_tqb
[which
].ifq_head
;
5405 while (tqn
!= NULL
) {
5406 nat
= tqn
->tqe_parent
;
5407 tqn
= tqn
->tqe_next
;
5408 nat_delete(nat
, NL_FLUSH
);
5418 * Take a large arbitrary number to mean the number of seconds
5419 * which we consider to be the maximum value we'll allow
5420 * the expiration to be.
5422 which
= IPF_TTLVAL(which
);
5423 for (natp
= &nat_instances
; ((nat
= *natp
) != NULL
); ) {
5424 if (fr_ticks
- nat
->nat_touched
> which
) {
5425 nat_delete(nat
, NL_FLUSH
);
5428 natp
= &nat
->nat_next
;
5439 * Asked to remove inactive entries because the table is full.
5441 if (fr_ticks
- nat_last_force_flush
> IPF_TTLVAL(5)) {
5442 nat_last_force_flush
= fr_ticks
;
5443 removed
= ipf_queueflush(nat_flush_entry
, nat_tqb
, nat_utqe
);
5451 /* ------------------------------------------------------------------------ */
5452 /* Function: nat_flush_entry */
5453 /* Returns: 0 - always succeeds */
5454 /* Parameters: entry(I) - pointer to NAT entry */
5455 /* Write Locks: ipf_nat */
5457 /* This function is a stepping stone between ipf_queueflush() and */
5458 /* nat_delete(). It is used so we can provide a uniform interface via the */
5459 /* ipf_queueflush() function. Since the nat_delete() function returns void */
5460 /* we translate that to mean it always succeeds in deleting something. */
5461 /* ------------------------------------------------------------------------ */
5462 static int nat_flush_entry(entry
)
5465 nat_delete(entry
, NL_FLUSH
);
5470 /* ------------------------------------------------------------------------ */
5471 /* Function: nat_gettable */
5472 /* Returns: int - 0 = success, else error */
5473 /* Parameters: data(I) - pointer to ioctl data */
5475 /* This function handles ioctl requests for tables of nat information. */
5476 /* At present the only table it deals with is the hash bucket statistics. */
5477 /* ------------------------------------------------------------------------ */
5478 static int nat_gettable(data
)
5484 error
= fr_inobj(data
, &table
, IPFOBJ_GTABLE
);
5488 switch (table
.ita_type
)
5490 case IPFTABLE_BUCKETS_NATIN
:
5491 error
= COPYOUT(nat_stats
.ns_bucketlen
[0], table
.ita_table
,
5492 ipf_nattable_sz
* sizeof(u_long
));
5495 case IPFTABLE_BUCKETS_NATOUT
:
5496 error
= COPYOUT(nat_stats
.ns_bucketlen
[1], table
.ita_table
,
5497 ipf_nattable_sz
* sizeof(u_long
));
5511 /* ------------------------------------------------------------------------ */
5512 /* Function: nat_uncreate */
5514 /* Parameters: fin(I) - pointer to packet information */
5516 /* This function is used to remove a NAT entry from the NAT table when we */
5517 /* decide that the create was actually in error. It is thus assumed that */
5518 /* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */
5519 /* with the translated packet (not the original), we have to reverse the */
5520 /* lookup. Although doing the lookup is expensive (relatively speaking), it */
5521 /* is not anticipated that this will be a frequent occurrence for normal */
5522 /* traffic patterns. */
5523 /* ------------------------------------------------------------------------ */
5524 void nat_uncreate(fin
)
5543 WRITE_ENTER(&ipf_nat
);
5545 if (fin
->fin_out
== 0) {
5546 nat
= nat_outlookup(fin
, nflags
, (u_int
)fin
->fin_p
,
5547 fin
->fin_dst
, fin
->fin_src
);
5549 nat
= nat_inlookup(fin
, nflags
, (u_int
)fin
->fin_p
,
5550 fin
->fin_src
, fin
->fin_dst
);
5554 nat_stats
.ns_uncreate
[fin
->fin_out
][0]++;
5555 nat_delete(nat
, NL_DESTROY
);
5557 nat_stats
.ns_uncreate
[fin
->fin_out
][1]++;
5560 RWLOCK_EXIT(&ipf_nat
);