Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / dist / ipf / netinet / ip_nat.c
blob4c2fa6ac9e5da36642027cb9c496de452af2e0da
1 /* $NetBSD$ */
3 /*
4 * Copyright (C) 1995-2003 by Darren Reed.
6 * See the IPFILTER.LICENCE file for details on licencing.
8 * Copyright 2008 Sun Microsystems, Inc.
9 */
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define KERNEL 1
14 # define _KERNEL 1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
22 (__NetBSD_Version__ >= 399002000)
23 # include <sys/kauth.h>
24 #endif
25 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26 defined(_KERNEL)
27 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
28 # include "opt_ipfilter_log.h"
29 # else
30 # include "opt_ipfilter.h"
31 # endif
32 #endif
33 #if !defined(_KERNEL)
34 # include <stdio.h>
35 # include <string.h>
36 # include <stdlib.h>
37 # define _KERNEL
38 # ifdef __OpenBSD__
39 struct file;
40 # endif
41 # include <sys/uio.h>
42 # undef _KERNEL
43 #endif
44 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
45 # include <sys/filio.h>
46 # include <sys/fcntl.h>
47 #else
48 # include <sys/ioctl.h>
49 #endif
50 #if !defined(AIX)
51 # include <sys/fcntl.h>
52 #endif
53 #if !defined(linux)
54 # include <sys/protosw.h>
55 #endif
56 #include <sys/socket.h>
57 #if defined(_KERNEL)
58 # include <sys/systm.h>
59 # if !defined(__SVR4) && !defined(__svr4__)
60 # include <sys/mbuf.h>
61 # endif
62 #endif
63 #if defined(__SVR4) || defined(__svr4__)
64 # include <sys/filio.h>
65 # include <sys/byteorder.h>
66 # ifdef _KERNEL
67 # include <sys/dditypes.h>
68 # endif
69 # include <sys/stream.h>
70 # include <sys/kmem.h>
71 #endif
72 #if __FreeBSD_version >= 300000
73 # include <sys/queue.h>
74 #endif
75 #include <net/if.h>
76 #if __FreeBSD_version >= 300000
77 # include <net/if_var.h>
78 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
79 # include "opt_ipfilter.h"
80 # endif
81 #endif
82 #ifdef sun
83 # include <net/af.h>
84 #endif
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/ip.h>
89 #ifdef RFC1825
90 # include <vpn/md5.h>
91 # include <vpn/ipsec.h>
92 extern struct ifnet vpnif;
93 #endif
95 #if !defined(linux)
96 # include <netinet/ip_var.h>
97 #endif
98 #include <netinet/tcp.h>
99 #include <netinet/udp.h>
100 #include <netinet/ip_icmp.h>
101 #include "netinet/ip_compat.h"
102 #include <netinet/tcpip.h>
103 #include "netinet/ip_fil.h"
104 #include "netinet/ip_nat.h"
105 #include "netinet/ip_frag.h"
106 #include "netinet/ip_state.h"
107 #include "netinet/ip_proxy.h"
108 #ifdef IPFILTER_SYNC
109 #include "netinet/ip_sync.h"
110 #endif
111 #if (__FreeBSD_version >= 300000)
112 # include <sys/malloc.h>
113 #endif
114 /* END OF INCLUDES */
116 #undef SOCKADDR_IN
117 #define SOCKADDR_IN struct sockaddr_in
119 #if !defined(lint)
120 #if defined(__NetBSD__)
121 #include <sys/cdefs.h>
122 __KERNEL_RCSID(0, "$NetBSD$");
123 #else
124 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
125 static const char rcsid[] = "@(#)Id: ip_nat.c,v 2.195.2.127 2009/07/21 09:40:55 darrenr Exp";
126 #endif
127 #endif
130 /* ======================================================================== */
131 /* How the NAT is organised and works. */
132 /* */
133 /* Inside (interface y) NAT Outside (interface x) */
134 /* -------------------- -+- ------------------------------------- */
135 /* Packet going | out, processed by fr_checknatout() for x */
136 /* ------------> | ------------> */
137 /* src=10.1.1.1 | src=192.1.1.1 */
138 /* | */
139 /* | in, processed by fr_checknatin() for x */
140 /* <------------ | <------------ */
141 /* dst=10.1.1.1 | dst=192.1.1.1 */
142 /* -------------------- -+- ------------------------------------- */
143 /* fr_checknatout() - changes ip_src and if required, sport */
144 /* - creates a new mapping, if required. */
145 /* fr_checknatin() - changes ip_dst and if required, dport */
146 /* */
147 /* In the NAT table, internal source is recorded as "in" and externally */
148 /* seen as "out". */
149 /* ======================================================================== */
152 nat_t **nat_table[2] = { NULL, NULL },
153 *nat_instances = NULL;
154 ipnat_t *nat_list = NULL;
155 u_int ipf_nattable_max = NAT_TABLE_MAX;
156 u_int ipf_nattable_sz = NAT_TABLE_SZ;
157 u_int ipf_natrules_sz = NAT_SIZE;
158 u_int ipf_rdrrules_sz = RDR_SIZE;
159 u_int ipf_hostmap_sz = HOSTMAP_SIZE;
160 u_int fr_nat_maxbucket = 0,
161 fr_nat_maxbucket_reset = 1;
162 u_32_t nat_masks = 0;
163 u_32_t rdr_masks = 0;
164 u_long nat_last_force_flush = 0;
165 ipnat_t **nat_rules = NULL;
166 ipnat_t **rdr_rules = NULL;
167 hostmap_t **ipf_hm_maptable = NULL;
168 hostmap_t *ipf_hm_maplist = NULL;
169 ipftq_t nat_tqb[IPF_TCP_NSTATES];
170 ipftq_t nat_udptq;
171 ipftq_t nat_icmptq;
172 ipftq_t nat_iptq;
173 ipftq_t *nat_utqe = NULL;
174 int fr_nat_doflush = 0;
175 #ifdef IPFILTER_LOG
176 int nat_logging = 1;
177 #else
178 int nat_logging = 0;
179 #endif
181 u_long fr_defnatage = DEF_NAT_AGE,
182 fr_defnatipage = 120, /* 60 seconds */
183 fr_defnaticmpage = 6; /* 3 seconds */
184 natstat_t nat_stats;
185 int fr_nat_lock = 0;
186 int fr_nat_init = 0;
187 #if SOLARIS && !defined(_INET_IP_STACK_H)
188 extern int pfil_delayed_copy;
189 #endif
191 static int nat_flush_entry __P((void *));
192 static int nat_flushtable __P((void));
193 static int nat_clearlist __P((void));
194 static void nat_addnat __P((struct ipnat *));
195 static void nat_addrdr __P((struct ipnat *));
196 static void nat_delrdr __P((struct ipnat *));
197 static void nat_delnat __P((struct ipnat *));
198 static int fr_natgetent __P((void *, int));
199 static int fr_natgetsz __P((void *, int));
200 static int fr_natputent __P((void *, int));
201 static int nat_extraflush __P((int));
202 static int nat_gettable __P((char *));
203 static void nat_tabmove __P((nat_t *));
204 static int nat_match __P((fr_info_t *, ipnat_t *));
205 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
206 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
207 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
208 struct in_addr, struct in_addr, u_32_t));
209 static int nat_icmpquerytype4 __P((int));
210 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
211 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
212 static int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
213 tcphdr_t *, nat_t **, int));
214 static int nat_resolverule __P((ipnat_t *));
215 static nat_t *fr_natclone __P((fr_info_t *, nat_t *));
216 static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
217 static int nat_wildok __P((nat_t *, int, int, int, int));
218 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
219 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
222 /* ------------------------------------------------------------------------ */
223 /* Function: fr_natinit */
224 /* Returns: int - 0 == success, -1 == failure */
225 /* Parameters: Nil */
226 /* */
227 /* Initialise all of the NAT locks, tables and other structures. */
228 /* ------------------------------------------------------------------------ */
229 int fr_natinit()
231 int i;
233 KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
234 if (nat_table[0] != NULL)
235 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
236 else
237 return -1;
239 KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
240 if (nat_table[1] != NULL)
241 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
242 else
243 return -2;
245 KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
246 if (nat_rules != NULL)
247 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
248 else
249 return -3;
251 KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
252 if (rdr_rules != NULL)
253 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
254 else
255 return -4;
257 KMALLOCS(ipf_hm_maptable, hostmap_t **, \
258 sizeof(hostmap_t *) * ipf_hostmap_sz);
259 if (ipf_hm_maptable != NULL)
260 bzero((char *)ipf_hm_maptable,
261 sizeof(hostmap_t *) * ipf_hostmap_sz);
262 else
263 return -5;
264 ipf_hm_maplist = NULL;
266 KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
267 ipf_nattable_sz * sizeof(u_long));
268 if (nat_stats.ns_bucketlen[0] == NULL)
269 return -6;
270 bzero((char *)nat_stats.ns_bucketlen[0],
271 ipf_nattable_sz * sizeof(u_long));
273 KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
274 ipf_nattable_sz * sizeof(u_long));
275 if (nat_stats.ns_bucketlen[1] == NULL)
276 return -7;
278 bzero((char *)nat_stats.ns_bucketlen[1],
279 ipf_nattable_sz * sizeof(u_long));
281 if (fr_nat_maxbucket == 0) {
282 for (i = ipf_nattable_sz; i > 0; i >>= 1)
283 fr_nat_maxbucket++;
284 fr_nat_maxbucket *= 2;
287 fr_sttab_init(nat_tqb);
289 * Increase this because we may have "keep state" following this too
290 * and packet storms can occur if this is removed too quickly.
292 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
293 nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
294 nat_udptq.ifq_ttl = fr_defnatage;
295 nat_udptq.ifq_ref = 1;
296 nat_udptq.ifq_head = NULL;
297 nat_udptq.ifq_tail = &nat_udptq.ifq_head;
298 MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
299 nat_udptq.ifq_next = &nat_icmptq;
300 nat_icmptq.ifq_ttl = fr_defnaticmpage;
301 nat_icmptq.ifq_ref = 1;
302 nat_icmptq.ifq_head = NULL;
303 nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
304 MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
305 nat_icmptq.ifq_next = &nat_iptq;
306 nat_iptq.ifq_ttl = fr_defnatipage;
307 nat_iptq.ifq_ref = 1;
308 nat_iptq.ifq_head = NULL;
309 nat_iptq.ifq_tail = &nat_iptq.ifq_head;
310 MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
311 nat_iptq.ifq_next = NULL;
313 for (i = 0; i < IPF_TCP_NSTATES; i++) {
314 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
315 nat_tqb[i].ifq_ttl = fr_defnaticmpage;
316 #ifdef LARGE_NAT
317 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
318 nat_tqb[i].ifq_ttl = fr_defnatage;
319 #endif
323 * Increase this because we may have "keep state" following
324 * this too and packet storms can occur if this is removed
325 * too quickly.
327 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
329 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
330 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
331 MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
332 MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
334 fr_nat_init = 1;
336 return 0;
340 /* ------------------------------------------------------------------------ */
341 /* Function: nat_addrdr */
342 /* Returns: Nil */
343 /* Parameters: n(I) - pointer to NAT rule to add */
344 /* */
345 /* Adds a redirect rule to the hash table of redirect rules and the list of */
346 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */
347 /* use by redirect rules. */
348 /* ------------------------------------------------------------------------ */
349 static void nat_addrdr(n)
350 ipnat_t *n;
352 ipnat_t **np;
353 u_32_t j;
354 u_int hv;
355 int k;
357 k = count4bits(n->in_outmsk);
358 if ((k >= 0) && (k != 32))
359 rdr_masks |= 1 << k;
360 j = (n->in_outip & n->in_outmsk);
361 hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
362 np = rdr_rules + hv;
363 while (*np != NULL)
364 np = &(*np)->in_rnext;
365 n->in_rnext = NULL;
366 n->in_prnext = np;
367 n->in_hv = hv;
368 *np = n;
372 /* ------------------------------------------------------------------------ */
373 /* Function: nat_addnat */
374 /* Returns: Nil */
375 /* Parameters: n(I) - pointer to NAT rule to add */
376 /* */
377 /* Adds a NAT map rule to the hash table of rules and the list of loaded */
378 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */
379 /* redirect rules. */
380 /* ------------------------------------------------------------------------ */
381 static void nat_addnat(n)
382 ipnat_t *n;
384 ipnat_t **np;
385 u_32_t j;
386 u_int hv;
387 int k;
389 k = count4bits(n->in_inmsk);
390 if ((k >= 0) && (k != 32))
391 nat_masks |= 1 << k;
392 j = (n->in_inip & n->in_inmsk);
393 hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
394 np = nat_rules + hv;
395 while (*np != NULL)
396 np = &(*np)->in_mnext;
397 n->in_mnext = NULL;
398 n->in_pmnext = np;
399 n->in_hv = hv;
400 *np = n;
404 /* ------------------------------------------------------------------------ */
405 /* Function: nat_delrdr */
406 /* Returns: Nil */
407 /* Parameters: n(I) - pointer to NAT rule to delete */
408 /* */
409 /* Removes a redirect rule from the hash table of redirect rules. */
410 /* ------------------------------------------------------------------------ */
411 static void nat_delrdr(n)
412 ipnat_t *n;
414 if (n->in_rnext)
415 n->in_rnext->in_prnext = n->in_prnext;
416 *n->in_prnext = n->in_rnext;
420 /* ------------------------------------------------------------------------ */
421 /* Function: nat_delnat */
422 /* Returns: Nil */
423 /* Parameters: n(I) - pointer to NAT rule to delete */
424 /* */
425 /* Removes a NAT map rule from the hash table of NAT map rules. */
426 /* ------------------------------------------------------------------------ */
427 static void nat_delnat(n)
428 ipnat_t *n;
430 if (n->in_mnext != NULL)
431 n->in_mnext->in_pmnext = n->in_pmnext;
432 *n->in_pmnext = n->in_mnext;
436 /* ------------------------------------------------------------------------ */
437 /* Function: nat_hostmap */
438 /* Returns: struct hostmap* - NULL if no hostmap could be created, */
439 /* else a pointer to the hostmapping to use */
440 /* Parameters: np(I) - pointer to NAT rule */
441 /* real(I) - real IP address */
442 /* map(I) - mapped IP address */
443 /* port(I) - destination port number */
444 /* Write Locks: ipf_nat */
445 /* */
446 /* Check if an ip address has already been allocated for a given mapping */
447 /* that is not doing port based translation. If is not yet allocated, then */
448 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */
449 /* ------------------------------------------------------------------------ */
450 static struct hostmap *nat_hostmap(np, src, dst, map, port)
451 ipnat_t *np;
452 struct in_addr src;
453 struct in_addr dst;
454 struct in_addr map;
455 u_32_t port;
457 hostmap_t *hm;
458 u_int hv;
460 hv = (src.s_addr ^ dst.s_addr);
461 hv += src.s_addr;
462 hv += dst.s_addr;
463 hv %= HOSTMAP_SIZE;
464 for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_hnext)
465 if ((hm->hm_srcip.s_addr == src.s_addr) &&
466 (hm->hm_dstip.s_addr == dst.s_addr) &&
467 ((np == NULL) || (np == hm->hm_ipnat)) &&
468 ((port == 0) || (port == hm->hm_port))) {
469 hm->hm_ref++;
470 return hm;
473 if (np == NULL)
474 return NULL;
476 KMALLOC(hm, hostmap_t *);
477 if (hm) {
478 hm->hm_next = ipf_hm_maplist;
479 hm->hm_pnext = &ipf_hm_maplist;
480 if (ipf_hm_maplist != NULL)
481 ipf_hm_maplist->hm_pnext = &hm->hm_next;
482 ipf_hm_maplist = hm;
483 hm->hm_hnext = ipf_hm_maptable[hv];
484 hm->hm_phnext = ipf_hm_maptable + hv;
485 if (ipf_hm_maptable[hv] != NULL)
486 ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
487 ipf_hm_maptable[hv] = hm;
488 hm->hm_ipnat = np;
489 hm->hm_srcip = src;
490 hm->hm_dstip = dst;
491 hm->hm_mapip = map;
492 hm->hm_ref = 1;
493 hm->hm_port = port;
495 return hm;
499 /* ------------------------------------------------------------------------ */
500 /* Function: fr_hostmapdel */
501 /* Returns: Nil */
502 /* Parameters: hmp(I) - pointer to hostmap structure pointer */
503 /* Write Locks: ipf_nat */
504 /* */
505 /* Decrement the references to this hostmap structure by one. If this */
506 /* reaches zero then remove it and free it. */
507 /* ------------------------------------------------------------------------ */
508 void fr_hostmapdel(hmp)
509 struct hostmap **hmp;
511 struct hostmap *hm;
513 hm = *hmp;
514 *hmp = NULL;
516 hm->hm_ref--;
517 if (hm->hm_ref == 0) {
518 if (hm->hm_hnext)
519 hm->hm_hnext->hm_phnext = hm->hm_phnext;
520 *hm->hm_phnext = hm->hm_hnext;
521 if (hm->hm_next)
522 hm->hm_next->hm_pnext = hm->hm_pnext;
523 *hm->hm_pnext = hm->hm_next;
524 KFREE(hm);
529 /* ------------------------------------------------------------------------ */
530 /* Function: fix_outcksum */
531 /* Returns: Nil */
532 /* Parameters: fin(I) - pointer to packet information */
533 /* sp(I) - location of 16bit checksum to update */
534 /* n((I) - amount to adjust checksum by */
535 /* */
536 /* Adjusts the 16bit checksum by "n" for packets going out. */
537 /* ------------------------------------------------------------------------ */
538 void fix_outcksum(fin, sp, n)
539 fr_info_t *fin;
540 u_short *sp;
541 u_32_t n;
543 u_short sumshort;
544 u_32_t sum1;
546 if (n == 0)
547 return;
549 if (n & NAT_HW_CKSUM) {
550 n &= 0xffff;
551 n += fin->fin_dlen;
552 n = (n & 0xffff) + (n >> 16);
553 *sp = n & 0xffff;
554 return;
556 sum1 = (~ntohs(*sp)) & 0xffff;
557 sum1 += (n);
558 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
559 /* Again */
560 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
561 sumshort = ~(u_short)sum1;
562 *(sp) = htons(sumshort);
566 /* ------------------------------------------------------------------------ */
567 /* Function: fix_incksum */
568 /* Returns: Nil */
569 /* Parameters: fin(I) - pointer to packet information */
570 /* sp(I) - location of 16bit checksum to update */
571 /* n((I) - amount to adjust checksum by */
572 /* */
573 /* Adjusts the 16bit checksum by "n" for packets going in. */
574 /* ------------------------------------------------------------------------ */
575 void fix_incksum(fin, sp, n)
576 fr_info_t *fin;
577 u_short *sp;
578 u_32_t n;
580 u_short sumshort;
581 u_32_t sum1;
583 if (n == 0)
584 return;
586 if (n & NAT_HW_CKSUM) {
587 n &= 0xffff;
588 n += fin->fin_dlen;
589 n = (n & 0xffff) + (n >> 16);
590 *sp = n & 0xffff;
591 return;
593 sum1 = (~ntohs(*sp)) & 0xffff;
594 sum1 += ~(n) & 0xffff;
595 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
596 /* Again */
597 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
598 sumshort = ~(u_short)sum1;
599 *(sp) = htons(sumshort);
603 /* ------------------------------------------------------------------------ */
604 /* Function: fix_datacksum */
605 /* Returns: Nil */
606 /* Parameters: sp(I) - location of 16bit checksum to update */
607 /* n((I) - amount to adjust checksum by */
608 /* */
609 /* Fix_datacksum is used *only* for the adjustments of checksums in the */
610 /* data section of an IP packet. */
611 /* */
612 /* The only situation in which you need to do this is when NAT'ing an */
613 /* ICMP error message. Such a message, contains in its body the IP header */
614 /* of the original IP packet, that causes the error. */
615 /* */
616 /* You can't use fix_incksum or fix_outcksum in that case, because for the */
617 /* kernel the data section of the ICMP error is just data, and no special */
618 /* processing like hardware cksum or ntohs processing have been done by the */
619 /* kernel on the data section. */
620 /* ------------------------------------------------------------------------ */
621 void fix_datacksum(sp, n)
622 u_short *sp;
623 u_32_t n;
625 u_short sumshort;
626 u_32_t sum1;
628 if (n == 0)
629 return;
631 sum1 = (~ntohs(*sp)) & 0xffff;
632 sum1 += (n);
633 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
634 /* Again */
635 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
636 sumshort = ~(u_short)sum1;
637 *(sp) = htons(sumshort);
641 /* ------------------------------------------------------------------------ */
642 /* Function: fr_nat_ioctl */
643 /* Returns: int - 0 == success, != 0 == failure */
644 /* Parameters: data(I) - pointer to ioctl data */
645 /* cmd(I) - ioctl command integer */
646 /* mode(I) - file mode bits used with open */
/* uid(I) - passed through to ipf_findtoken() and ipf_deltoken() */
/* ctx(I) - passed through to ipf_findtoken(), ipf_deltoken() and */
/* appr_ioctl() */
647 /* */
648 /* Processes an ioctl call made to operate on the IP Filter NAT device. */
/* Besides the FWRITE bit, mode is also tested for NAT_LOCKHELD (do not */
/* take ipf_nat here) and NAT_SYSSPACE (data is copied with bcopy()). */
649 /* ------------------------------------------------------------------------ */
650 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
651 ioctlcmd_t cmd;
652 void * data;
653 int mode, uid;
654 void *ctx;
656 ipnat_t *nat, *nt, *n = NULL, **np = NULL;
657 int error = 0, ret, arg, getlock;
658 ipnat_t natd;
659 SPL_INT(s);
/* Writers are refused up front with EPERM by kauth or by securelevel. */
661 #if defined(BSD) && (BSD >= 199306) && defined(_KERNEL)
662 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
663 if ((mode & FWRITE) &&
664 kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
665 KAUTH_REQ_NETWORK_FIREWALL_FW,
666 NULL, NULL, NULL)) {
667 return EPERM;
669 # else
670 if ((securelevel >= 2) && (mode & FWRITE)) {
671 return EPERM;
673 # endif
674 #endif
/* getlock != 0 means the cases below take and release ipf_nat themselves. */
676 #if defined(__osf__) && defined(_KERNEL)
677 getlock = 0;
678 #else
679 getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
680 #endif
682 nat = NULL; /* XXX gcc -Wuninitialized */
/* Only SIOCADNAT needs a rule buffer; whatever is left in nt is freed at "done". */
683 if (cmd == (ioctlcmd_t)SIOCADNAT) {
684 KMALLOC(nt, ipnat_t *);
685 } else {
686 nt = NULL;
/* Fetch the caller's rule into natd: bcopy() for NAT_SYSSPACE, else fr_inobj(). */
689 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
690 if (mode & NAT_SYSSPACE) {
691 bcopy(data, (char *)&natd, sizeof(natd));
692 error = 0;
693 } else {
694 error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
698 if (error != 0)
699 goto done;
702 * For add/delete, look to see if the NAT entry is already present
704 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
705 nat = &natd;
706 if (nat->in_v == 0) /* For backward compat. */
707 nat->in_v = 4;
708 nat->in_flags &= IPN_USERFLAGS;
709 if ((nat->in_redir & NAT_MAPBLK) == 0) {
710 if ((nat->in_flags & IPN_SPLIT) == 0)
711 nat->in_inip &= nat->in_inmsk;
712 if ((nat->in_flags & IPN_IPRANGE) == 0)
713 nat->in_outip &= nat->in_outmsk;
/* ipf_natio is taken here and released inside the SIOCADNAT/SIOCRMNAT cases. */
/* After the loop, n is the matching rule (or NULL) and np the link to it. */
715 MUTEX_ENTER(&ipf_natio);
716 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
717 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
718 IPN_CMPSIZ) == 0) {
719 if (nat->in_redir == NAT_REDIRECT &&
720 nat->in_pnext != n->in_pnext)
721 continue;
722 break;
726 switch (cmd)
728 #ifdef IPFILTER_LOG
729 case SIOCIPFFB :
731 int tmp;
733 if (!(mode & FWRITE))
734 error = EPERM;
735 else {
736 tmp = ipflog_clear(IPL_LOGNAT);
737 error = BCOPYOUT((char *)&tmp, (char *)data,
738 sizeof(tmp));
739 if (error != 0)
740 error = EFAULT;
742 break;
745 case SIOCSETLG :
746 if (!(mode & FWRITE))
747 error = EPERM;
748 else {
749 error = BCOPYIN((char *)data, (char *)&nat_logging,
750 sizeof(nat_logging));
751 if (error != 0)
752 error = EFAULT;
754 break;
756 case SIOCGETLG :
757 error = BCOPYOUT((char *)&nat_logging, (char *)data,
758 sizeof(nat_logging));
759 if (error != 0)
760 error = EFAULT;
761 break;
763 case FIONREAD :
764 arg = iplused[IPL_LOGNAT];
765 error = BCOPYOUT(&arg, data, sizeof(arg));
766 if (error != 0)
767 error = EFAULT;
768 break;
769 #endif
/* Add: needs FWRITE, no existing match (n == NULL) and an allocated nt. */
770 case SIOCADNAT :
771 if (!(mode & FWRITE)) {
772 error = EPERM;
773 } else if (n != NULL) {
774 error = EEXIST;
775 } else if (nt == NULL) {
776 error = ENOMEM;
778 if (error != 0) {
779 MUTEX_EXIT(&ipf_natio);
780 break;
/* n is NULL on this path; sizeof(*n) only names the ipnat_t size. */
782 bcopy((char *)nat, (char *)nt, sizeof(*n));
783 error = nat_siocaddnat(nt, np, getlock);
784 MUTEX_EXIT(&ipf_natio);
/* On success nt is cleared so that "done" does not free it. */
785 if (error == 0)
786 nt = NULL;
787 break;
/* Remove: needs FWRITE and a matching rule found by the search above. */
789 case SIOCRMNAT :
790 if (!(mode & FWRITE)) {
791 error = EPERM;
792 n = NULL;
793 } else if (n == NULL) {
794 error = ESRCH;
797 if (error != 0) {
798 MUTEX_EXIT(&ipf_natio);
799 break;
801 nat_siocdelnat(n, np, getlock);
803 MUTEX_EXIT(&ipf_natio);
804 n = NULL;
805 break;
/* Refresh the pointer/size fields of nat_stats, then copy it out. */
807 case SIOCGNATS :
808 nat_stats.ns_table[0] = nat_table[0];
809 nat_stats.ns_table[1] = nat_table[1];
810 nat_stats.ns_list = nat_list;
811 nat_stats.ns_maptable = ipf_hm_maptable;
812 nat_stats.ns_maplist = ipf_hm_maplist;
813 nat_stats.ns_nattab_sz = ipf_nattable_sz;
814 nat_stats.ns_nattab_max = ipf_nattable_max;
815 nat_stats.ns_rultab_sz = ipf_natrules_sz;
816 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
817 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
818 nat_stats.ns_instances = nat_instances;
819 nat_stats.ns_apslist = ap_sess_list;
820 nat_stats.ns_ticks = fr_ticks;
821 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
822 break;
824 case SIOCGNATL :
826 natlookup_t nl;
828 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
829 if (error == 0) {
830 void *ptr;
832 if (getlock) {
833 READ_ENTER(&ipf_nat);
835 ptr = nat_lookupredir(&nl);
836 if (getlock) {
837 RWLOCK_EXIT(&ipf_nat);
839 if (ptr != NULL) {
840 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
841 } else {
842 error = ESRCH;
845 break;
/* arg selects nat_flushtable() (0), nat_clearlist() (1) or nat_extraflush(arg). */
848 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */
849 if (!(mode & FWRITE)) {
850 error = EPERM;
851 break;
853 if (getlock) {
854 WRITE_ENTER(&ipf_nat);
857 error = BCOPYIN(data, &arg, sizeof(arg));
858 if (error != 0)
859 error = EFAULT;
860 else {
861 if (arg == 0)
862 ret = nat_flushtable();
863 else if (arg == 1)
864 ret = nat_clearlist();
865 else
866 ret = nat_extraflush(arg);
869 if (getlock) {
870 RWLOCK_EXIT(&ipf_nat);
/* NOTE(review): unlike the other BCOPYOUT calls in this function, a failure */
/* here is returned as-is rather than mapped to EFAULT - confirm intent. */
872 if (error == 0) {
873 error = BCOPYOUT(&ret, data, sizeof(ret));
875 break;
877 case SIOCPROXY :
878 error = appr_ioctl(data, cmd, mode, ctx);
879 break;
881 case SIOCSTLCK :
882 if (!(mode & FWRITE)) {
883 error = EPERM;
884 } else {
885 error = fr_lock(data, &fr_nat_lock);
887 break;
889 case SIOCSTPUT :
890 if ((mode & FWRITE) != 0) {
891 error = fr_natputent(data, getlock);
892 } else {
893 error = EACCES;
895 break;
/* SIOCSTGSZ and SIOCSTGET are refused with EACCES unless fr_nat_lock is set. */
897 case SIOCSTGSZ :
898 if (fr_nat_lock) {
899 error = fr_natgetsz(data, getlock);
900 } else
901 error = EACCES;
902 break;
904 case SIOCSTGET :
905 if (fr_nat_lock) {
906 error = fr_natgetent(data, getlock);
907 } else
908 error = EACCES;
909 break;
/* NOTE(review): when ipf_findtoken() returns NULL, error stays 0 and the */
/* ioctl reports success without iterating - confirm this is intended. */
911 case SIOCGENITER :
913 ipfgeniter_t iter;
914 ipftoken_t *token;
916 SPL_SCHED(s);
917 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
918 if (error == 0) {
919 token = ipf_findtoken(iter.igi_type, uid, ctx);
920 if (token != NULL) {
921 error = nat_iterator(token, &iter);
922 WRITE_ENTER(&ipf_tokens);
923 if (token->ipt_data == NULL)
924 ipf_freetoken(token);
925 else
926 ipf_dereftoken(token);
927 RWLOCK_EXIT(&ipf_tokens);
930 SPL_X(s);
931 break;
934 case SIOCIPFDELTOK :
935 error = BCOPYIN((void *)data, (void *)&arg, sizeof(arg));
936 if (error == 0) {
937 SPL_SCHED(s);
938 error = ipf_deltoken(arg, uid, ctx);
939 SPL_X(s);
940 } else {
941 error = EFAULT;
943 break;
945 case SIOCGTQTAB :
946 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
947 break;
949 case SIOCGTABL :
950 error = nat_gettable(data);
951 break;
953 default :
954 error = EINVAL;
955 break;
/* Common exit: free the SIOCADNAT buffer unless it was handed to the rule list. */
957 done:
958 if (nt != NULL)
959 KFREE(nt);
960 return error;
964 /* ------------------------------------------------------------------------ */
965 /* Function: nat_siocaddnat */
966 /* Returns: int - 0 == success, != 0 == failure */
967 /* Parameters: n(I) - pointer to new NAT rule */
968 /* np(I) - pointer to where to insert new NAT rule */
969 /* getlock(I) - flag indicating if lock on ipf_nat is held */
970 /* Mutex Locks: ipf_natio */
971 /* */
972 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
973 /* from information passed to the kernel, then add it to the appropriate */
974 /* NAT rule table(s). */
975 /* ------------------------------------------------------------------------ */
976 static int nat_siocaddnat(n, np, getlock)
977 ipnat_t *n, **np;
978 int getlock;
980 int error = 0, i, j;
982 if (nat_resolverule(n) != 0)
983 return ENOENT;
985 if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
986 return EINVAL;
988 n->in_use = 0;
989 if (n->in_redir & NAT_MAPBLK)
990 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
991 else if (n->in_flags & IPN_AUTOPORTMAP)
992 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
993 else if (n->in_flags & IPN_IPRANGE)
994 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
995 else if (n->in_flags & IPN_SPLIT)
996 n->in_space = 2;
997 else if (n->in_outmsk != 0)
998 n->in_space = ~ntohl(n->in_outmsk);
999 else
1000 n->in_space = 1;
1003 * Calculate the number of valid IP addresses in the output
1004 * mapping range. In all cases, the range is inclusive of
1005 * the start and ending IP addresses.
1006 * If to a CIDR address, lose 2: broadcast + network address
1007 * (so subtract 1)
1008 * If to a range, add one.
1009 * If to a single IP address, set to 1.
1011 if (n->in_space) {
1012 if ((n->in_flags & IPN_IPRANGE) != 0)
1013 n->in_space += 1;
1014 else
1015 n->in_space -= 1;
1016 } else
1017 n->in_space = 1;
1019 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1020 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1021 n->in_nip = ntohl(n->in_outip) + 1;
1022 else if ((n->in_flags & IPN_SPLIT) &&
1023 (n->in_redir & NAT_REDIRECT))
1024 n->in_nip = ntohl(n->in_inip);
1025 else
1026 n->in_nip = ntohl(n->in_outip);
1027 if (n->in_redir & NAT_MAP) {
1028 n->in_pnext = ntohs(n->in_pmin);
1030 * Multiply by the number of ports made available.
1032 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1033 n->in_space *= (ntohs(n->in_pmax) -
1034 ntohs(n->in_pmin) + 1);
1036 * Because two different sources can map to
1037 * different destinations but use the same
1038 * local IP#/port #.
1039 * If the result is smaller than in_space, then
1040 * we may have wrapped around 32bits.
1042 i = n->in_inmsk;
1043 if ((i != 0) && (i != 0xffffffff)) {
1044 j = n->in_space * (~ntohl(i) + 1);
1045 if (j >= n->in_space)
1046 n->in_space = j;
1047 else
1048 n->in_space = 0xffffffff;
1052 * If no protocol is specified, multiple by 256 to allow for
1053 * at least one IP:IP mapping per protocol.
1055 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1056 j = n->in_space * 256;
1057 if (j >= n->in_space)
1058 n->in_space = j;
1059 else
1060 n->in_space = 0xffffffff;
1064 /* Otherwise, these fields are preset */
1066 if (getlock) {
1067 WRITE_ENTER(&ipf_nat);
1069 n->in_next = NULL;
1070 *np = n;
1072 if (n->in_age[0] != 0)
1073 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1075 if (n->in_age[1] != 0)
1076 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1078 if (n->in_redir & NAT_REDIRECT) {
1079 n->in_flags &= ~IPN_NOTDST;
1080 nat_addrdr(n);
1082 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1083 n->in_flags &= ~IPN_NOTSRC;
1084 nat_addnat(n);
1086 MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1088 n = NULL;
1089 nat_stats.ns_rules++;
1090 #if SOLARIS && !defined(_INET_IP_STACK_H)
1091 pfil_delayed_copy = 0;
1092 #endif
1093 if (getlock) {
1094 RWLOCK_EXIT(&ipf_nat); /* WRITE */
1097 return error;
1101 /* ------------------------------------------------------------------------ */
1102 /* Function: nat_resolvrule */
1103 /* Returns: Nil */
1104 /* Parameters: n(I) - pointer to NAT rule */
1105 /* */
1106 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
1107 /* from information passed to the kernel, then add it to the appropriate */
1108 /* NAT rule table(s). */
1109 /* ------------------------------------------------------------------------ */
1110 static int nat_resolverule(n)
1111 ipnat_t *n;
1113 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1114 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1116 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1117 if (n->in_ifnames[1][0] == '\0') {
1118 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1119 n->in_ifps[1] = n->in_ifps[0];
1120 } else {
1121 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1124 if (n->in_plabel[0] != '\0') {
1125 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1126 if (n->in_apr == NULL)
1127 return -1;
1129 return 0;
1133 /* ------------------------------------------------------------------------ */
1134 /* Function: nat_siocdelnat */
1135 /* Returns: int - 0 == success, != 0 == failure */
1136 /* Parameters: n(I) - pointer to new NAT rule */
1137 /* np(I) - pointer to where to insert new NAT rule */
1138 /* getlock(I) - flag indicating if lock on ipf_nat is held */
1139 /* Mutex Locks: ipf_natio */
1140 /* */
1141 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
1142 /* from information passed to the kernel, then add it to the appropriate */
1143 /* NAT rule table(s). */
1144 /* ------------------------------------------------------------------------ */
1145 static void nat_siocdelnat(n, np, getlock)
1146 ipnat_t *n, **np;
1147 int getlock;
1149 if (getlock) {
1150 WRITE_ENTER(&ipf_nat);
1152 if (n->in_redir & NAT_REDIRECT)
1153 nat_delrdr(n);
1154 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1155 nat_delnat(n);
1156 if (nat_list == NULL) {
1157 nat_masks = 0;
1158 rdr_masks = 0;
1161 if (n->in_tqehead[0] != NULL) {
1162 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1163 fr_freetimeoutqueue(n->in_tqehead[1]);
1167 if (n->in_tqehead[1] != NULL) {
1168 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1169 fr_freetimeoutqueue(n->in_tqehead[1]);
1173 *np = n->in_next;
1175 if (n->in_use == 0) {
1176 if (n->in_apr)
1177 appr_free(n->in_apr);
1178 MUTEX_DESTROY(&n->in_lock);
1179 KFREE(n);
1180 nat_stats.ns_rules--;
1181 #if SOLARIS && !defined(_INET_IP_STACK_H)
1182 if (nat_stats.ns_rules == 0)
1183 pfil_delayed_copy = 1;
1184 #endif
1185 } else {
1186 n->in_flags |= IPN_DELETE;
1187 n->in_next = NULL;
1189 if (getlock) {
1190 RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */
1195 /* ------------------------------------------------------------------------ */
1196 /* Function: fr_natgetsz */
1197 /* Returns: int - 0 == success, != 0 is the error value. */
1198 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1199 /* get the size of. */
1200 /* */
1201 /* Handle SIOCSTGSZ. */
1202 /* Return the size of the nat list entry to be copied back to user space. */
1203 /* The size of the entry is stored in the ng_sz field and the enture natget */
1204 /* structure is copied back to the user. */
1205 /* ------------------------------------------------------------------------ */
1206 static int fr_natgetsz(data, getlock)
1207 void * data;
1208 int getlock;
1210 ap_session_t *aps;
1211 nat_t *nat, *n;
1212 natget_t ng;
1214 if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1215 return EFAULT;
1217 if (getlock) {
1218 READ_ENTER(&ipf_nat);
1221 nat = ng.ng_ptr;
1222 if (!nat) {
1223 nat = nat_instances;
1224 ng.ng_sz = 0;
1226 * Empty list so the size returned is 0. Simple.
1228 if (nat == NULL) {
1229 if (getlock) {
1230 RWLOCK_EXIT(&ipf_nat);
1232 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1233 return EFAULT;
1234 return 0;
1236 } else {
1238 * Make sure the pointer we're copying from exists in the
1239 * current list of entries. Security precaution to prevent
1240 * copying of random kernel data.
1242 for (n = nat_instances; n; n = n->nat_next)
1243 if (n == nat)
1244 break;
1245 if (n == NULL) {
1246 if (getlock) {
1247 RWLOCK_EXIT(&ipf_nat);
1249 return ESRCH;
1254 * Incluse any space required for proxy data structures.
1256 ng.ng_sz = sizeof(nat_save_t);
1257 aps = nat->nat_aps;
1258 if (aps != NULL) {
1259 ng.ng_sz += sizeof(ap_session_t) - 4;
1260 if (aps->aps_data != 0)
1261 ng.ng_sz += aps->aps_psiz;
1263 if (getlock) {
1264 RWLOCK_EXIT(&ipf_nat);
1267 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1268 return EFAULT;
1269 return 0;
1273 /* ------------------------------------------------------------------------ */
1274 /* Function: fr_natgetent */
1275 /* Returns: int - 0 == success, != 0 is the error value. */
1276 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1277 /* to NAT structure to copy out. */
1278 /* */
1279 /* Handle SIOCSTGET. */
1280 /* Copies out NAT entry to user space. Any additional data held for a */
1281 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1282 /* ------------------------------------------------------------------------ */
1283 static int fr_natgetent(data, getlock)
1284 void * data;
1285 int getlock;
1287 int error, outsize;
1288 ap_session_t *aps;
1289 nat_save_t *ipn, ipns;
1290 nat_t *n, *nat;
1292 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1293 if (error != 0)
1294 return error;
1296 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1297 return EINVAL;
1299 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1300 if (ipn == NULL)
1301 return ENOMEM;
1303 if (getlock) {
1304 READ_ENTER(&ipf_nat);
1307 ipn->ipn_dsize = ipns.ipn_dsize;
1308 nat = ipns.ipn_next;
1309 if (nat == NULL) {
1310 nat = nat_instances;
1311 if (nat == NULL) {
1312 if (nat_instances == NULL)
1313 error = ENOENT;
1314 goto finished;
1316 } else {
1318 * Make sure the pointer we're copying from exists in the
1319 * current list of entries. Security precaution to prevent
1320 * copying of random kernel data.
1322 for (n = nat_instances; n; n = n->nat_next)
1323 if (n == nat)
1324 break;
1325 if (n == NULL) {
1326 error = ESRCH;
1327 goto finished;
1330 ipn->ipn_next = nat->nat_next;
1333 * Copy the NAT structure.
1335 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1338 * If we have a pointer to the NAT rule it belongs to, save that too.
1340 if (nat->nat_ptr != NULL)
1341 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1342 sizeof(ipn->ipn_ipnat));
1345 * If we also know the NAT entry has an associated filter rule,
1346 * save that too.
1348 if (nat->nat_fr != NULL)
1349 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1350 sizeof(ipn->ipn_fr));
1353 * Last but not least, if there is an application proxy session set
1354 * up for this NAT entry, then copy that out too, including any
1355 * private data saved along side it by the proxy.
1357 aps = nat->nat_aps;
1358 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1359 if (aps != NULL) {
1360 char *s;
1362 if (outsize < sizeof(*aps)) {
1363 error = ENOBUFS;
1364 goto finished;
1367 s = ipn->ipn_data;
1368 bcopy((char *)aps, s, sizeof(*aps));
1369 s += sizeof(*aps);
1370 outsize -= sizeof(*aps);
1371 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1372 bcopy(aps->aps_data, s, aps->aps_psiz);
1373 else
1374 error = ENOBUFS;
1376 if (error == 0) {
1377 if (getlock) {
1378 RWLOCK_EXIT(&ipf_nat);
1379 getlock = 0;
1381 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1384 finished:
1385 if (getlock) {
1386 RWLOCK_EXIT(&ipf_nat);
1388 if (ipn != NULL) {
1389 KFREES(ipn, ipns.ipn_dsize);
1391 return error;
1395 /* ------------------------------------------------------------------------ */
1396 /* Function: fr_natputent */
1397 /* Returns: int - 0 == success, != 0 is the error value. */
1398 /* Parameters: data(I) - pointer to natget structure with NAT */
1399 /* structure information to load into the kernel */
1400 /* getlock(I) - flag indicating whether or not a write lock */
1401 /* on ipf_nat is already held. */
1402 /* */
1403 /* Handle SIOCSTPUT. */
1404 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1405 /* firewall rule data structures, if pointers to them indicate so. */
1406 /* ------------------------------------------------------------------------ */
1407 static int fr_natputent(data, getlock)
1408 void * data;
1409 int getlock;
1411 nat_save_t *ipn, *ipnn;
1412 ap_session_t *aps;
1413 nat_t *n, *nat;
1414 frentry_t *fr;
1415 fr_info_t *fin;
1416 ipnat_t *in;
1417 int error;
1420 * Initialise early because of code at junkput label.
1422 in = NULL;
1423 aps = NULL;
1424 nat = NULL;
1425 ipnn = NULL;
1426 fin = NULL;
1427 fr = NULL;
1429 KMALLOC(ipn, nat_save_t *);
1430 if (ipn == NULL)
1431 return ENOMEM;
1432 error = fr_inobj(data, ipn, IPFOBJ_NATSAVE);
1433 if (error != 0)
1434 goto junkput;
1437 * New entry, copy in the rest of the NAT entry if it's size is more
1438 * than just the nat_t structure.
1440 if (ipn->ipn_dsize > sizeof(*ipn)) {
1441 if (ipn->ipn_dsize > 81920) {
1442 error = ENOMEM;
1443 goto junkput;
1446 KMALLOCS(ipnn, nat_save_t *, ipn->ipn_dsize);
1447 if (ipnn == NULL) {
1448 KFREE(ipn);
1449 return ENOMEM;
1452 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn->ipn_dsize);
1453 if (error != 0) {
1454 error = EFAULT;
1455 goto junkput;
1457 } else
1458 ipnn = ipn;
1460 KMALLOC(nat, nat_t *);
1461 if (nat == NULL) {
1462 error = ENOMEM;
1463 goto junkput;
1466 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1468 * Initialize all these so that nat_delete() doesn't cause a crash.
1470 bzero((char *)nat, offsetof(struct nat, nat_tqe));
1471 nat->nat_tqe.tqe_pnext = NULL;
1472 nat->nat_tqe.tqe_next = NULL;
1473 nat->nat_tqe.tqe_ifq = NULL;
1474 nat->nat_tqe.tqe_parent = nat;
1477 * Restore the rule associated with this nat session
1479 in = ipnn->ipn_nat.nat_ptr;
1480 if (in != NULL) {
1481 KMALLOC(in, ipnat_t *);
1482 nat->nat_ptr = in;
1483 if (in == NULL) {
1484 error = ENOMEM;
1485 goto junkput;
1487 bzero((char *)in, offsetof(struct ipnat, in_next6));
1488 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1489 in->in_use = 1;
1490 in->in_flags |= IPN_DELETE;
1492 ATOMIC_INC(nat_stats.ns_rules);
1494 if (nat_resolverule(in) != 0) {
1495 error = ESRCH;
1496 goto junkput;
1501 * Check that the NAT entry doesn't already exist in the kernel.
1503 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry. To do
1504 * this, we check to see if the inbound combination of addresses and
1505 * ports is already known. Similar logic is applied for NAT_INBOUND.
1508 KMALLOC(fin, fr_info_t *);
1509 if (fin == NULL) {
1510 error = ENOMEM;
1511 goto junkput;
1513 bzero(fin, sizeof(*fin));
1514 fin->fin_p = nat->nat_p;
1515 fin->fin_ifp = nat->nat_ifps[0];
1516 if (nat->nat_dir == NAT_OUTBOUND) {
1517 fin->fin_data[0] = ntohs(nat->nat_oport);
1518 fin->fin_data[1] = ntohs(nat->nat_outport);
1519 fin->fin_ifp = nat->nat_ifps[0];
1520 if (getlock) {
1521 READ_ENTER(&ipf_nat);
1523 n = nat_inlookup(fin, nat->nat_flags, fin->fin_p,
1524 nat->nat_oip, nat->nat_inip);
1525 if (getlock) {
1526 RWLOCK_EXIT(&ipf_nat);
1528 if (n != NULL) {
1529 error = EEXIST;
1530 goto junkput;
1532 } else if (nat->nat_dir == NAT_INBOUND) {
1533 fin->fin_data[0] = ntohs(nat->nat_inport);
1534 fin->fin_data[1] = ntohs(nat->nat_oport);
1535 fin->fin_ifp = nat->nat_ifps[0];
1536 if (getlock) {
1537 READ_ENTER(&ipf_nat);
1539 n = nat_outlookup(fin, nat->nat_flags, fin->fin_p,
1540 nat->nat_outip, nat->nat_oip);
1541 if (getlock) {
1542 RWLOCK_EXIT(&ipf_nat);
1544 if (n != NULL) {
1545 error = EEXIST;
1546 goto junkput;
1548 } else {
1549 error = EINVAL;
1550 goto junkput;
1554 * Restore ap_session_t structure. Include the private data allocated
1555 * if it was there.
1557 aps = nat->nat_aps;
1558 if (aps != NULL) {
1559 KMALLOC(aps, ap_session_t *);
1560 nat->nat_aps = aps;
1561 if (aps == NULL) {
1562 error = ENOMEM;
1563 goto junkput;
1565 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1566 if (in != NULL)
1567 aps->aps_apr = in->in_apr;
1568 else
1569 aps->aps_apr = NULL;
1570 if (aps->aps_psiz != 0) {
1571 if (aps->aps_psiz > 81920) {
1572 error = ENOMEM;
1573 goto junkput;
1575 KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1576 if (aps->aps_data == NULL) {
1577 error = ENOMEM;
1578 goto junkput;
1580 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1581 aps->aps_psiz);
1582 } else {
1583 aps->aps_psiz = 0;
1584 aps->aps_data = NULL;
1589 * If there was a filtering rule associated with this entry then
1590 * build up a new one.
1592 fr = nat->nat_fr;
1593 if (fr != NULL) {
1594 if ((nat->nat_flags & SI_NEWFR) != 0) {
1595 KMALLOC(fr, frentry_t *);
1596 nat->nat_fr = fr;
1597 if (fr == NULL) {
1598 error = ENOMEM;
1599 goto junkput;
1601 ipnn->ipn_nat.nat_fr = fr;
1602 fr->fr_ref = 1;
1603 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1604 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1606 fr->fr_ref = 1;
1607 fr->fr_dsize = 0;
1608 fr->fr_data = NULL;
1609 fr->fr_type = FR_T_NONE;
1611 MUTEX_NUKE(&fr->fr_lock);
1612 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1613 } else {
1614 if (getlock) {
1615 READ_ENTER(&ipf_nat);
1617 for (n = nat_instances; n; n = n->nat_next)
1618 if (n->nat_fr == fr)
1619 break;
1621 if (n != NULL) {
1622 MUTEX_ENTER(&fr->fr_lock);
1623 fr->fr_ref++;
1624 MUTEX_EXIT(&fr->fr_lock);
1626 if (getlock) {
1627 RWLOCK_EXIT(&ipf_nat);
1630 if (!n) {
1631 error = ESRCH;
1632 goto junkput;
1637 if (ipnn != ipn) {
1638 KFREES(ipnn, ipn->ipn_dsize);
1639 ipnn = NULL;
1642 if (getlock) {
1643 WRITE_ENTER(&ipf_nat);
1645 error = nat_insert(nat, nat->nat_rev);
1646 if ((error == 0) && (aps != NULL)) {
1647 aps->aps_next = ap_sess_list;
1648 ap_sess_list = aps;
1650 if (getlock) {
1651 RWLOCK_EXIT(&ipf_nat);
1654 if (error == 0)
1655 return 0;
1657 error = ENOMEM;
1659 junkput:
1660 if (fin != NULL)
1661 KFREE(fin);
1662 if (fr != NULL)
1663 (void) fr_derefrule(&fr);
1665 if ((ipnn != NULL) && (ipnn != ipn)) {
1666 KFREES(ipnn, ipn->ipn_dsize);
1668 if (ipn != NULL)
1669 KFREE(ipn);
1670 if (nat != NULL) {
1671 if (aps != NULL) {
1672 if (aps->aps_data != NULL) {
1673 KFREES(aps->aps_data, aps->aps_psiz);
1675 KFREE(aps);
1677 if (in != NULL) {
1678 if (in->in_apr)
1679 appr_free(in->in_apr);
1680 KFREE(in);
1682 KFREE(nat);
1684 return error;
1688 /* ------------------------------------------------------------------------ */
1689 /* Function: nat_delete */
1690 /* Returns: Nil */
1691 /* Parameters: nat(I) - pointer to NAT structure to delete */
1692 /* logtype(I) - type of LOG record to create before deleting */
1693 /* Write Lock: ipf_nat */
1694 /* */
1695 /* Delete a nat entry from the various lists and table. If NAT logging is */
1696 /* enabled then generate a NAT log record for this event. */
/* The entry is always unlinked, but it is only freed once no other */
/* reference remains (see the nat_ref handling below); until then it is */
/* counted in nat_stats.ns_orphans. */
1697 /* ------------------------------------------------------------------------ */
1698 void nat_delete(nat, logtype)
1699 struct nat *nat;
1700 int logtype;
1702 struct ipnat *ipn;
/* removed: set when this call is the one that unlinked the entry */
1703 int removed = 0;
/* Emit the log record first, before the entry is modified. */
1705 if (logtype != 0 && nat_logging != 0)
1706 nat_log(nat, logtype);
1707 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
1708 ipf_rand_push(nat, sizeof(*nat));
1709 #endif
1712 * Take it as a general indication that all the pointers are set if
1713 * nat_pnext is set.
1715 if (nat->nat_pnext != NULL) {
1716 removed = 1;
1718 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1719 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
/* Unlink from the nat_next/nat_pnext list ... */
1721 *nat->nat_pnext = nat->nat_next;
1722 if (nat->nat_next != NULL) {
1723 nat->nat_next->nat_pnext = nat->nat_pnext;
1724 nat->nat_next = NULL;
1726 nat->nat_pnext = NULL;
/* ... and from both hash chains (index 0 and 1). */
1728 *nat->nat_phnext[0] = nat->nat_hnext[0];
1729 if (nat->nat_hnext[0] != NULL) {
1730 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1731 nat->nat_hnext[0] = NULL;
1733 nat->nat_phnext[0] = NULL;
1735 *nat->nat_phnext[1] = nat->nat_hnext[1];
1736 if (nat->nat_hnext[1] != NULL) {
1737 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1738 nat->nat_hnext[1] = NULL;
1740 nat->nat_phnext[1] = NULL;
1742 if ((nat->nat_flags & SI_WILDP) != 0)
1743 nat_stats.ns_wilds--;
/* Clear the external pointer that refers back to this entry, if any. */
1746 if (nat->nat_me != NULL) {
1747 *nat->nat_me = NULL;
1748 nat->nat_me = NULL;
/* Take the entry off its timeout queue. */
1751 if (nat->nat_tqe.tqe_ifq != NULL)
1752 fr_deletequeueentry(&nat->nat_tqe);
1754 if (logtype == NL_EXPIRE)
1755 nat_stats.ns_expire++;
1757 MUTEX_ENTER(&nat->nat_lock);
1759 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1760 * This happens when a nat'd packet is blocked and we want to throw
1761 * away the NAT session.
/* If references remain after dropping ours, stop here: the entry stays */
/* allocated and is counted as an orphan when this call unlinked it. */
1763 if (logtype == NL_DESTROY) {
1764 if (nat->nat_ref > 2) {
1765 nat->nat_ref -= 2;
1766 MUTEX_EXIT(&nat->nat_lock);
1767 if (removed)
1768 nat_stats.ns_orphans++;
1769 return;
1771 } else if (nat->nat_ref > 1) {
1772 nat->nat_ref--;
1773 MUTEX_EXIT(&nat->nat_lock);
1774 if (removed)
1775 nat_stats.ns_orphans++;
1776 return;
1778 MUTEX_EXIT(&nat->nat_lock);
1781 * At this point, nat_ref is 1, doing "--" would make it 0..
1783 nat->nat_ref = 0;
/* Entry was unlinked by an earlier call: it stops being an orphan now. */
1784 if (!removed)
1785 nat_stats.ns_orphans--;
1787 #ifdef IPFILTER_SYNC
1788 if (nat->nat_sync)
1789 ipfsync_del(nat->nat_sync);
1790 #endif
/* Release what the entry refers to: filter rule, host map, NAT rule. */
1792 if (nat->nat_fr != NULL)
1793 (void) fr_derefrule(&nat->nat_fr);
1795 if (nat->nat_hm != NULL)
1796 fr_hostmapdel(&nat->nat_hm);
1799 * If there is an active reference from the nat entry to its parent
1800 * rule, decrement the rule's reference count and free it too if no
1801 * longer being used.
1803 ipn = nat->nat_ptr;
1804 if (ipn != NULL) {
1805 fr_ipnatderef(&ipn);
1808 MUTEX_DESTROY(&nat->nat_lock);
1810 aps_free(nat->nat_aps);
1811 nat_stats.ns_inuse--;
1814 * If there's a fragment table entry too for this nat entry, then
1815 * dereference that as well. This is after nat_lock is released
1816 * because of Tru64.
1818 fr_forgetnat((void *)nat);
1820 KFREE(nat);
1824 /* ------------------------------------------------------------------------ */
1825 /* Function: nat_flushtable */
1826 /* Returns: int - number of NAT rules deleted */
1827 /* Parameters: Nil */
1828 /* */
1829 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */
1830 /* log record should be emitted in nat_delete() if NAT logging is enabled. */
1831 /* ------------------------------------------------------------------------ */
1833 * nat_flushtable - clear the NAT table of all mapping entries.
1835 static int nat_flushtable()
1837 nat_t *nat;
1838 int j = 0;
1841 * ALL NAT mappings deleted, so lets just make the deletions
1842 * quicker.
1844 if (nat_table[0] != NULL)
1845 bzero((char *)nat_table[0],
1846 sizeof(nat_table[0]) * ipf_nattable_sz);
1847 if (nat_table[1] != NULL)
1848 bzero((char *)nat_table[1],
1849 sizeof(nat_table[1]) * ipf_nattable_sz);
1851 while ((nat = nat_instances) != NULL) {
1852 nat_delete(nat, NL_FLUSH);
1853 j++;
1856 return j;
1860 /* ------------------------------------------------------------------------ */
1861 /* Function: nat_clearlist */
1862 /* Returns: int - number of NAT/RDR rules deleted */
1863 /* Parameters: Nil */
1864 /* */
1865 /* Delete all rules in the current list of rules. There is nothing elegant */
1866 /* about this cleanup: simply free all entries on the list of rules and */
1867 /* clear out the tables used for hashed NAT rule lookups. */
1868 /* ------------------------------------------------------------------------ */
1869 static int nat_clearlist()
1871 ipnat_t *n, **np = &nat_list;
1872 int i = 0;
1874 if (nat_rules != NULL)
1875 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1876 if (rdr_rules != NULL)
1877 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1879 while ((n = *np) != NULL) {
1880 *np = n->in_next;
1881 if (n->in_use == 0) {
1882 if (n->in_apr != NULL)
1883 appr_free(n->in_apr);
1884 MUTEX_DESTROY(&n->in_lock);
1885 KFREE(n);
1886 nat_stats.ns_rules--;
1887 } else {
1888 n->in_flags |= IPN_DELETE;
1889 n->in_next = NULL;
1891 i++;
1893 #if SOLARIS && !defined(_INET_IP_STACK_H)
1894 pfil_delayed_copy = 1;
1895 #endif
1896 nat_masks = 0;
1897 rdr_masks = 0;
1898 return i;
1902 /* ------------------------------------------------------------------------ */
1903 /* Function: nat_newmap */
1904 /* Returns: int - -1 == error, 0 == success */
1905 /* Parameters: fin(I) - pointer to packet information */
1906 /* nat(I) - pointer to NAT entry */
1907 /* ni(I) - pointer to structure with misc. information needed */
1908 /* to create new NAT entry. */
1909 /* */
1910 /* Given an empty NAT structure, populate it with new information about a */
1911 /* new NAT session, as defined by the matching NAT rule. */
1912 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1913 /* to the new IP address for the translation. */
1914 /* ------------------------------------------------------------------------ */
1915 static INLINE int nat_newmap(fin, nat, ni)
1916 fr_info_t *fin;
1917 nat_t *nat;
1918 natinfo_t *ni;
1920 u_short st_port, dport, sport, port, sp, dp;
1921 struct in_addr in, inb;
1922 hostmap_t *hm;
1923 u_32_t flags;
1924 u_32_t st_ip;
1925 ipnat_t *np;
1926 nat_t *natl;
1927 int l;
1930 * If it's an outbound packet which doesn't match any existing
1931 * record, then create a new port
1933 l = 0;
1934 hm = NULL;
1935 np = ni->nai_np;
1936 st_ip = np->in_nip;
1937 st_port = np->in_pnext;
1938 flags = ni->nai_flags;
1939 sport = ni->nai_sport;
1940 dport = ni->nai_dport;
1943 * Do a loop until we either run out of entries to try or we find
1944 * a NAT mapping that isn't currently being used. This is done
1945 * because the change to the source is not (usually) being fixed.
1947 do {
1948 port = 0;
1949 in.s_addr = htonl(np->in_nip);
1950 if (l == 0) {
1952 * Check to see if there is an existing NAT
1953 * setup for this IP address pair.
1955 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1956 in, 0);
1957 if (hm != NULL)
1958 in.s_addr = hm->hm_mapip.s_addr;
1959 } else if ((l == 1) && (hm != NULL)) {
1960 fr_hostmapdel(&hm);
1962 in.s_addr = ntohl(in.s_addr);
1964 nat->nat_hm = hm;
1966 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1967 if (l > 0)
1968 return -1;
1971 if (np->in_redir == NAT_BIMAP &&
1972 np->in_inmsk == np->in_outmsk) {
1974 * map the address block in a 1:1 fashion
1976 in.s_addr = np->in_outip;
1977 in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1978 in.s_addr = ntohl(in.s_addr);
1980 } else if (np->in_redir & NAT_MAPBLK) {
1981 if ((l >= np->in_ppip) || ((l > 0) &&
1982 !(flags & IPN_TCPUDP)))
1983 return -1;
1985 * map-block - Calculate destination address.
1987 in.s_addr = ntohl(fin->fin_saddr);
1988 in.s_addr &= ntohl(~np->in_inmsk);
1989 inb.s_addr = in.s_addr;
1990 in.s_addr /= np->in_ippip;
1991 in.s_addr &= ntohl(~np->in_outmsk);
1992 in.s_addr += ntohl(np->in_outip);
1994 * Calculate destination port.
1996 if ((flags & IPN_TCPUDP) &&
1997 (np->in_ppip != 0)) {
1998 port = ntohs(sport) + l;
1999 port %= np->in_ppip;
2000 port += np->in_ppip *
2001 (inb.s_addr % np->in_ippip);
2002 port += MAPBLK_MINPORT;
2003 port = htons(port);
2006 } else if ((np->in_outip == 0) &&
2007 (np->in_outmsk == 0xffffffff)) {
2009 * 0/32 - use the interface's IP address.
2011 if ((l > 0) ||
2012 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2013 &in, NULL) == -1)
2014 return -1;
2015 in.s_addr = ntohl(in.s_addr);
2017 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2019 * 0/0 - use the original source address/port.
2021 if (l > 0)
2022 return -1;
2023 in.s_addr = ntohl(fin->fin_saddr);
2025 } else if ((np->in_outmsk != 0xffffffff) &&
2026 (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2027 np->in_nip++;
2029 natl = NULL;
2031 if ((flags & IPN_TCPUDP) &&
2032 ((np->in_redir & NAT_MAPBLK) == 0) &&
2033 (np->in_flags & IPN_AUTOPORTMAP)) {
2035 * "ports auto" (without map-block)
2037 if ((l > 0) && (l % np->in_ppip == 0)) {
2038 if (l > np->in_space) {
2039 return -1;
2040 } else if ((l > np->in_ppip) &&
2041 np->in_outmsk != 0xffffffff)
2042 np->in_nip++;
2044 if (np->in_ppip != 0) {
2045 port = ntohs(sport);
2046 port += (l % np->in_ppip);
2047 port %= np->in_ppip;
2048 port += np->in_ppip *
2049 (ntohl(fin->fin_saddr) %
2050 np->in_ippip);
2051 port += MAPBLK_MINPORT;
2052 port = htons(port);
2055 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2056 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2058 * Standard port translation. Select next port.
2060 if (np->in_flags & IPN_SEQUENTIAL) {
2061 port = np->in_pnext;
2062 } else {
2063 port = ipf_random() % (ntohs(np->in_pmax) -
2064 ntohs(np->in_pmin));
2065 port += ntohs(np->in_pmin);
2067 port = htons(port);
2068 np->in_pnext++;
2070 if (np->in_pnext > ntohs(np->in_pmax)) {
2071 np->in_pnext = ntohs(np->in_pmin);
2072 if (np->in_outmsk != 0xffffffff)
2073 np->in_nip++;
2077 if (np->in_flags & IPN_IPRANGE) {
2078 if (np->in_nip > ntohl(np->in_outmsk))
2079 np->in_nip = ntohl(np->in_outip);
2080 } else {
2081 if ((np->in_outmsk != 0xffffffff) &&
2082 ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2083 ntohl(np->in_outip))
2084 np->in_nip = ntohl(np->in_outip) + 1;
2087 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2088 port = sport;
2091 * Here we do a lookup of the connection as seen from
2092 * the outside. If an IP# pair already exists, try
2093 * again. So if you have A->B becomes C->B, you can
2094 * also have D->E become C->E but not D->B causing
2095 * another C->B. Also take protocol and ports into
2096 * account when determining whether a pre-existing
2097 * NAT setup will cause an external conflict where
2098 * this is appropriate.
2100 inb.s_addr = htonl(in.s_addr);
2101 sp = fin->fin_data[0];
2102 dp = fin->fin_data[1];
2103 fin->fin_data[0] = fin->fin_data[1];
2104 fin->fin_data[1] = htons(port);
2105 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2106 (u_int)fin->fin_p, fin->fin_dst, inb);
2107 fin->fin_data[0] = sp;
2108 fin->fin_data[1] = dp;
2111 * Has the search wrapped around and come back to the
2112 * start ?
2114 if ((natl != NULL) &&
2115 (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2116 (np->in_nip != 0) && (st_ip == np->in_nip))
2117 return -1;
2118 l++;
2119 } while (natl != NULL);
2121 if (np->in_space > 0)
2122 np->in_space--;
2124 /* Setup the NAT table */
2125 nat->nat_inip = fin->fin_src;
2126 nat->nat_outip.s_addr = htonl(in.s_addr);
2127 nat->nat_oip = fin->fin_dst;
2128 if (nat->nat_hm == NULL)
2129 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2130 nat->nat_outip, 0);
2133 * The ICMP checksum does not have a pseudo header containing
2134 * the IP addresses
2136 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2137 ni->nai_sum2 = LONG_SUM(in.s_addr);
2138 if ((flags & IPN_TCPUDP)) {
2139 ni->nai_sum1 += ntohs(sport);
2140 ni->nai_sum2 += ntohs(port);
2143 if (flags & IPN_TCPUDP) {
2144 nat->nat_inport = sport;
2145 nat->nat_outport = port; /* sport */
2146 nat->nat_oport = dport;
2147 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2148 } else if (flags & IPN_ICMPQUERY) {
2149 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2150 nat->nat_inport = port;
2151 nat->nat_outport = port;
2152 #if 0
2153 } else if (fin->fin_p == IPPROTO_GRE) {
2154 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2155 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2156 nat->nat_oport = 0;/*fin->fin_data[1];*/
2157 nat->nat_inport = 0;/*fin->fin_data[0];*/
2158 nat->nat_outport = 0;/*fin->fin_data[0];*/
2159 nat->nat_call[0] = fin->fin_data[0];
2160 nat->nat_call[1] = fin->fin_data[0];
2162 #endif
2164 ni->nai_ip.s_addr = in.s_addr;
2165 ni->nai_port = port;
2166 ni->nai_nport = dport;
2167 return 0;
2171 /* ------------------------------------------------------------------------ */
2172 /* Function: nat_newrdr */
2173 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */
2174 /* allow rule to be moved if IPN_ROUNDR is set. */
2175 /* Parameters: fin(I) - pointer to packet information */
2176 /* nat(I) - pointer to NAT entry */
2177 /* ni(I) - pointer to structure with misc. information needed */
2178 /* to create new NAT entry. */
2179 /* */
/* Work out the address and port a redirected packet is to be rewritten */
/* to, fail if nat_outlookup() already finds a session for that result, */
/* and otherwise record the translation in nat and ni. */
/* */
2180 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2181 /* to the new IP address for the translation. */
2182 /* ------------------------------------------------------------------------ */
2183 static INLINE int nat_newrdr(fin, nat, ni)
2184 fr_info_t *fin;
2185 nat_t *nat;
2186 natinfo_t *ni;
2188 u_short nport, dport, sport;
2189 struct in_addr in, inb;
2190 u_short sp, dp;
2191 hostmap_t *hm;
2192 u_32_t flags;
2193 ipnat_t *np;
2194 nat_t *natl;
2195 int move;
/* move is the return value: 1 unless one of the cases below clears it */
2197 move = 1;
2198 hm = NULL;
2199 in.s_addr = 0;
2200 np = ni->nai_np;
2201 flags = ni->nai_flags;
2202 sport = ni->nai_sport;
2203 dport = ni->nai_dport;
2206 * If the matching rule has IPN_STICKY set, then we want to have the
2207 * same rule kick in as before. Why would this happen? If you have
2208 * a collection of rdr rules with "round-robin sticky", the current
2209 * packet might match a different one to the previous connection but
2210 * we want the same destination to be used.
2212 if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2213 ((np->in_flags & IPN_STICKY) != 0)) {
2214 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2215 (u_32_t)dport);
2216 if (hm != NULL) {
/* Reuse the rule and mapped address recorded in the host map entry. */
2217 in.s_addr = ntohl(hm->hm_mapip.s_addr);
2218 np = hm->hm_ipnat;
2219 ni->nai_np = np;
2220 move = 0;
2225 * Otherwise, it's an inbound packet. Most likely, we don't
2226 * want to rewrite source ports and source addresses. Instead,
2227 * we want to rewrite to a fixed internal address and fixed
2228 * internal port.
2230 if (np->in_flags & IPN_SPLIT) {
2231 in.s_addr = np->in_nip;
2233 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2234 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2235 in, (u_32_t)dport);
2236 if (hm != NULL) {
2237 in.s_addr = hm->hm_mapip.s_addr;
2238 move = 0;
/* Flip in_nip between in_inip and in_inmsk for the next use of the rule */
/* NOTE(review): in_inmsk presumably holds the second target address for */
/* split rules - confirm against where IPN_SPLIT rules are set up. */
2242 if (hm == NULL || hm->hm_ref == 1) {
2243 if (np->in_inip == htonl(in.s_addr)) {
2244 np->in_nip = ntohl(np->in_inmsk);
2245 move = 0;
2246 } else {
2247 np->in_nip = ntohl(np->in_inip);
2251 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2253 * 0/32 - use the interface's IP address.
2255 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2256 return -1;
2257 in.s_addr = ntohl(in.s_addr);
2259 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2261 * 0/0 - use the original destination address/port.
2263 in.s_addr = ntohl(fin->fin_daddr);
2265 } else if (np->in_redir == NAT_BIMAP &&
2266 np->in_inmsk == np->in_outmsk) {
2268 * map the address block in a 1:1 fashion
2270 in.s_addr = np->in_inip;
2271 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2272 in.s_addr = ntohl(in.s_addr);
2273 } else {
2274 in.s_addr = ntohl(np->in_inip);
/* Choose the new destination port: keep the packet's own port when the */
/* rule sets none (in_pnext == 0) or NAT_NOTRULEPORT is given. */
2277 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2278 nport = dport;
2279 else {
2281 * Whilst not optimized for the case where
2282 * pmin == pmax, the gain is not significant.
2284 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2285 (np->in_pmin != np->in_pmax)) {
/* Port range: keep the packet's offset from in_pmin, rebased on in_pnext */
2286 nport = ntohs(dport) - ntohs(np->in_pmin) +
2287 ntohs(np->in_pnext);
2288 nport = htons(nport);
2289 } else
2290 nport = np->in_pnext;
2294 * When the redirect-to address is set to 0.0.0.0, just
2295 * assume a blank `forwarding' of the packet. We don't
2296 * setup any translation for this either.
2298 if (in.s_addr == 0) {
2299 if (nport == dport)
2300 return -1;
2301 in.s_addr = ntohl(fin->fin_daddr);
2305 * Check to see if this redirect mapping already exists and if
2306 * it does, return "failure" (allowing it to be created will just
2307 * cause one or both of these "connections" to stop working.)
/* fin_data[] is changed only for the lookup and restored right after it. */
2309 inb.s_addr = htonl(in.s_addr);
2310 sp = fin->fin_data[0];
2311 dp = fin->fin_data[1];
2312 fin->fin_data[1] = fin->fin_data[0];
2313 fin->fin_data[0] = ntohs(nport);
2314 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2315 (u_int)fin->fin_p, inb, fin->fin_src);
2316 fin->fin_data[0] = sp;
2317 fin->fin_data[1] = dp;
2318 if (natl != NULL)
2319 return -1;
/* No conflict: fill in the addresses of the new NAT entry. */
2321 nat->nat_inip.s_addr = htonl(in.s_addr);
2322 nat->nat_outip = fin->fin_dst;
2323 nat->nat_oip = fin->fin_src;
2324 if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2325 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2326 (u_32_t)dport);
/* Sums over the old and the new destination address and port. */
2328 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2329 ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2331 ni->nai_ip.s_addr = in.s_addr;
2332 ni->nai_nport = nport;
2333 ni->nai_port = sport;
/* Record the ports in the NAT entry and write the new destination port */
/* (or ICMP id) into the packet header that fin_dp points at. */
2335 if (flags & IPN_TCPUDP) {
2336 nat->nat_inport = nport;
2337 nat->nat_outport = dport;
2338 nat->nat_oport = sport;
2339 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2340 } else if (flags & IPN_ICMPQUERY) {
2341 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2342 nat->nat_inport = nport;
2343 nat->nat_outport = nport;
2344 #if 0
2345 } else if (fin->fin_p == IPPROTO_GRE) {
2346 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2347 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2348 nat->nat_call[0] = fin->fin_data[0];
2349 nat->nat_call[1] = fin->fin_data[1];
2350 nat->nat_oport = 0; /*fin->fin_data[0];*/
2351 nat->nat_inport = 0; /*fin->fin_data[1];*/
2352 nat->nat_outport = 0; /*fin->fin_data[1];*/
2354 #endif
2357 return move;
2360 /* ------------------------------------------------------------------------ */
2361 /* Function: nat_new */
2362 /* Returns: nat_t* - NULL == failure to create new NAT structure, */
2363 /* else pointer to new NAT structure */
2364 /* Parameters: fin(I) - pointer to packet information */
2365 /* np(I) - pointer to NAT rule */
2366 /* natsave(I) - pointer to where to store NAT struct pointer */
2367 /* flags(I) - flags describing the current packet */
2368 /* direction(I) - direction of packet (in/out) */
2369 /* Write Lock: ipf_nat */
2370 /* */
2371 /* Attempts to create a new NAT entry. Does not actually change the packet */
2372 /* in any way. */
2373 /* */
2374 /* This fucntion is in three main parts: (1) deal with creating a new NAT */
2375 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */
2376 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2377 /* and (3) building that structure and putting it into the NAT table(s). */
2378 /* */
2379 /* NOTE: natsave should NOT be used top point back to an ipstate_t struct */
2380 /* as it can result in memory being corrupted. */
2381 /* ------------------------------------------------------------------------ */
2382 nat_t *nat_new(fin, np, natsave, flags, direction)
2383 fr_info_t *fin;
2384 ipnat_t *np;
2385 nat_t **natsave;
2386 u_int flags;
2387 int direction;
2389 u_short port = 0, sport = 0, dport = 0, nport = 0;
2390 tcphdr_t *tcp = NULL;
2391 hostmap_t *hm = NULL;
2392 struct in_addr in;
2393 nat_t *nat, *natl;
2394 u_int nflags;
2395 natinfo_t ni;
2396 u_32_t sumd;
2397 int move;
2398 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2399 qpktinfo_t *qpi = fin->fin_qpi;
2400 #endif
2402 memset(&ni, 0, sizeof ni); /* XXX gcc */
2404 if (nat_stats.ns_inuse >= ipf_nattable_max) {
2405 nat_stats.ns_memfail++;
2406 fr_nat_doflush = 1;
2407 return NULL;
2410 move = 1;
2411 nflags = np->in_flags & flags;
2412 nflags &= NAT_FROMRULE;
2414 ni.nai_np = np;
2415 ni.nai_nflags = nflags;
2416 ni.nai_flags = flags;
2417 ni.nai_dport = 0;
2418 ni.nai_sport = 0;
2420 /* Give me a new nat */
2421 KMALLOC(nat, nat_t *);
2422 if (nat == NULL) {
2423 nat_stats.ns_memfail++;
2425 * Try to automatically tune the max # of entries in the
2426 * table allowed to be less than what will cause kmem_alloc()
2427 * to fail and try to eliminate panics due to out of memory
2428 * conditions arising.
2430 if (ipf_nattable_max > ipf_nattable_sz) {
2431 ipf_nattable_max = nat_stats.ns_inuse - 100;
2432 printf("ipf_nattable_max reduced to %d\n",
2433 ipf_nattable_max);
2435 return NULL;
2438 if (flags & IPN_TCPUDP) {
2439 tcp = fin->fin_dp;
2440 ni.nai_sport = htons(fin->fin_sport);
2441 ni.nai_dport = htons(fin->fin_dport);
2442 } else if (flags & IPN_ICMPQUERY) {
2444 * In the ICMP query NAT code, we translate the ICMP id fields
2445 * to make them unique. This is indepedent of the ICMP type
2446 * (e.g. in the unlikely event that a host sends an echo and
2447 * an tstamp request with the same id, both packets will have
2448 * their ip address/id field changed in the same way).
2450 /* The icmp_id field is used by the sender to identify the
2451 * process making the icmp request. (the receiver justs
2452 * copies it back in its response). So, it closely matches
2453 * the concept of source port. We overlay sport, so we can
2454 * maximally reuse the existing code.
2456 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2457 ni.nai_dport = ni.nai_sport;
2460 bzero((char *)nat, sizeof(*nat));
2461 nat->nat_flags = flags;
2462 nat->nat_redir = np->in_redir;
2465 * Search the current table for a match.
2467 if (direction == NAT_OUTBOUND) {
2469 * We can now arrange to call this for the same connection
2470 * because ipf_nat_new doesn't protect the code path into
2471 * this function.
2473 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2474 fin->fin_src, fin->fin_dst);
2475 if (natl != NULL) {
2476 KFREE(nat);
2477 nat = natl;
2478 goto done;
2481 move = nat_newmap(fin, nat, &ni);
2482 if (move == -1)
2483 goto badnat;
2485 np = ni.nai_np;
2486 in = ni.nai_ip;
2487 } else {
2489 * NAT_INBOUND is used only for redirects rules
2491 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2492 fin->fin_src, fin->fin_dst);
2493 if (natl != NULL) {
2494 KFREE(nat);
2495 nat = natl;
2496 goto done;
2499 move = nat_newrdr(fin, nat, &ni);
2500 if (move == -1)
2501 goto badnat;
2503 np = ni.nai_np;
2504 in = ni.nai_ip;
2506 port = ni.nai_port;
2507 nport = ni.nai_nport;
2509 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2510 if (np->in_redir == NAT_REDIRECT) {
2511 nat_delrdr(np);
2512 nat_addrdr(np);
2513 } else if (np->in_redir == NAT_MAP) {
2514 nat_delnat(np);
2515 nat_addnat(np);
2519 if (flags & IPN_TCPUDP) {
2520 sport = ni.nai_sport;
2521 dport = ni.nai_dport;
2522 } else if (flags & IPN_ICMPQUERY) {
2523 sport = ni.nai_sport;
2524 dport = 0;
2527 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2528 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2529 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2530 if ((flags & IPN_TCP) && dohwcksum &&
2531 (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2532 if (direction == NAT_OUTBOUND)
2533 ni.nai_sum1 = LONG_SUM(in.s_addr);
2534 else
2535 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2536 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2537 ni.nai_sum1 += 30;
2538 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2539 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2540 } else
2541 #endif
2542 nat->nat_sumd[1] = nat->nat_sumd[0];
2544 if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2545 if (direction == NAT_OUTBOUND)
2546 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2547 else
2548 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2550 ni.nai_sum2 = LONG_SUM(in.s_addr);
2552 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2553 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2554 } else {
2555 nat->nat_ipsumd = nat->nat_sumd[0];
2556 if (!(flags & IPN_TCPUDPICMP)) {
2557 nat->nat_sumd[0] = 0;
2558 nat->nat_sumd[1] = 0;
2562 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2563 fr_nat_doflush = 1;
2564 goto badnat;
2566 if (flags & SI_WILDP)
2567 nat_stats.ns_wilds++;
2568 fin->fin_flx |= FI_NEWNAT;
2569 goto done;
2570 badnat:
2571 nat_stats.ns_badnat++;
2572 if ((hm = nat->nat_hm) != NULL)
2573 fr_hostmapdel(&hm);
2574 KFREE(nat);
2575 nat = NULL;
2576 done:
2577 if (nat != NULL && np != NULL)
2578 np->in_hits++;
2579 return nat;
2583 /* ------------------------------------------------------------------------ */
2584 /* Function: nat_finalise */
2585 /* Returns: int - 0 == sucess, -1 == failure */
2586 /* Parameters: fin(I) - pointer to packet information */
2587 /* nat(I) - pointer to NAT entry */
2588 /* ni(I) - pointer to structure with misc. information needed */
2589 /* to create new NAT entry. */
2590 /* Write Lock: ipf_nat */
2591 /* */
2592 /* This is the tail end of constructing a new NAT entry and is the same */
2593 /* for both IPv4 and IPv6. */
2594 /* ------------------------------------------------------------------------ */
2595 /*ARGSUSED*/
2596 static int nat_finalise(
2597 fr_info_t *fin,
2598 nat_t *nat,
2599 natinfo_t *ni,
2600 tcphdr_t *tcp,
2601 nat_t **natsave,
2602 int direction
2605 frentry_t *fr;
2606 ipnat_t *np;
2608 np = ni->nai_np;
2610 if (np->in_ifps[0] != NULL) {
2611 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2613 if (np->in_ifps[1] != NULL) {
2614 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2616 #ifdef IPFILTER_SYNC
2617 if ((nat->nat_flags & SI_CLONE) == 0)
2618 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2619 #endif
2621 nat->nat_me = natsave;
2622 nat->nat_dir = direction;
2623 nat->nat_ifps[0] = np->in_ifps[0];
2624 nat->nat_ifps[1] = np->in_ifps[1];
2625 nat->nat_ptr = np;
2626 nat->nat_p = fin->fin_p;
2627 nat->nat_mssclamp = np->in_mssclamp;
2628 if (nat->nat_flags & IPN_TCP)
2629 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2631 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2632 if (appr_new(fin, nat) == -1)
2633 return -1;
2635 if (nat_insert(nat, fin->fin_rev) == 0) {
2636 if (nat_logging)
2637 nat_log(nat, (u_int)np->in_redir);
2638 np->in_use++;
2639 fr = fin->fin_fr;
2640 nat->nat_fr = fr;
2641 if (fr != NULL) {
2642 MUTEX_ENTER(&fr->fr_lock);
2643 fr->fr_ref++;
2644 MUTEX_EXIT(&fr->fr_lock);
2646 return 0;
2650 * nat_insert failed, so cleanup time...
2652 return -1;
2656 /* ------------------------------------------------------------------------ */
2657 /* Function: nat_insert */
2658 /* Returns: int - 0 == sucess, -1 == failure */
2659 /* Parameters: nat(I) - pointer to NAT structure */
2660 /* rev(I) - flag indicating forward/reverse direction of packet */
2661 /* Write Lock: ipf_nat */
2662 /* */
2663 /* Insert a NAT entry into the hash tables for searching and add it to the */
2664 /* list of active NAT entries. Adjust global counters when complete. */
2665 /* ------------------------------------------------------------------------ */
2666 int nat_insert(nat, rev)
2667 nat_t *nat;
2668 int rev;
2670 u_int hv1, hv2;
2671 nat_t **natp;
2674 * Try and return an error as early as possible, so calculate the hash
2675 * entry numbers first and then proceed.
2677 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2678 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2679 0xffffffff);
2680 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2681 ipf_nattable_sz);
2682 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2683 0xffffffff);
2684 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2685 ipf_nattable_sz);
2686 } else {
2687 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2688 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2689 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2690 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2693 if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2694 nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2695 return -1;
2698 nat->nat_hv[0] = hv1;
2699 nat->nat_hv[1] = hv2;
2701 MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2703 nat->nat_rev = rev;
2704 nat->nat_ref = 1;
2706 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2707 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2709 if (nat->nat_ifnames[1][0] != '\0') {
2710 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2711 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2712 } else {
2713 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2714 LIFNAMSIZ);
2715 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2716 nat->nat_ifps[1] = nat->nat_ifps[0];
2719 nat->nat_next = nat_instances;
2720 nat->nat_pnext = &nat_instances;
2721 if (nat_instances)
2722 nat_instances->nat_pnext = &nat->nat_next;
2723 nat_instances = nat;
2726 * Bump this before the hash table inserts.
2728 nat_stats.ns_added++;
2730 natp = &nat_table[0][hv1];
2731 nat->nat_phnext[0] = natp;
2732 nat->nat_hnext[0] = *natp;
2733 if (*natp)
2734 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2735 *natp = nat;
2736 nat_stats.ns_bucketlen[0][hv1]++;
2738 natp = &nat_table[1][hv2];
2739 nat->nat_phnext[1] = natp;
2740 nat->nat_hnext[1] = *natp;
2741 if (*natp)
2742 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2743 *natp = nat;
2744 nat_stats.ns_bucketlen[1][hv2]++;
2746 fr_setnatqueue(nat, rev);
2748 nat_stats.ns_inuse++;
2749 return 0;
2753 /* ------------------------------------------------------------------------ */
2754 /* Function: nat_icmperrorlookup */
2755 /* Returns: nat_t* - point to matching NAT structure */
2756 /* Parameters: fin(I) - pointer to packet information */
2757 /* dir(I) - direction of packet (in/out) */
2758 /* */
2759 /* Check if the ICMP error message is related to an existing TCP, UDP or */
2760 /* ICMP query nat entry. It is assumed that the packet is already of the */
2761 /* required length. */
2762 /* ------------------------------------------------------------------------ */
/* Find the NAT entry that the packet quoted inside an ICMP error belongs */
/* to.  Returns NULL when any of the sanity checks below fails or when no */
/* entry matches. */
2763 nat_t *nat_icmperrorlookup(fin, dir)
2764 fr_info_t *fin;
2765 int dir;
2767 int flags = 0, type, minlen;
2768 icmphdr_t *icmp, *orgicmp;
2769 tcphdr_t *tcp = NULL;
2770 u_short data[2];
2771 nat_t *nat;
2772 ip_t *oip;
2773 u_int p;
2775 icmp = fin->fin_dp;
/* NOTE(review): type is assigned here but not read again in this function. */
2776 type = icmp->icmp_type;
2778 * Does it at least have the return (basic) IP header ?
2779 * Only a basic IP header (no options) should be with an ICMP error
2780 * header. Also, if it's not an error type, then return.
2782 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2783 return NULL;
2786 * Check packet size
/* oip is the quoted IP header, located 8 bytes past fin_dp; minlen is */
/* the header length that quoted header claims for itself. */
2788 oip = (ip_t *)((char *)fin->fin_dp + 8);
2789 minlen = IP_HL(oip) << 2;
2790 if ((minlen < sizeof(ip_t)) ||
2791 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2792 return NULL;
2794 * Is the buffer big enough for all of it ? It's the size of the IP
2795 * header claimed in the encapsulated part which is of concern. It
2796 * may be too big to be in this buffer but not so big that it's
2797 * outside the ICMP packet, leading to TCP deref's causing problems.
2798 * This is possible because we don't know how big oip_hl is when we
2799 * do the pullup early in fr_check() and thus can't gaurantee it is
2800 * all here now.
2802 #ifdef _KERNEL
2804 mb_t *m;
2806 m = fin->fin_m;
2807 # if defined(MENTAT)
2808 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2809 return NULL;
2810 # else
2811 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2812 (char *)fin->fin_ip + M_LEN(m))
2813 return NULL;
2814 # endif
2816 #endif
/* The quoted packet's source must be the address this error is sent to. */
2818 if (fin->fin_daddr != oip->ip_src.s_addr)
2819 return NULL;
/* Pick lookup flags from the protocol of the quoted packet. */
2821 p = oip->ip_p;
2822 if (p == IPPROTO_TCP)
2823 flags = IPN_TCP;
2824 else if (p == IPPROTO_UDP)
2825 flags = IPN_UDP;
2826 else if (p == IPPROTO_ICMP) {
2827 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2829 /* see if this is related to an ICMP query */
2830 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
/* fin_data[] is borrowed to carry the quoted ICMP id into the lookup */
/* and is put back before returning. */
2831 data[0] = fin->fin_data[0];
2832 data[1] = fin->fin_data[1];
2833 fin->fin_data[0] = 0;
2834 fin->fin_data[1] = orgicmp->icmp_id;
2836 flags = IPN_ICMPERR|IPN_ICMPQUERY;
2838 * NOTE : dir refers to the direction of the original
2839 * ip packet. By definition the icmp error
2840 * message flows in the opposite direction.
2842 if (dir == NAT_INBOUND)
2843 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2844 oip->ip_src);
2845 else
2846 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2847 oip->ip_src);
2848 fin->fin_data[0] = data[0];
2849 fin->fin_data[1] = data[1];
2850 return nat;
2854 if (flags & IPN_TCPUDP) {
2855 minlen += 8; /* + 64bits of data to get ports */
2856 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2857 return NULL;
/* fin_data[] is borrowed for the lookup: slot 0 receives the quoted */
/* destination port and slot 1 the quoted source port, i.e. swapped */
/* relative to the quoted header.  Both slots are restored afterwards. */
2859 data[0] = fin->fin_data[0];
2860 data[1] = fin->fin_data[1];
2861 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2862 fin->fin_data[0] = ntohs(tcp->th_dport);
2863 fin->fin_data[1] = ntohs(tcp->th_sport);
2865 if (dir == NAT_INBOUND) {
2866 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2867 oip->ip_src);
2868 } else {
2869 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2870 oip->ip_src);
2872 fin->fin_data[0] = data[0];
2873 fin->fin_data[1] = data[1];
2874 return nat;
/* Anything else (including ICMP that is not a query type): look up on */
/* addresses and protocol only, with no flags. */
2876 if (dir == NAT_INBOUND)
2877 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2878 else
2879 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2883 /* ------------------------------------------------------------------------ */
2884 /* Function: nat_icmperror */
2885 /* Returns: nat_t* - point to matching NAT structure */
2886 /* Parameters: fin(I) - pointer to packet information */
2887 /* nflags(I) - NAT flags for this packet */
2888 /* dir(I) - direction of packet (in/out) */
2889 /* */
2890 /* Fix up an ICMP packet which is an error message for an existing NAT */
2891 /* session. This will correct both packet header data and checksums. */
2892 /* */
2893 /* This should *ONLY* be used for incoming ICMP error packets to make sure */
2894 /* a NAT'd ICMP packet gets correctly recognised. */
2895 /* ------------------------------------------------------------------------ */
/* Rewrite the packet quoted inside an ICMP error so that it shows the */
/* addresses/ports recorded in the matching NAT entry, patching the */
/* affected checksums.  Returns the NAT entry, or NULL when the packet is */
/* short, a fragment body, not IPv4, or no entry is found.  *nflags is */
/* set to IPN_ICMPERR whenever an entry is found. */
2896 nat_t *nat_icmperror(fin, nflags, dir)
2897 fr_info_t *fin;
2898 u_int *nflags;
2899 int dir;
2901 u_32_t sum1, sum2, sumd, sumd2;
2902 struct in_addr a1, a2;
2903 int flags, dlen, odst;
2904 icmphdr_t *icmp;
2905 u_short *csump;
2906 tcphdr_t *tcp;
2907 nat_t *nat;
2908 ip_t *oip;
2909 void *dp;
2911 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2912 return NULL;
2914 * nat_icmperrorlookup() will return NULL for `defective' packets.
2916 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2917 return NULL;
2919 tcp = NULL;
2920 csump = NULL;
2921 flags = 0;
2922 sumd2 = 0;
2923 *nflags = IPN_ICMPERR;
/* oip: the quoted IP header; dp: the transport header that follows it. */
2924 icmp = fin->fin_dp;
2925 oip = (ip_t *)&icmp->icmp_ip;
2926 dp = (((char *)oip) + (IP_HL(oip) << 2));
2927 if (oip->ip_p == IPPROTO_TCP) {
2928 tcp = (tcphdr_t *)dp;
2929 csump = (u_short *)&tcp->th_sum;
2930 flags = IPN_TCP;
2931 } else if (oip->ip_p == IPPROTO_UDP) {
2932 udphdr_t *udp;
/* tcp is pointed at the UDP header too, so the port rewrite further */
/* down can use th_sport/th_dport for both protocols. */
2934 udp = (udphdr_t *)dp;
2935 tcp = (tcphdr_t *)dp;
2936 csump = (u_short *)&udp->uh_sum;
2937 flags = IPN_UDP;
2938 } else if (oip->ip_p == IPPROTO_ICMP)
2939 flags = IPN_ICMPQUERY;
/* dlen: number of packet bytes from dp to the end (fin_plen minus the */
/* offset of dp from the start of the outer IP header). */
2940 dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2943 * Need to adjust ICMP header to include the real IP#'s and
2944 * port #'s. Only apply a checksum change relative to the
2945 * IP address change as it will be modified again in fr_checknatout
2946 * for both address and port. Two checksum changes are
2947 * necessary for the two header address changes. Be careful
2948 * to only modify the checksum once for the port # and twice
2949 * for the IP#.
2953 * Step 1
2954 * Fix the IP addresses in the offending IP packet. You also need
2955 * to adjust the IP header checksum of that offending IP packet.
2957 * Normally, you would expect that the ICMP checksum of the
2958 * ICMP error message needs to be adjusted as well for the
2959 * IP address change in oip.
2960 * However, this is a NOP, because the ICMP checksum is
2961 * calculated over the complete ICMP packet, which includes the
2962 * changed oip IP addresses and oip->ip_sum. However, these
2963 * two changes cancel each other out (if the delta for
2964 * the IP address is x, then the delta for ip_sum is minus x),
2965 * so no change in the icmp_cksum is necessary.
2967 * Inbound ICMP
2968 * ------------
2969 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2970 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2971 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2973 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2974 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2975 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2977 * Outbound ICMP
2978 * -------------
2979 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2980 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2981 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2983 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2984 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2985 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
/* odst == 1: the quoted destination is nat_oip, so the quoted source is */
/* replaced with nat_inip; otherwise the quoted destination is replaced */
/* with nat_outip.  a1 is the new address, a2 the old one (host order). */
2988 odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2989 if (odst == 1) {
2990 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2991 a2.s_addr = ntohl(oip->ip_src.s_addr);
2992 oip->ip_src.s_addr = htonl(a1.s_addr);
2993 } else {
2994 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2995 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2996 oip->ip_dst.s_addr = htonl(a1.s_addr);
/* Address delta, applied to the quoted IP header checksum only when the */
/* address actually changed. */
2999 sumd = a2.s_addr - a1.s_addr;
3000 if (sumd != 0) {
3001 if (a1.s_addr > a2.s_addr)
3002 sumd--;
3003 sumd = ~sumd;
3005 fix_datacksum(&oip->ip_sum, sumd);
3008 sumd2 = sumd;
3009 sum1 = 0;
3010 sum2 = 0;
3013 * Fix UDP pseudo header checksum to compensate for the
3014 * IP address change.
3016 if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
3018 * Step 2 :
3019 * For offending TCP/UDP IP packets, translate the ports as
3020 * well, based on the NAT specification. Of course such
3021 * a change may be reflected in the ICMP checksum as well.
3023 * Since the port fields are part of the TCP/UDP checksum
3024 * of the offending IP packet, you need to adjust that checksum
3025 * as well... except that the change in the port numbers should
3026 * be offset by the checksum change. However, the TCP/UDP
3027 * checksum will also need to change if there has been an
3028 * IP address change.
/* sum1 is the port taken from the NAT entry, sum2 the port currently in */
/* the quoted header; the header is overwritten with sum1. */
3030 if (odst == 1) {
3031 sum1 = ntohs(nat->nat_inport);
3032 sum2 = ntohs(tcp->th_sport);
3034 tcp->th_sport = htons(sum1);
3035 } else {
3036 sum1 = ntohs(nat->nat_outport);
3037 sum2 = ntohs(tcp->th_dport);
3039 tcp->th_dport = htons(sum1);
3042 sumd += sum1 - sum2;
3043 if (sumd != 0 || sumd2 != 0) {
3045 * At this point, sumd is the delta to apply to the
3046 * TCP/UDP header, given the changes in both the IP
3047 * address and the ports and sumd2 is the delta to
3048 * apply to the ICMP header, given the IP address
3049 * change delta that may need to be applied to the
3050 * TCP/UDP checksum instead.
3052 * If we will both the IP and TCP/UDP checksums
3053 * then the ICMP checksum changes by the address
3054 * delta applied to the TCP/UDP checksum. If we
3055 * do not change the TCP/UDP checksum them we
3056 * apply the delta in ports to the ICMP checksum.
/* The quoted transport checksum is patched only when enough of the */
/* header is present (dlen >= 8 and a non-zero checksum for UDP, */
/* dlen >= 18 for TCP); otherwise sumd2 is recomputed from the ports. */
/* NOTE(review): the UDP fallback uses sum1 - sum2 while the TCP fallback */
/* uses sum2 - sum1 -- confirm the opposite signs are intentional. */
3058 if (oip->ip_p == IPPROTO_UDP) {
3059 if ((dlen >= 8) && (*csump != 0)) {
3060 fix_datacksum(csump, sumd);
3061 } else {
3062 sumd2 = sum1 - sum2;
3063 if (sum2 > sum1)
3064 sumd2--;
3066 } else if (oip->ip_p == IPPROTO_TCP) {
3067 if (dlen >= 18) {
3068 fix_datacksum(csump, sumd);
3069 } else {
3070 sumd2 = sum2 - sum1;
3071 if (sum1 > sum2)
3072 sumd2--;
3076 if (sumd2 != 0) {
3077 ipnat_t *np;
3079 np = nat->nat_ptr;
/* Fold the carries out of the upper 16 bits; done three times over. */
3080 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3081 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3082 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
/* The outer ICMP checksum is adjusted with fix_outcksum() only for an */
/* outbound, non-reversed packet on a redirect rule whose quoted */
/* destination was rewritten; every other case uses fix_incksum(). */
3084 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3085 (fin->fin_rev == 0) && (np != NULL) &&
3086 (np->in_redir & NAT_REDIRECT)) {
3087 fix_outcksum(fin, &icmp->icmp_cksum,
3088 sumd2);
3089 } else {
3090 fix_incksum(fin, &icmp->icmp_cksum,
3091 sumd2);
3095 } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3096 icmphdr_t *orgicmp;
3099 * XXX - what if this is bogus hl and we go off the end ?
3100 * In this case, nat_icmperrorlookup() will have returned NULL.
3102 orgicmp = (icmphdr_t *)dp;
3104 if (odst == 1) {
3105 if (orgicmp->icmp_id != nat->nat_inport) {
3108 * Fix ICMP checksum (of the offening ICMP
3109 * query packet) to compensate the change
3110 * in the ICMP id of the offending ICMP
3111 * packet.
3113 * Since you modify orgicmp->icmp_id with
3114 * a delta (say x) and you compensate that
3115 * in origicmp->icmp_cksum with a delta
3116 * minus x, you don't have to adjust the
3117 * overall icmp->icmp_cksum
3119 sum1 = ntohs(orgicmp->icmp_id);
3120 sum2 = ntohs(nat->nat_inport);
3121 CALC_SUMD(sum1, sum2, sumd);
3122 orgicmp->icmp_id = nat->nat_inport;
3123 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3125 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3127 return nat;
3132 * NB: these lookups don't lock access to the list, it is assumed that it has
3133 * already been done!
3136 /* ------------------------------------------------------------------------ */
3137 /* Function: nat_inlookup */
3138 /* Returns: nat_t* - NULL == no match, */
3139 /* else pointer to matching NAT entry */
3140 /* Parameters: fin(I) - pointer to packet information */
3141 /* flags(I) - NAT flags for this packet */
3142 /* p(I) - protocol for this packet */
3143 /* src(I) - source IP address */
3144 /* mapdst(I) - destination IP address */
3145 /* */
3146 /* Lookup a nat entry based on the mapped destination ip address/port and */
3147 /* real source address/port. We use this lookup when receiving a packet, */
3148 /* we're looking for a table entry, based on the destination address. */
3149 /* */
3150 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3151 /* */
3152 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3153 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3154 /* */
3155 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3156 /* the packet is of said protocol */
3157 /* ------------------------------------------------------------------------ */
/* Search nat_table[1] for an entry matching (src, mapdst) and, for */
/* TCP/UDP/ICMP, the ports or id carried in fin->fin_data[].  An exact */
/* search is tried first; a second pass over wildcard-port entries is */
/* made only when flags contains a NAT_TCPUDP bit and NAT_SEARCH. */
3158 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3159 fr_info_t *fin;
3160 u_int flags, p;
3161 struct in_addr src , mapdst;
3163 u_short sport, dport;
3164 grehdr_t *gre;
3165 ipnat_t *ipn;
3166 u_int sflags;
3167 nat_t *nat;
3168 int nflags;
3169 u_32_t dst;
3170 void *ifp;
3171 u_int hv;
3173 ifp = fin->fin_ifp;
3174 sport = 0;
3175 dport = 0;
/* NOTE(review): gre is only ever set to NULL in this function. */
3176 gre = NULL;
3177 dst = mapdst.s_addr;
3178 sflags = flags & NAT_TCPUDPICMP;
/* Take the ports (TCP/UDP, passed through htons) or the ICMP id (used */
/* as-is) from fin_data[].  For ICMP the id is matched as sport when */
/* IPN_ICMPERR is set and as dport otherwise. */
3180 switch (p)
3182 case IPPROTO_TCP :
3183 case IPPROTO_UDP :
3184 sport = htons(fin->fin_data[0]);
3185 dport = htons(fin->fin_data[1]);
3186 break;
3187 case IPPROTO_ICMP :
3188 if (flags & IPN_ICMPERR)
3189 sport = fin->fin_data[1];
3190 else
3191 dport = fin->fin_data[1];
3192 break;
3193 default :
3194 break;
/* Callers that pass SI_WILDP skip the exact search entirely. */
3198 if ((flags & SI_WILDP) != 0)
3199 goto find_in_wild_ports;
/* Exact search: hash on destination address/port, then source */
/* address/port, and walk that bucket's chain. */
3201 hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3202 hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3203 nat = nat_table[1][hv];
3204 for (; nat; nat = nat->nat_hnext[1]) {
/* An entry already bound to an interface only matches that interface. */
3205 if (nat->nat_ifps[0] != NULL) {
3206 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3207 continue;
3210 nflags = nat->nat_flags;
3212 if (nat->nat_oip.s_addr == src.s_addr &&
3213 nat->nat_outip.s_addr == dst &&
3214 (((p == 0) &&
3215 (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3216 || (p == nat->nat_p))) {
3217 switch (p)
3219 #if 0
3220 case IPPROTO_GRE :
3221 if (nat->nat_call[1] != fin->fin_data[0])
3222 continue;
3223 break;
3224 #endif
3225 case IPPROTO_ICMP :
3226 if ((flags & IPN_ICMPERR) != 0) {
3227 if (nat->nat_outport != sport)
3228 continue;
3229 } else {
3230 if (nat->nat_outport != dport)
3231 continue;
3233 break;
3234 case IPPROTO_TCP :
3235 case IPPROTO_UDP :
3236 if (nat->nat_oport != sport)
3237 continue;
3238 if (nat->nat_outport != dport)
3239 continue;
3240 break;
3241 default :
3242 break;
/* Entries with a proxy session attached must also pass appr_match(). */
3245 ipn = nat->nat_ptr;
3246 if ((ipn != NULL) && (nat->nat_aps != NULL))
3247 if (appr_match(fin, nat) != 0)
3248 continue;
/* First match on an unbound entry binds it to this interface. */
3249 if ((nat->nat_ifps[0] == NULL) && (ifp != NULL))
3250 nat->nat_ifps[0] = ifp;
3251 return nat;
3256 * So if we didn't find it but there are wildcard members in the hash
3257 * table, go back and look for them. We do this search and update here
3258 * because it is modifying the NAT table and we want to do this only
3259 * for the first packet that matches. The exception, of course, is
3260 * for "dummy" (FI_IGNORE) lookups.
3262 find_in_wild_ports:
3263 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3264 return NULL;
3265 if (nat_stats.ns_wilds == 0)
3266 return NULL;
/* The lock held on entry is released here and ipf_nat is re-entered */
/* with WRITE_ENTER before the table is walked; MUTEX_DOWNGRADE is */
/* applied before returning. */
3268 RWLOCK_EXIT(&ipf_nat);
/* Wildcard entries are hashed on addresses only (ports taken as 0). */
3270 hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3271 hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3273 WRITE_ENTER(&ipf_nat);
3275 nat = nat_table[1][hv];
3276 for (; nat; nat = nat->nat_hnext[1]) {
3277 if (nat->nat_ifps[0] != NULL) {
3278 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3279 continue;
3282 if (nat->nat_p != fin->fin_p)
3283 continue;
3284 if (nat->nat_oip.s_addr != src.s_addr ||
3285 nat->nat_outip.s_addr != dst)
3286 continue;
3288 nflags = nat->nat_flags;
3289 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3290 continue;
3292 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3293 NAT_INBOUND) == 1) {
/* FI_IGNORE lookups return the match without modifying it. */
3294 if ((fin->fin_flx & FI_IGNORE) != 0)
3295 break;
/* SI_CLONE entries are cloned via fr_natclone() and the clone is what */
/* gets updated below; otherwise the wildcard count is decremented. */
3296 if ((nflags & SI_CLONE) != 0) {
3297 nat = fr_natclone(fin, nat);
3298 if (nat == NULL)
3299 break;
3300 } else {
3301 MUTEX_ENTER(&ipf_nat_new);
3302 nat_stats.ns_wilds--;
3303 MUTEX_EXIT(&ipf_nat_new);
/* Fill in the real ports, clear the wildcard flags and re-hash. */
3305 if ((nat->nat_ifps[0] == NULL) && (ifp != NULL))
3306 nat->nat_ifps[0] = ifp;
3307 nat->nat_oport = sport;
3308 nat->nat_outport = dport;
3309 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3310 nat_tabmove(nat);
3311 break;
3315 MUTEX_DOWNGRADE(&ipf_nat);
/* nat is NULL here if the chain was exhausted or fr_natclone() failed. */
3317 return nat;
3321 /* ------------------------------------------------------------------------ */
3322 /* Function: nat_tabmove */
3323 /* Returns: Nil */
3324 /* Parameters: nat(I) - pointer to NAT structure */
3325 /* Write Lock: ipf_nat */
3326 /* */
3327 /* This function is only called for TCP/UDP NAT table entries where the */
3328 /* original was placed in the table without hashing on the ports and we now */
3329 /* want to include hashing on port numbers. */
3330 /* ------------------------------------------------------------------------ */
3331 static void nat_tabmove(nat)
3332 nat_t *nat;
3334 nat_t **natp;
3335 u_int hv;
3337 if (nat->nat_flags & SI_CLONE)
3338 return;
3341 * Remove the NAT entry from the old location
3343 if (nat->nat_hnext[0])
3344 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3345 *nat->nat_phnext[0] = nat->nat_hnext[0];
3346 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3348 if (nat->nat_hnext[1])
3349 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3350 *nat->nat_phnext[1] = nat->nat_hnext[1];
3351 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3354 * Add into the NAT table in the new position
3356 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3357 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3358 ipf_nattable_sz);
3359 nat->nat_hv[0] = hv;
3360 natp = &nat_table[0][hv];
3361 if (*natp)
3362 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3363 nat->nat_phnext[0] = natp;
3364 nat->nat_hnext[0] = *natp;
3365 *natp = nat;
3366 nat_stats.ns_bucketlen[0][hv]++;
3368 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3369 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3370 ipf_nattable_sz);
3371 nat->nat_hv[1] = hv;
3372 natp = &nat_table[1][hv];
3373 if (*natp)
3374 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3375 nat->nat_phnext[1] = natp;
3376 nat->nat_hnext[1] = *natp;
3377 *natp = nat;
3378 nat_stats.ns_bucketlen[1][hv]++;
3382 /* ------------------------------------------------------------------------ */
3383 /* Function: nat_outlookup */
3384 /* Returns: nat_t* - NULL == no match, */
3385 /* else pointer to matching NAT entry */
3386 /* Parameters: fin(I) - pointer to packet information */
3387 /* flags(I) - NAT flags for this packet */
3388 /* p(I) - protocol for this packet */
3389 /* src(I) - source IP address */
3390 /* dst(I) - destination IP address */
3391 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */
3392 /* */
3393 /* Lookup a nat entry based on the source 'real' ip address/port and */
3394 /* destination address/port. We use this lookup when sending a packet out, */
3395 /* we're looking for a table entry, based on the source address. */
3396 /* */
3397 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3398 /* */
3399 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3400 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3401 /* */
3402 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3403 /* the packet is of said protocol */
3404 /* ------------------------------------------------------------------------ */
3405 nat_t *nat_outlookup(fin, flags, p, src, dst)
3406 fr_info_t *fin;
3407 u_int flags, p;
3408 struct in_addr src , dst;
3410 u_short sport, dport;
3411 u_int sflags;
3412 ipnat_t *ipn;
3413 u_32_t srcip;
3414 nat_t *nat;
3415 int nflags;
3416 void *ifp;
3417 u_int hv;
3419 ifp = fin->fin_ifp;
3420 srcip = src.s_addr;
3421 sflags = flags & IPN_TCPUDPICMP;
3422 sport = 0;
3423 dport = 0;
3425 switch (p)
3427 case IPPROTO_TCP :
3428 case IPPROTO_UDP :
3429 sport = htons(fin->fin_data[0]);
3430 dport = htons(fin->fin_data[1]);
3431 break;
3432 case IPPROTO_ICMP :
3433 if (flags & IPN_ICMPERR)
3434 sport = fin->fin_data[1];
3435 else
3436 dport = fin->fin_data[1];
3437 break;
3438 default :
3439 break;
3442 if ((flags & SI_WILDP) != 0)
3443 goto find_out_wild_ports;
3445 hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3446 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3447 nat = nat_table[0][hv];
3448 for (; nat; nat = nat->nat_hnext[0]) {
3449 if (nat->nat_ifps[1] != NULL) {
3450 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3451 continue;
3454 nflags = nat->nat_flags;
3456 if (nat->nat_inip.s_addr == srcip &&
3457 nat->nat_oip.s_addr == dst.s_addr &&
3458 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3459 || (p == nat->nat_p))) {
3460 switch (p)
3462 #if 0
3463 case IPPROTO_GRE :
3464 if (nat->nat_call[1] != fin->fin_data[0])
3465 continue;
3466 break;
3467 #endif
3468 case IPPROTO_TCP :
3469 case IPPROTO_UDP :
3470 if (nat->nat_oport != dport)
3471 continue;
3472 if (nat->nat_inport != sport)
3473 continue;
3474 break;
3475 default :
3476 break;
3479 ipn = nat->nat_ptr;
3480 if ((ipn != NULL) && (nat->nat_aps != NULL))
3481 if (appr_match(fin, nat) != 0)
3482 continue;
3483 if ((nat->nat_ifps[1] == NULL) && (ifp != NULL))
3484 nat->nat_ifps[1] = ifp;
3485 return nat;
3490 * So if we didn't find it but there are wildcard members in the hash
3491 * table, go back and look for them. We do this search and update here
3492 * because it is modifying the NAT table and we want to do this only
3493 * for the first packet that matches. The exception, of course, is
3494 * for "dummy" (FI_IGNORE) lookups.
3496 find_out_wild_ports:
3497 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3498 return NULL;
3499 if (nat_stats.ns_wilds == 0)
3500 return NULL;
3502 RWLOCK_EXIT(&ipf_nat);
3504 hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3505 hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3507 WRITE_ENTER(&ipf_nat);
3509 nat = nat_table[0][hv];
3510 for (; nat; nat = nat->nat_hnext[0]) {
3511 if (nat->nat_ifps[1] != NULL) {
3512 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3513 continue;
3516 if (nat->nat_p != fin->fin_p)
3517 continue;
3518 if ((nat->nat_inip.s_addr != srcip) ||
3519 (nat->nat_oip.s_addr != dst.s_addr))
3520 continue;
3522 nflags = nat->nat_flags;
3523 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3524 continue;
3526 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3527 NAT_OUTBOUND) == 1) {
3528 if ((fin->fin_flx & FI_IGNORE) != 0)
3529 break;
3530 if ((nflags & SI_CLONE) != 0) {
3531 nat = fr_natclone(fin, nat);
3532 if (nat == NULL)
3533 break;
3534 } else {
3535 MUTEX_ENTER(&ipf_nat_new);
3536 nat_stats.ns_wilds--;
3537 MUTEX_EXIT(&ipf_nat_new);
3539 if ((nat->nat_ifps[1] == NULL) && (ifp != NULL))
3540 nat->nat_ifps[1] = ifp;
3541 nat->nat_inport = sport;
3542 nat->nat_oport = dport;
3543 if (nat->nat_outport == 0)
3544 nat->nat_outport = sport;
3545 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3546 nat_tabmove(nat);
3547 break;
3551 MUTEX_DOWNGRADE(&ipf_nat);
3553 return nat;
3557 /* ------------------------------------------------------------------------ */
3558 /* Function: nat_lookupredir */
3559 /* Returns: nat_t* - NULL == no match, */
3560 /* else pointer to matching NAT entry */
3561 /* Parameters: np(I) - pointer to description of packet to find NAT table */
3562 /* entry for. */
3563 /* */
3564 /* Lookup the NAT tables to search for a matching redirect */
3565 /* The contents of natlookup_t should imitate those found in a packet that */
3566 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3567 /* We can do the lookup in one of two ways, imitating an inbound or */
3568 /* outbound packet. By default we assume outbound, unless IPN_IN is set. */
3569 /* For IN, the fields are set as follows: */
3570 /* nl_real* = source information */
3571 /* nl_out* = destination information (translated) */
3572 /* For an out packet, the fields are set like this: */
3573 /* nl_in* = source information (untranslated) */
3574 /* nl_out* = destination information (translated) */
3575 /* ------------------------------------------------------------------------ */
3576 nat_t *nat_lookupredir(np)
3577 natlookup_t *np;
3579 fr_info_t fi;
3580 nat_t *nat;
3582 bzero((char *)&fi, sizeof(fi));
3583 if (np->nl_flags & IPN_IN) {
3584 fi.fin_data[0] = ntohs(np->nl_realport);
3585 fi.fin_data[1] = ntohs(np->nl_outport);
3586 } else {
3587 fi.fin_data[0] = ntohs(np->nl_inport);
3588 fi.fin_data[1] = ntohs(np->nl_outport);
3590 if (np->nl_flags & IPN_TCP)
3591 fi.fin_p = IPPROTO_TCP;
3592 else if (np->nl_flags & IPN_UDP)
3593 fi.fin_p = IPPROTO_UDP;
3594 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3595 fi.fin_p = IPPROTO_ICMP;
3598 * We can do two sorts of lookups:
3599 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3600 * - default: we have the `in' and `out' address, look for `real'.
3602 if (np->nl_flags & IPN_IN) {
3603 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3604 np->nl_realip, np->nl_outip))) {
3605 np->nl_inip = nat->nat_inip;
3606 np->nl_inport = nat->nat_inport;
3608 } else {
3610 * If nl_inip is non null, this is a lookup based on the real
3611 * ip address. Else, we use the fake.
3613 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3614 np->nl_inip, np->nl_outip))) {
3616 if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3617 fr_info_t fin;
3618 bzero((char *)&fin, sizeof(fin));
3619 fin.fin_p = nat->nat_p;
3620 fin.fin_data[0] = ntohs(nat->nat_outport);
3621 fin.fin_data[1] = ntohs(nat->nat_oport);
3622 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3623 nat->nat_outip,
3624 nat->nat_oip) != NULL) {
3625 np->nl_flags &= ~IPN_FINDFORWARD;
3629 np->nl_realip = nat->nat_outip;
3630 np->nl_realport = nat->nat_outport;
3634 return nat;
3638 /* ------------------------------------------------------------------------ */
3639 /* Function: nat_match */
3640 /* Returns: int - 0 == no match, 1 == match */
3641 /* Parameters: fin(I) - pointer to packet information */
3642 /* np(I) - pointer to NAT rule */
3643 /* */
3644 /* Pull the matching of a packet against a NAT rule out of that complex */
3645 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3646 /* ------------------------------------------------------------------------ */
3647 static int nat_match(fin, np)
3648 fr_info_t *fin;
3649 ipnat_t *np;
3651 frtuc_t *ft;
3653 if (fin->fin_v != 4)
3654 return 0;
3656 if (np->in_p && fin->fin_p != np->in_p)
3657 return 0;
3659 if (fin->fin_out) {
3660 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3661 return 0;
3662 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3663 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3664 return 0;
3665 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3666 ^ ((np->in_flags & IPN_NOTDST) != 0))
3667 return 0;
3668 } else {
3669 if (!(np->in_redir & NAT_REDIRECT))
3670 return 0;
3671 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3672 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3673 return 0;
3674 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3675 ^ ((np->in_flags & IPN_NOTDST) != 0))
3676 return 0;
3679 ft = &np->in_tuc;
3680 if (!(fin->fin_flx & FI_TCPUDP) ||
3681 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3682 if (ft->ftu_scmp || ft->ftu_dcmp)
3683 return 0;
3684 return 1;
3687 return fr_tcpudpchk(fin, ft);
3691 /* ------------------------------------------------------------------------ */
3692 /* Function: nat_update */
3693 /* Returns: Nil */
3694 /* Parameters: nat(I) - pointer to NAT structure */
3695 /* np(I) - pointer to NAT rule */
3696 /* Locks: nat_lock */
3697 /* */
3698 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */
3699 /* called with fin_rev updated - i.e. after calling nat_proto(). */
3700 /* */
3701 /* This must be called *after* nat_proto() because we need fin_rev set. */
3702 /* ------------------------------------------------------------------------ */
3703 void nat_update(fin, nat)
3704 fr_info_t *fin;
3705 nat_t *nat;
3707 ipftq_t *ifq, *ifq2;
3708 ipftqent_t *tqe;
3709 ipnat_t *np = nat->nat_ptr;
3711 tqe = &nat->nat_tqe;
3712 ifq = tqe->tqe_ifq;
3715 * We allow over-riding of NAT timeouts from NAT rules, even for
3716 * TCP, however, if it is TCP and there is no rule timeout set,
3717 * then do not update the timeout here.
3719 if (np != NULL)
3720 ifq2 = np->in_tqehead[fin->fin_rev];
3721 else
3722 ifq2 = NULL;
3724 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3725 u_32_t end, ack;
3726 u_char tcpflags;
3727 tcphdr_t *tcp;
3728 int dsize;
3730 tcp = fin->fin_dp;
3731 tcpflags = tcp->th_flags;
3732 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3733 ((tcpflags & TH_SYN) ? 1 : 0) +
3734 ((tcpflags & TH_FIN) ? 1 : 0);
3736 ack = ntohl(tcp->th_ack);
3737 end = ntohl(tcp->th_seq) + dsize;
3739 if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3740 nat->nat_seqnext[1 - fin->fin_rev] = ack;
3742 if (nat->nat_seqnext[fin->fin_rev] == 0)
3743 nat->nat_seqnext[fin->fin_rev] = end;
3745 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3746 } else {
3747 if (ifq2 == NULL) {
3748 if (nat->nat_p == IPPROTO_UDP)
3749 ifq2 = &nat_udptq;
3750 else if (nat->nat_p == IPPROTO_ICMP)
3751 ifq2 = &nat_icmptq;
3752 else
3753 ifq2 = &nat_iptq;
3756 fr_movequeue(tqe, ifq, ifq2);
3761 /* ------------------------------------------------------------------------ */
3762 /* Function: fr_checknatout */
3763 /* Returns: int - -1 == packet failed NAT checks so block it, */
3764 /* 0 == no packet translation occurred, */
3765 /* 1 == packet was successfully translated. */
3766 /* Parameters: fin(I) - pointer to packet information */
3767 /* passp(I) - pointer to filtering result flags */
3768 /* */
3769 /* Check to see if an outcoming packet should be changed. ICMP packets are */
3770 /* first checked to see if they match an existing entry (if an error), */
3771 /* otherwise a search of the current NAT table is made. If neither results */
3772 /* in a match then a search for a matching NAT rule is made. Create a new */
3773 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */
3774 /* packet header(s) as required. */
3775 /* ------------------------------------------------------------------------ */
3776 int fr_checknatout(fin, passp)
3777 fr_info_t *fin;
3778 u_32_t *passp;
3780 ipnat_t *np = NULL, *npnext;
3781 struct ifnet *ifp, *sifp;
3782 icmphdr_t *icmp = NULL;
3783 tcphdr_t *tcp = NULL;
3784 int rval, natfailed;
3785 u_int nflags = 0;
3786 u_32_t ipa, iph;
3787 int natadd = 1;
3788 frentry_t *fr;
3789 nat_t *nat;
3791 if (fr_nat_lock != 0)
3792 return 0;
3793 if (nat_stats.ns_rules == 0 && nat_instances == NULL)
3794 return 0;
3796 natfailed = 0;
3797 fr = fin->fin_fr;
3798 sifp = fin->fin_ifp;
3799 if (fr != NULL) {
3800 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3801 if ((ifp != NULL) && (ifp != (void *)-1))
3802 fin->fin_ifp = ifp;
3804 ifp = fin->fin_ifp;
3806 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3807 switch (fin->fin_p)
3809 case IPPROTO_TCP :
3810 nflags = IPN_TCP;
3811 break;
3812 case IPPROTO_UDP :
3813 nflags = IPN_UDP;
3814 break;
3815 case IPPROTO_ICMP :
3816 icmp = fin->fin_dp;
3819 * This is an incoming packet, so the destination is
3820 * the icmp_id and the source port equals 0
3822 if (nat_icmpquerytype4(icmp->icmp_type))
3823 nflags = IPN_ICMPQUERY;
3824 break;
3825 default :
3826 break;
3829 if ((nflags & IPN_TCPUDP))
3830 tcp = fin->fin_dp;
3833 ipa = fin->fin_saddr;
3835 READ_ENTER(&ipf_nat);
3837 if (((fin->fin_flx & FI_ICMPERR) != 0) &&
3838 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3839 /*EMPTY*/;
3840 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3841 natadd = 0;
3842 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3843 fin->fin_src, fin->fin_dst))) {
3844 nflags = nat->nat_flags;
3845 } else if (fin->fin_off == 0) {
3846 u_32_t hv, msk, nmsk;
3848 msk = 0xffffffff;
3849 nmsk = nat_masks;
3851 * If there is no current entry in the nat table for this IP#,
3852 * create one for it (if there is a matching rule).
3854 maskloop:
3855 iph = ipa & htonl(msk);
3856 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3857 for (np = nat_rules[hv]; np; np = npnext) {
3858 npnext = np->in_mnext;
3859 if (np->in_ifps[1] && (np->in_ifps[1] != ifp))
3860 continue;
3861 if (np->in_v != fin->fin_v)
3862 continue;
3863 if (np->in_p && (np->in_p != fin->fin_p))
3864 continue;
3865 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3866 continue;
3867 if (np->in_flags & IPN_FILTER) {
3868 if (!nat_match(fin, np))
3869 continue;
3870 } else if ((ipa & np->in_inmsk) != np->in_inip)
3871 continue;
3873 if ((fr != NULL) &&
3874 !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3875 continue;
3877 if (*np->in_plabel != '\0') {
3878 if (((np->in_flags & IPN_FILTER) == 0) &&
3879 (np->in_dport != tcp->th_dport))
3880 continue;
3881 if (appr_ok(fin, tcp, np) == 0)
3882 continue;
3885 MUTEX_ENTER(&ipf_nat_new);
3886 nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3887 MUTEX_EXIT(&ipf_nat_new);
3888 if (nat != NULL) {
3889 natfailed = 0;
3890 break;
3892 natfailed = -1;
3894 if ((np == NULL) && (nmsk != 0)) {
3895 while (nmsk) {
3896 msk <<= 1;
3897 if (nmsk & 0x80000000)
3898 break;
3899 nmsk <<= 1;
3901 if (nmsk != 0) {
3902 nmsk <<= 1;
3903 goto maskloop;
3908 if (nat != NULL) {
3909 rval = fr_natout(fin, nat, natadd, nflags);
3910 if (rval == 1) {
3911 MUTEX_ENTER(&nat->nat_lock);
3912 nat_update(fin, nat);
3913 nat->nat_bytes[1] += fin->fin_plen;
3914 nat->nat_pkts[1]++;
3915 fin->fin_pktnum = nat->nat_pkts[1];
3916 MUTEX_EXIT(&nat->nat_lock);
3918 } else
3919 rval = natfailed;
3920 RWLOCK_EXIT(&ipf_nat);
3922 if (rval == -1) {
3923 if (passp != NULL)
3924 *passp = FR_BLOCK;
3925 fin->fin_flx |= FI_BADNAT;
3927 fin->fin_ifp = sifp;
3928 return rval;
3931 /* ------------------------------------------------------------------------ */
3932 /* Function: fr_natout */
3933 /* Returns: int - -1 == packet failed NAT checks so block it, */
3934 /* 1 == packet was successfully translated. */
3935 /* Parameters: fin(I) - pointer to packet information */
3936 /* nat(I) - pointer to NAT structure */
3937 /* natadd(I) - flag indicating if it is safe to add frag cache */
3938 /* nflags(I) - NAT flags set for this packet */
3939 /* */
3940 /* Translate a packet coming "out" on an interface. */
3941 /* ------------------------------------------------------------------------ */
3942 int fr_natout(fin, nat, natadd, nflags)
3943 fr_info_t *fin;
3944 nat_t *nat;
3945 int natadd;
3946 u_32_t nflags;
3948 icmphdr_t *icmp;
3949 tcphdr_t *tcp;
3950 ipnat_t *np;
3951 int i;
3953 tcp = NULL;
3954 icmp = NULL;
3955 np = nat->nat_ptr;
3957 if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3958 (void) fr_nat_newfrag(fin, 0, nat);
3961 * Fix up checksums, not by recalculating them, but
3962 * simply computing adjustments.
3963 * This is only done for STREAMS based IP implementations where the
3964 * checksum has already been calculated by IP. In all other cases,
3965 * IPFilter is called before the checksum needs calculating so there
3966 * is no call to modify whatever is in the header now.
3968 if (fin->fin_v == 4) {
3969 if (nflags == IPN_ICMPERR) {
3970 u_32_t s1, s2, sumd;
3972 s1 = LONG_SUM(ntohl(fin->fin_saddr));
3973 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3974 CALC_SUMD(s1, s2, sumd);
3975 fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3977 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3978 defined(linux) || defined(BRIDGE_IPF)
3979 else {
3981 * Strictly speaking, this isn't necessary on BSD
3982 * kernels because they do checksum calculation after
3983 * this code has run BUT if ipfilter is being used
3984 * to do NAT as a bridge, that code doesn't exist.
3986 if (nat->nat_dir == NAT_OUTBOUND)
3987 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3988 nat->nat_ipsumd);
3989 else
3990 fix_incksum(fin, &fin->fin_ip->ip_sum,
3991 nat->nat_ipsumd);
3993 #endif
3996 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3997 u_short *csump;
3999 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4000 tcp = fin->fin_dp;
4002 tcp->th_sport = nat->nat_outport;
4003 fin->fin_data[0] = ntohs(nat->nat_outport);
4006 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4007 icmp = fin->fin_dp;
4008 icmp->icmp_id = nat->nat_outport;
4011 csump = nat_proto(fin, nat, nflags);
4014 * The above comments do not hold for layer 4 (or higher)
4015 * checksums...
4017 if (csump != NULL) {
4018 if (nat->nat_dir == NAT_OUTBOUND)
4019 fix_outcksum(fin, csump, nat->nat_sumd[1]);
4020 else
4021 fix_incksum(fin, csump, nat->nat_sumd[1]);
4025 fin->fin_ip->ip_src = nat->nat_outip;
4026 #ifdef IPFILTER_SYNC
4027 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4028 #endif
4029 /* ------------------------------------------------------------- */
4030 /* A few quick notes: */
4031 /* Following are test conditions prior to calling the */
4032 /* appr_check routine. */
4033 /* */
4034 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4035 /* with a redirect rule, we attempt to match the packet's */
4036 /* source port against in_dport, otherwise we'd compare the */
4037 /* packet's destination. */
4038 /* ------------------------------------------------------------- */
4039 if ((np != NULL) && (np->in_apr != NULL)) {
4040 i = appr_check(fin, nat);
4041 if (i == 0)
4042 i = 1;
4043 } else
4044 i = 1;
4045 ATOMIC_INCL(nat_stats.ns_mapped[1]);
4046 fin->fin_flx |= FI_NATED;
4047 return i;
4051 /* ------------------------------------------------------------------------ */
4052 /* Function: fr_checknatin */
4053 /* Returns: int - -1 == packet failed NAT checks so block it, */
4054 /* 0 == no packet translation occurred, */
4055 /* 1 == packet was successfully translated. */
4056 /* Parameters: fin(I) - pointer to packet information */
4057 /* passp(I) - pointer to filtering result flags */
4058 /* */
4059 /* Check to see if an incoming packet should be changed. ICMP packets are */
4060 /* first checked to see if they match an existing entry (if an error), */
4061 /* otherwise a search of the current NAT table is made. If neither results */
4062 /* in a match then a search for a matching NAT rule is made. Create a new */
4063 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */
4064 /* packet header(s) as required. */
4065 /* ------------------------------------------------------------------------ */
4066 int fr_checknatin(fin, passp)
4067 fr_info_t *fin;
4068 u_32_t *passp;
4070 ipnat_t *np, *npnext;
4071 u_int nflags, natadd;
4072 int rval, natfailed;
4073 struct ifnet *ifp;
4074 struct in_addr in;
4075 icmphdr_t *icmp;
4076 tcphdr_t *tcp;
4077 u_short dport;
4078 nat_t *nat;
4079 u_32_t iph;
4081 if (fr_nat_lock != 0)
4082 return 0;
4083 if (nat_stats.ns_rules == 0 && nat_instances == NULL)
4084 return 0;
4086 tcp = NULL;
4087 icmp = NULL;
4088 dport = 0;
4089 natadd = 1;
4090 nflags = 0;
4091 natfailed = 0;
4092 ifp = fin->fin_ifp;
4094 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4095 switch (fin->fin_p)
4097 case IPPROTO_TCP :
4098 nflags = IPN_TCP;
4099 break;
4100 case IPPROTO_UDP :
4101 nflags = IPN_UDP;
4102 break;
4103 case IPPROTO_ICMP :
4104 icmp = fin->fin_dp;
4107 * This is an incoming packet, so the destination is
4108 * the icmp_id and the source port equals 0
4110 if (nat_icmpquerytype4(icmp->icmp_type)) {
4111 nflags = IPN_ICMPQUERY;
4112 dport = icmp->icmp_id;
4113 } break;
4114 default :
4115 break;
4118 if ((nflags & IPN_TCPUDP)) {
4119 tcp = fin->fin_dp;
4120 dport = tcp->th_dport;
4124 in = fin->fin_dst;
4126 READ_ENTER(&ipf_nat);
4128 if (((fin->fin_flx & FI_ICMPERR) != 0) &&
4129 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4130 /*EMPTY*/;
4131 else if ((fin->fin_flx & FI_FRAG) &&
4132 (nat = fr_nat_knownfrag(fin)))
4133 natadd = 0;
4134 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4135 fin->fin_src, in))) {
4136 nflags = nat->nat_flags;
4137 } else if (fin->fin_off == 0) {
4138 u_32_t hv, msk, rmsk;
4140 msk = 0xffffffff;
4141 rmsk = rdr_masks;
4143 * If there is no current entry in the nat table for this IP#,
4144 * create one for it (if there is a matching rule).
4146 maskloop:
4147 iph = in.s_addr & htonl(msk);
4148 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4149 for (np = rdr_rules[hv]; np; np = npnext) {
4150 npnext = np->in_rnext;
4151 if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4152 continue;
4153 if (np->in_v != fin->fin_v)
4154 continue;
4155 if (np->in_p && (np->in_p != fin->fin_p))
4156 continue;
4157 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4158 continue;
4159 if (np->in_flags & IPN_FILTER) {
4160 if (!nat_match(fin, np))
4161 continue;
4162 } else {
4163 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4164 continue;
4165 if (np->in_pmin &&
4166 ((ntohs(np->in_pmax) < ntohs(dport)) ||
4167 (ntohs(dport) < ntohs(np->in_pmin))))
4168 continue;
4171 if (*np->in_plabel != '\0') {
4172 if (!appr_ok(fin, tcp, np)) {
4173 continue;
4178 * If we've matched a round-robin rule but it has
4179 * moved in the list since we got it, start over as
4180 * this is now no longer correct.
4182 MUTEX_ENTER(&ipf_nat_new);
4183 if ((npnext != np->in_rnext) &&
4184 (np->in_flags & IPN_ROUNDR)) {
4185 MUTEX_EXIT(&ipf_nat_new);
4186 goto maskloop;
4188 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4189 MUTEX_EXIT(&ipf_nat_new);
4190 if (nat != NULL) {
4191 natfailed = 0;
4192 break;
4194 natfailed = -1;
4197 if ((np == NULL) && (rmsk != 0)) {
4198 while (rmsk) {
4199 msk <<= 1;
4200 if (rmsk & 0x80000000)
4201 break;
4202 rmsk <<= 1;
4204 if (rmsk != 0) {
4205 rmsk <<= 1;
4206 goto maskloop;
4211 if (nat != NULL) {
4212 rval = fr_natin(fin, nat, natadd, nflags);
4213 if (rval == 1) {
4214 MUTEX_ENTER(&nat->nat_lock);
4215 nat_update(fin, nat);
4216 nat->nat_bytes[0] += fin->fin_plen;
4217 nat->nat_pkts[0]++;
4218 fin->fin_pktnum = nat->nat_pkts[0];
4219 MUTEX_EXIT(&nat->nat_lock);
4221 } else
4222 rval = natfailed;
4223 RWLOCK_EXIT(&ipf_nat);
4225 if (rval == -1) {
4226 if (passp != NULL)
4227 *passp = FR_BLOCK;
4228 fin->fin_flx |= FI_BADNAT;
4230 return rval;
4234 /* ------------------------------------------------------------------------ */
4235 /* Function: fr_natin */
4236 /* Returns: int - -1 == packet failed NAT checks so block it, */
4237 /* 1 == packet was successfully translated. */
4238 /* Parameters: fin(I) - pointer to packet information */
4239 /* nat(I) - pointer to NAT structure */
4240 /* natadd(I) - flag indicating if it is safe to add frag cache */
4241 /* nflags(I) - NAT flags set for this packet */
4242 /* Locks Held: ipf_nat (READ) */
4243 /* */
4244 /* Translate a packet coming "in" on an interface. */
4245 /* ------------------------------------------------------------------------ */
4246 int fr_natin(fin, nat, natadd, nflags)
4247 fr_info_t *fin;
4248 nat_t *nat;
4249 int natadd;
4250 u_32_t nflags;
4252 icmphdr_t *icmp;
4253 tcphdr_t *tcp;
4254 ipnat_t *np;
4255 int i;
4257 tcp = NULL;
4258 np = nat->nat_ptr;
4259 fin->fin_fr = nat->nat_fr;
4261 if (np != NULL) {
4262 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4263 (void) fr_nat_newfrag(fin, 0, nat);
4265 /* ------------------------------------------------------------- */
4266 /* A few quick notes: */
4267 /* Following are test conditions prior to calling the */
4268 /* appr_check routine. */
4269 /* */
4270 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4271 /* with a map rule, we attempt to match the packet's */
4272 /* source port against in_dport, otherwise we'd compare the */
4273 /* packet's destination. */
4274 /* ------------------------------------------------------------- */
4275 if (np->in_apr != NULL) {
4276 i = appr_check(fin, nat);
4277 if (i == -1) {
4278 return -1;
4283 #ifdef IPFILTER_SYNC
4284 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4285 #endif
4287 fin->fin_ip->ip_dst = nat->nat_inip;
4288 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4289 if (nflags & IPN_TCPUDP)
4290 tcp = fin->fin_dp;
4293 * Fix up checksums, not by recalculating them, but
4294 * simply computing adjustments.
4295 * Why only do this for some platforms on inbound packets ?
4296 * Because for those that it is done, IP processing is yet to happen
4297 * and so the IPv4 header checksum has not yet been evaluated.
4298 * Perhaps it should always be done for the benefit of things like
4299 * fast forwarding (so that it doesn't need to be recomputed) but with
4300 * header checksum offloading, perhaps it is a moot point.
4302 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4303 defined(__osf__) || defined(linux)
4304 if (nat->nat_dir == NAT_OUTBOUND)
4305 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4306 else
4307 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4308 #endif
4310 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4311 u_short *csump;
4313 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4314 tcp->th_dport = nat->nat_inport;
4315 fin->fin_data[1] = ntohs(nat->nat_inport);
4319 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4320 icmp = fin->fin_dp;
4322 icmp->icmp_id = nat->nat_inport;
4325 csump = nat_proto(fin, nat, nflags);
4328 * The above comments do not hold for layer 4 (or higher)
4329 * checksums...
4331 if (csump != NULL) {
4332 if (nat->nat_dir == NAT_OUTBOUND)
4333 fix_incksum(fin, csump, nat->nat_sumd[0]);
4334 else
4335 fix_outcksum(fin, csump, nat->nat_sumd[0]);
4338 ATOMIC_INCL(nat_stats.ns_mapped[0]);
4339 fin->fin_flx |= FI_NATED;
4340 if (np != NULL && np->in_tag.ipt_num[0] != 0)
4341 fin->fin_nattag = &np->in_tag;
4342 return 1;
4346 /* ------------------------------------------------------------------------ */
4347 /* Function: nat_proto */
4348 /* Returns: u_short* - pointer to transport header checksum to update, */
4349 /* NULL if the transport protocol is not recognised */
4350 /* as needing a checksum update. */
4351 /* Parameters: fin(I) - pointer to packet information */
4352 /* nat(I) - pointer to NAT structure */
4353 /* nflags(I) - NAT flags set for this packet */
4354 /* */
4355 /* Return the pointer to the checksum field for each protocol so understood.*/
4356 /* If support for making other changes to a protocol header is required, */
4357 /* that is not strictly 'address' translation, such as clamping the MSS in */
4358 /* TCP down to a specific value, then do it from here. */
4359 /* ------------------------------------------------------------------------ */
4360 u_short *nat_proto(fin, nat, nflags)
4361 fr_info_t *fin;
4362 nat_t *nat;
4363 u_int nflags;
4365 icmphdr_t *icmp;
4366 u_short *csump;
4367 tcphdr_t *tcp;
4368 udphdr_t *udp;
4370 csump = NULL;
4371 if (fin->fin_out == 0) {
4372 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4373 } else {
4374 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4377 switch (fin->fin_p)
4379 case IPPROTO_TCP :
4380 tcp = fin->fin_dp;
4382 csump = &tcp->th_sum;
4385 * Do a MSS CLAMPING on a SYN packet,
4386 * only deal IPv4 for now.
4388 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4389 nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4391 break;
4393 case IPPROTO_UDP :
4394 udp = fin->fin_dp;
4396 if (udp->uh_sum)
4397 csump = &udp->uh_sum;
4398 break;
4400 case IPPROTO_ICMP :
4401 icmp = fin->fin_dp;
4403 if ((nflags & IPN_ICMPQUERY) != 0) {
4404 if (icmp->icmp_cksum != 0)
4405 csump = &icmp->icmp_cksum;
4407 break;
4409 return csump;
4413 /* ------------------------------------------------------------------------ */
4414 /* Function: fr_natunload */
4415 /* Returns: Nil */
4416 /* Parameters: Nil */
4417 /* */
4418 /* Free all memory used by NAT structures allocated at runtime. */
4419 /* ------------------------------------------------------------------------ */
4420 void fr_natunload()
4422 ipftq_t *ifq, *ifqnext;
4424 (void) nat_clearlist();
4425 (void) nat_flushtable();
4428 * Proxy timeout queues are not cleaned here because although they
4429 * exist on the NAT list, appr_unload is called after fr_natunload
4430 * and the proxies actually are responsible for them being created.
4431 * Should the proxy timeouts have their own list? There's no real
4432 * justification as this is the only complication.
4434 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4435 ifqnext = ifq->ifq_next;
4436 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4437 (fr_deletetimeoutqueue(ifq) == 0))
4438 fr_freetimeoutqueue(ifq);
4441 if (nat_table[0] != NULL) {
4442 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4443 nat_table[0] = NULL;
4445 if (nat_table[1] != NULL) {
4446 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4447 nat_table[1] = NULL;
4449 if (nat_rules != NULL) {
4450 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4451 nat_rules = NULL;
4453 if (rdr_rules != NULL) {
4454 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4455 rdr_rules = NULL;
4457 if (ipf_hm_maptable != NULL) {
4458 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4459 ipf_hm_maptable = NULL;
4461 if (nat_stats.ns_bucketlen[0] != NULL) {
4462 KFREES(nat_stats.ns_bucketlen[0],
4463 sizeof(u_long *) * ipf_nattable_sz);
4464 nat_stats.ns_bucketlen[0] = NULL;
4466 if (nat_stats.ns_bucketlen[1] != NULL) {
4467 KFREES(nat_stats.ns_bucketlen[1],
4468 sizeof(u_long *) * ipf_nattable_sz);
4469 nat_stats.ns_bucketlen[1] = NULL;
4472 if (fr_nat_maxbucket_reset == 1)
4473 fr_nat_maxbucket = 0;
4475 if (fr_nat_init == 1) {
4476 fr_nat_init = 0;
4477 fr_sttab_destroy(nat_tqb);
4479 RW_DESTROY(&ipf_natfrag);
4480 RW_DESTROY(&ipf_nat);
4482 MUTEX_DESTROY(&ipf_nat_new);
4483 MUTEX_DESTROY(&ipf_natio);
4485 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4486 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4487 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4492 /* ------------------------------------------------------------------------ */
4493 /* Function: fr_natexpire */
4494 /* Returns: Nil */
4495 /* Parameters: Nil */
4496 /* */
4497 /* Check all of the timeout queues for entries at the top which need to be */
4498 /* expired. */
4499 /* ------------------------------------------------------------------------ */
4500 void fr_natexpire()
4502 ipftq_t *ifq, *ifqnext;
4503 ipftqent_t *tqe, *tqn;
4504 int i;
4505 SPL_INT(s);
4507 SPL_NET(s);
4508 WRITE_ENTER(&ipf_nat);
4509 for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4510 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4511 if (tqe->tqe_die > fr_ticks)
4512 break;
4513 tqn = tqe->tqe_next;
4514 nat_delete(tqe->tqe_parent, NL_EXPIRE);
4518 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4519 ifqnext = ifq->ifq_next;
4521 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4522 if (tqe->tqe_die > fr_ticks)
4523 break;
4524 tqn = tqe->tqe_next;
4525 nat_delete(tqe->tqe_parent, NL_EXPIRE);
4529 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4530 ifqnext = ifq->ifq_next;
4532 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4533 (ifq->ifq_ref == 0)) {
4534 fr_freetimeoutqueue(ifq);
4538 if (fr_nat_doflush != 0) {
4539 nat_extraflush(2);
4540 fr_nat_doflush = 0;
4543 RWLOCK_EXIT(&ipf_nat);
4544 SPL_X(s);
4548 /* ------------------------------------------------------------------------ */
4549 /* Function: fr_natsync */
4550 /* Returns: Nil */
4551 /* Parameters: ifp(I) - pointer to network interface */
4552 /* */
4553 /* Walk through all of the currently active NAT sessions, looking for those */
4554 /* which need to have their translated address updated. */
4555 /* ------------------------------------------------------------------------ */
4556 void fr_natsync(ifp)
4557 void *ifp;
4559 u_32_t sum1, sum2, sumd;
4560 struct in_addr in;
4561 ipnat_t *n;
4562 nat_t *nat;
4563 void *ifp2;
4564 SPL_INT(s);
4566 if (fr_running <= 0)
4567 return;
4570 * Change IP addresses for NAT sessions for any protocol except TCP
4571 * since it will break the TCP connection anyway. The only rules
4572 * which will get changed are those which are "map ... -> 0/32",
4573 * where the rule specifies the address is taken from the interface.
4575 SPL_NET(s);
4576 WRITE_ENTER(&ipf_nat);
4578 if (fr_running <= 0) {
4579 RWLOCK_EXIT(&ipf_nat);
4580 return;
4583 for (nat = nat_instances; nat; nat = nat->nat_next) {
4584 if ((nat->nat_flags & IPN_TCP) != 0)
4585 continue;
4586 n = nat->nat_ptr;
4587 if ((n == NULL) ||
4588 (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4589 continue;
4590 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4591 (ifp == nat->nat_ifps[1]))) {
4592 nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4593 if (nat->nat_ifnames[1][0] != '\0') {
4594 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4596 } else
4597 nat->nat_ifps[1] = nat->nat_ifps[0];
4598 ifp2 = nat->nat_ifps[0];
4599 if (ifp2 == NULL)
4600 continue;
4603 * Change the map-to address to be the same as the
4604 * new one.
4606 sum1 = nat->nat_outip.s_addr;
4607 if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4608 nat->nat_outip = in;
4609 sum2 = nat->nat_outip.s_addr;
4611 if (sum1 == sum2)
4612 continue;
4614 * Readjust the checksum adjustment to take into
4615 * account the new IP#.
4617 CALC_SUMD(sum1, sum2, sumd);
4618 /* XXX - dont change for TCP when solaris does
4619 * hardware checksumming.
4621 sumd += nat->nat_sumd[0];
4622 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4623 nat->nat_sumd[1] = nat->nat_sumd[0];
4627 for (n = nat_list; (n != NULL); n = n->in_next) {
4628 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4629 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4630 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4631 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4633 RWLOCK_EXIT(&ipf_nat);
4634 SPL_X(s);
4638 /* ------------------------------------------------------------------------ */
4639 /* Function: nat_icmpquerytype4 */
4640 /* Returns: int - 1 == success, 0 == failure */
4641 /* Parameters: icmptype(I) - ICMP type number */
4642 /* */
4643 /* Tests to see if the ICMP type number passed is a query/response type or */
4644 /* not. */
4645 /* ------------------------------------------------------------------------ */
4646 static int nat_icmpquerytype4(icmptype)
4647 int icmptype;
4651 * For the ICMP query NAT code, it is essential that both the query
4652 * and the reply match on the NAT rule. Because the NAT structure
4653 * does not keep track of the icmptype, and a single NAT structure
4654 * is used for all icmp types with the same src, dest and id, we
4655 * simply define the replies as queries as well. The funny thing is,
4656 * altough it seems silly to call a reply a query, this is exactly
4657 * as it is defined in the IPv4 specification
4660 switch (icmptype)
4663 case ICMP_ECHOREPLY:
4664 case ICMP_ECHO:
4665 /* route aedvertisement/solliciation is currently unsupported: */
4666 /* it would require rewriting the ICMP data section */
4667 case ICMP_TSTAMP:
4668 case ICMP_TSTAMPREPLY:
4669 case ICMP_IREQ:
4670 case ICMP_IREQREPLY:
4671 case ICMP_MASKREQ:
4672 case ICMP_MASKREPLY:
4673 return 1;
4674 default:
4675 return 0;
4680 /* ------------------------------------------------------------------------ */
4681 /* Function: nat_log */
4682 /* Returns: Nil */
4683 /* Parameters: nat(I) - pointer to NAT structure */
4684 /* type(I) - type of log entry to create */
4685 /* */
4686 /* Creates a NAT log entry. */
4687 /* ------------------------------------------------------------------------ */
4688 void nat_log(nat, type)
4689 struct nat *nat;
4690 u_int type;
4692 #ifdef IPFILTER_LOG
4693 # ifndef LARGE_NAT
4694 struct ipnat *np;
4695 int rulen;
4696 # endif
4697 struct natlog natl;
4698 void *items[1];
4699 size_t sizes[1];
4700 int types[1];
4702 natl.nl_inip = nat->nat_inip;
4703 natl.nl_outip = nat->nat_outip;
4704 natl.nl_origip = nat->nat_oip;
4705 natl.nl_bytes[0] = nat->nat_bytes[0];
4706 natl.nl_bytes[1] = nat->nat_bytes[1];
4707 natl.nl_pkts[0] = nat->nat_pkts[0];
4708 natl.nl_pkts[1] = nat->nat_pkts[1];
4709 natl.nl_origport = nat->nat_oport;
4710 natl.nl_inport = nat->nat_inport;
4711 natl.nl_outport = nat->nat_outport;
4712 natl.nl_p = nat->nat_p;
4713 natl.nl_type = type;
4714 natl.nl_rule = -1;
4715 # ifndef LARGE_NAT
4716 if (nat->nat_ptr != NULL) {
4717 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4718 if (np == nat->nat_ptr) {
4719 natl.nl_rule = rulen;
4720 break;
4723 # endif
4724 items[0] = &natl;
4725 sizes[0] = sizeof(natl);
4726 types[0] = 0;
4728 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4729 #endif
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter; simply hands ifp on to frsync().   */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync(ifp);
}
#endif
4751 /* ------------------------------------------------------------------------ */
4752 /* Function: fr_ipnatderef */
4753 /* Returns: Nil */
4754 /* Parameters: isp(I) - pointer to pointer to NAT rule */
4755 /* Write Locks: ipf_nat */
4756 /* */
4757 /* ------------------------------------------------------------------------ */
4758 void fr_ipnatderef(inp)
4759 ipnat_t **inp;
4761 ipnat_t *in;
4763 in = *inp;
4764 *inp = NULL;
4765 in->in_space++;
4766 in->in_use--;
4767 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4768 if (in->in_apr)
4769 appr_free(in->in_apr);
4770 MUTEX_DESTROY(&in->in_lock);
4771 KFREE(in);
4772 nat_stats.ns_rules--;
4773 #if SOLARIS && !defined(_INET_IP_STACK_H)
4774 if (nat_stats.ns_rules == 0)
4775 pfil_delayed_copy = 1;
4776 #endif
4781 /* ------------------------------------------------------------------------ */
4782 /* Function: fr_natderef */
4783 /* Returns: Nil */
4784 /* Parameters: isp(I) - pointer to pointer to NAT table entry */
4785 /* */
4786 /* Decrement the reference counter for this NAT table entry and free it if */
4787 /* there are no more things using it. */
4788 /* */
4789 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4790 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4791 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */
4792 /* because nat_delete() will do that and send nat_ref to -1. */
4793 /* */
4794 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4795 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4796 /* ------------------------------------------------------------------------ */
4797 void fr_natderef(natp)
4798 nat_t **natp;
4800 nat_t *nat;
4802 nat = *natp;
4803 *natp = NULL;
4805 MUTEX_ENTER(&nat->nat_lock);
4806 if (nat->nat_ref > 1) {
4807 nat->nat_ref--;
4808 MUTEX_EXIT(&nat->nat_lock);
4809 return;
4811 MUTEX_EXIT(&nat->nat_lock);
4813 WRITE_ENTER(&ipf_nat);
4814 nat_delete(nat, NL_EXPIRE);
4815 RWLOCK_EXIT(&ipf_nat);
4819 /* ------------------------------------------------------------------------ */
4820 /* Function: fr_natclone */
4821 /* Returns: ipstate_t* - NULL == cloning failed, */
4822 /* else pointer to new state structure */
4823 /* Parameters: fin(I) - pointer to packet information */
4824 /* is(I) - pointer to master state structure */
4825 /* Write Lock: ipf_nat */
4826 /* */
4827 /* Create a "duplcate" state table entry from the master. */
4828 /* ------------------------------------------------------------------------ */
4829 static nat_t *fr_natclone(fin, nat)
4830 fr_info_t *fin;
4831 nat_t *nat;
4833 frentry_t *fr;
4834 nat_t *clone;
4835 ipnat_t *np;
4837 KMALLOC(clone, nat_t *);
4838 if (clone == NULL)
4839 return NULL;
4840 bcopy((char *)nat, (char *)clone, sizeof(*clone));
4842 MUTEX_NUKE(&clone->nat_lock);
4844 clone->nat_aps = NULL;
4846 * Initialize all these so that nat_delete() doesn't cause a crash.
4848 clone->nat_tqe.tqe_pnext = NULL;
4849 clone->nat_tqe.tqe_next = NULL;
4850 clone->nat_tqe.tqe_ifq = NULL;
4851 clone->nat_tqe.tqe_parent = clone;
4853 clone->nat_flags &= ~SI_CLONE;
4854 clone->nat_flags |= SI_CLONED;
4856 if (clone->nat_hm)
4857 clone->nat_hm->hm_ref++;
4859 if (nat_insert(clone, fin->fin_rev) == -1) {
4860 KFREE(clone);
4861 return NULL;
4863 np = clone->nat_ptr;
4864 if (np != NULL) {
4865 if (nat_logging)
4866 nat_log(clone, (u_int)np->in_redir);
4867 np->in_use++;
4869 fr = clone->nat_fr;
4870 if (fr != NULL) {
4871 MUTEX_ENTER(&fr->fr_lock);
4872 fr->fr_ref++;
4873 MUTEX_EXIT(&fr->fr_lock);
4877 * Because the clone is created outside the normal loop of things and
4878 * TCP has special needs in terms of state, initialise the timeout
4879 * state of the new NAT from here.
4881 if (clone->nat_p == IPPROTO_TCP) {
4882 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4883 clone->nat_flags);
4885 #ifdef IPFILTER_SYNC
4886 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4887 #endif
4888 if (nat_logging)
4889 nat_log(clone, NL_CLONE);
4890 return clone;
4894 /* ------------------------------------------------------------------------ */
4895 /* Function: nat_wildok */
4896 /* Returns: int - 1 == packet's ports match wildcards */
4897 /* 0 == packet's ports don't match wildcards */
4898 /* Parameters: nat(I) - NAT entry */
4899 /* sport(I) - source port */
4900 /* dport(I) - destination port */
4901 /* flags(I) - wildcard flags */
4902 /* dir(I) - packet direction */
4903 /* */
4904 /* Use NAT entry and packet direction to determine which combination of */
4905 /* wildcard flags should be used. */
4906 /* ------------------------------------------------------------------------ */
4907 static int nat_wildok(nat, sport, dport, flags, dir)
4908 nat_t *nat;
4909 int sport;
4910 int dport;
4911 int flags;
4912 int dir;
4915 * When called by dir is set to
4916 * nat_inlookup NAT_INBOUND (0)
4917 * nat_outlookup NAT_OUTBOUND (1)
4919 * We simply combine the packet's direction in dir with the original
4920 * "intended" direction of that NAT entry in nat->nat_dir to decide
4921 * which combination of wildcard flags to allow.
4924 switch ((dir << 1) | nat->nat_dir)
4926 case 3: /* outbound packet / outbound entry */
4927 if (((nat->nat_inport == sport) ||
4928 (flags & SI_W_SPORT)) &&
4929 ((nat->nat_oport == dport) ||
4930 (flags & SI_W_DPORT)))
4931 return 1;
4932 break;
4933 case 2: /* outbound packet / inbound entry */
4934 if (((nat->nat_outport == sport) ||
4935 (flags & SI_W_DPORT)) &&
4936 ((nat->nat_oport == dport) ||
4937 (flags & SI_W_SPORT)))
4938 return 1;
4939 break;
4940 case 1: /* inbound packet / outbound entry */
4941 if (((nat->nat_oport == sport) ||
4942 (flags & SI_W_DPORT)) &&
4943 ((nat->nat_outport == dport) ||
4944 (flags & SI_W_SPORT)))
4945 return 1;
4946 break;
4947 case 0: /* inbound packet / inbound entry */
4948 if (((nat->nat_oport == sport) ||
4949 (flags & SI_W_SPORT)) &&
4950 ((nat->nat_outport == dport) ||
4951 (flags & SI_W_DPORT)))
4952 return 1;
4953 break;
4954 default:
4955 break;
4958 return(0);
4962 /* ------------------------------------------------------------------------ */
4963 /* Function: nat_mssclamp */
4964 /* Returns: Nil */
4965 /* Parameters: tcp(I) - pointer to TCP header */
4966 /* maxmss(I) - value to clamp the TCP MSS to */
4967 /* fin(I) - pointer to packet information */
4968 /* csump(I) - pointer to TCP checksum */
4969 /* */
4970 /* Check for MSS option and clamp it if necessary. If found and changed, */
4971 /* then the TCP header checksum will be updated to reflect the change in */
4972 /* the MSS. */
4973 /* ------------------------------------------------------------------------ */
4974 static void nat_mssclamp(tcp, maxmss, fin, csump)
4975 tcphdr_t *tcp;
4976 u_32_t maxmss;
4977 fr_info_t *fin;
4978 u_short *csump;
4980 u_char *cp, *ep, opt;
4981 int hlen, advance;
4982 u_32_t mss, sumd;
4984 hlen = TCP_OFF(tcp) << 2;
4985 if (hlen > sizeof(*tcp)) {
4986 cp = (u_char *)tcp + sizeof(*tcp);
4987 ep = (u_char *)tcp + hlen;
4989 while (cp < ep) {
4990 opt = cp[0];
4991 if (opt == TCPOPT_EOL)
4992 break;
4993 else if (opt == TCPOPT_NOP) {
4994 cp++;
4995 continue;
4998 if (cp + 1 >= ep)
4999 break;
5000 advance = cp[1];
5001 if ((cp + advance > ep) || (advance <= 0))
5002 break;
5003 switch (opt)
5005 case TCPOPT_MAXSEG:
5006 if (advance != 4)
5007 break;
5008 mss = cp[2] * 256 + cp[3];
5009 if (mss > maxmss) {
5010 cp[2] = maxmss / 256;
5011 cp[3] = maxmss & 0xff;
5012 CALC_SUMD(mss, maxmss, sumd);
5013 fix_outcksum(fin, csump, sumd);
5015 break;
5016 default:
5017 /* ignore unknown options */
5018 break;
5021 cp += advance;
5027 /* ------------------------------------------------------------------------ */
5028 /* Function: fr_setnatqueue */
5029 /* Returns: Nil */
5030 /* Parameters: nat(I)- pointer to NAT structure */
5031 /* rev(I) - forward(0) or reverse(1) direction */
5032 /* Locks: ipf_nat (read or write) */
5033 /* */
5034 /* Put the NAT entry on its default queue entry, using rev as a helped in */
5035 /* determining which queue it should be placed on. */
5036 /* ------------------------------------------------------------------------ */
5037 void fr_setnatqueue(nat, rev)
5038 nat_t *nat;
5039 int rev;
5041 ipftq_t *oifq, *nifq;
5043 if (nat->nat_ptr != NULL)
5044 nifq = nat->nat_ptr->in_tqehead[rev];
5045 else
5046 nifq = NULL;
5048 if (nifq == NULL) {
5049 switch (nat->nat_p)
5051 case IPPROTO_UDP :
5052 nifq = &nat_udptq;
5053 break;
5054 case IPPROTO_ICMP :
5055 nifq = &nat_icmptq;
5056 break;
5057 case IPPROTO_TCP :
5058 nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5059 break;
5060 default :
5061 nifq = &nat_iptq;
5062 break;
5066 oifq = nat->nat_tqe.tqe_ifq;
5068 * If it's currently on a timeout queue, move it from one queue to
5069 * another, else put it on the end of the newly determined queue.
5071 if (oifq != NULL)
5072 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5073 else
5074 fr_queueappend(&nat->nat_tqe, nifq, nat);
5075 return;
5079 /* ------------------------------------------------------------------------ */
5080 /* Function: nat_getnext */
5081 /* Returns: int - 0 == ok, else error */
5082 /* Parameters: t(I) - pointer to ipftoken structure */
5083 /* itp(I) - pointer to ipfgeniter_t structure */
5084 /* */
5085 /* Fetch the next nat/ipnat structure pointer from the linked list and */
5086 /* copy it out to the storage space pointed to by itp. The next item */
5087 /* in the list to look at is put back in the ipftoken struture. */
5088 /* ------------------------------------------------------------------------ */
5089 static int nat_getnext(t, itp)
5090 ipftoken_t *t;
5091 ipfgeniter_t *itp;
5093 hostmap_t *hm = NULL, *nexthm = NULL, zerohm;
5094 ipnat_t *ipn = NULL, *nextipnat = NULL, zeroipn;
5095 nat_t *nat = NULL, *nextnat = NULL, zeronat;
5096 int error = 0, count;
5097 char *dst;
5099 if (itp->igi_nitems < 1)
5100 return ENOSPC;
5102 READ_ENTER(&ipf_nat);
5105 * Get "previous" entry from the token and find the next entry.
5107 switch (itp->igi_type)
5109 case IPFGENITER_HOSTMAP :
5110 hm = t->ipt_data;
5111 if (hm == NULL) {
5112 nexthm = ipf_hm_maplist;
5113 } else {
5114 nexthm = hm->hm_next;
5116 break;
5118 case IPFGENITER_IPNAT :
5119 ipn = t->ipt_data;
5120 if (ipn == NULL) {
5121 nextipnat = nat_list;
5122 } else {
5123 nextipnat = ipn->in_next;
5125 break;
5127 case IPFGENITER_NAT :
5128 nat = t->ipt_data;
5129 if (nat == NULL) {
5130 nextnat = nat_instances;
5131 } else {
5132 nextnat = nat->nat_next;
5134 break;
5136 default :
5137 RWLOCK_EXIT(&ipf_nat);
5138 return EINVAL;
5141 dst = itp->igi_data;
5142 for (count = itp->igi_nitems; count > 0; count--) {
5144 * If we found an entry, add a reference and update the token.
5145 * Otherwise, zero out data to be returned and NULL out token.
5147 switch (itp->igi_type)
5149 case IPFGENITER_HOSTMAP :
5150 if (nexthm != NULL) {
5151 ATOMIC_INC32(nexthm->hm_ref);
5152 t->ipt_data = nexthm;
5153 } else {
5154 bzero(&zerohm, sizeof(zerohm));
5155 nexthm = &zerohm;
5156 t->ipt_data = NULL;
5158 break;
5160 case IPFGENITER_IPNAT :
5161 if (nextipnat != NULL) {
5162 ATOMIC_INC32(nextipnat->in_use);
5163 t->ipt_data = nextipnat;
5164 } else {
5165 bzero(&zeroipn, sizeof(zeroipn));
5166 nextipnat = &zeroipn;
5167 t->ipt_data = NULL;
5169 break;
5171 case IPFGENITER_NAT :
5172 if (nextnat != NULL) {
5173 MUTEX_ENTER(&nextnat->nat_lock);
5174 nextnat->nat_ref++;
5175 MUTEX_EXIT(&nextnat->nat_lock);
5176 t->ipt_data = nextnat;
5177 } else {
5178 bzero(&zeronat, sizeof(zeronat));
5179 nextnat = &zeronat;
5180 t->ipt_data = NULL;
5182 break;
5186 * Now that we have ref, it's save to give up lock.
5188 RWLOCK_EXIT(&ipf_nat);
5191 * Copy out data and clean up references and token as needed.
5193 switch (itp->igi_type)
5195 case IPFGENITER_HOSTMAP :
5196 error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5197 if (error != 0)
5198 error = EFAULT;
5199 if (hm != NULL) {
5200 WRITE_ENTER(&ipf_nat);
5201 fr_hostmapdel(&hm);
5202 RWLOCK_EXIT(&ipf_nat);
5204 if (t->ipt_data != NULL) {
5205 if (nexthm->hm_next == NULL) {
5206 t->ipt_data = NULL;
5207 break;
5209 dst += sizeof(*nexthm);
5210 hm = nexthm;
5211 nexthm = nexthm->hm_next;
5213 break;
5215 case IPFGENITER_IPNAT :
5216 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5217 if (error != 0)
5218 error = EFAULT;
5219 if (ipn != NULL) {
5220 WRITE_ENTER(&ipf_nat);
5221 fr_ipnatderef(&ipn);
5222 RWLOCK_EXIT(&ipf_nat);
5224 if (t->ipt_data != NULL) {
5225 if (nextipnat->in_next == NULL) {
5226 t->ipt_data = NULL;
5227 break;
5229 dst += sizeof(*nextipnat);
5230 ipn = nextipnat;
5231 nextipnat = nextipnat->in_next;
5233 break;
5235 case IPFGENITER_NAT :
5236 error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5237 if (error != 0)
5238 error = EFAULT;
5239 if (nat != NULL) {
5240 fr_natderef(&nat);
5242 if (t->ipt_data != NULL) {
5243 if (nextnat->nat_next == NULL) {
5244 t->ipt_data = NULL;
5245 break;
5247 dst += sizeof(*nextnat);
5248 nat = nextnat;
5249 nextnat = nextnat->nat_next;
5251 break;
5254 if ((count == 1) || (error != 0))
5255 break;
5257 READ_ENTER(&ipf_nat);
5260 return error;
5264 /* ------------------------------------------------------------------------ */
5265 /* Function: nat_iterator */
5266 /* Returns: int - 0 == ok, else error */
5267 /* Parameters: token(I) - pointer to ipftoken structure */
5268 /* itp(I) - pointer to ipfgeniter_t structure */
5269 /* */
5270 /* This function acts as a handler for the SIOCGENITER ioctls that use a */
5271 /* generic structure to iterate through a list. There are three different */
5272 /* linked lists of NAT related information to go through: NAT rules, active */
5273 /* NAT mappings and the NAT fragment cache. */
5274 /* ------------------------------------------------------------------------ */
5275 static int nat_iterator(token, itp)
5276 ipftoken_t *token;
5277 ipfgeniter_t *itp;
5279 int error;
5281 if (itp->igi_data == NULL)
5282 return EFAULT;
5284 token->ipt_subtype = itp->igi_type;
5286 switch (itp->igi_type)
5288 case IPFGENITER_HOSTMAP :
5289 case IPFGENITER_IPNAT :
5290 case IPFGENITER_NAT :
5291 error = nat_getnext(token, itp);
5292 break;
5294 case IPFGENITER_NATFRAG :
5295 #ifdef USE_MUTEXES
5296 error = fr_nextfrag(token, itp, &ipfr_natlist,
5297 &ipfr_nattail, &ipf_natfrag);
5298 #else
5299 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5300 #endif
5301 break;
5302 default :
5303 error = EINVAL;
5304 break;
5307 return error;
5311 /* ------------------------------------------------------------------------ */
5312 /* Function: nat_extraflush */
5313 /* Returns: int - 0 == success, -1 == failure */
5314 /* Parameters: which(I) - how to flush the active NAT table */
5315 /* Write Locks: ipf_nat */
5316 /* */
5317 /* Flush nat tables. Three actions currently defined: */
5318 /* which == 0 : flush all nat table entries */
5319 /* which == 1 : flush TCP connections which have started to close but are */
5320 /* stuck for some reason. */
5321 /* which == 2 : flush TCP connections which have been idle for a long time, */
5322 /* starting at > 4 days idle and working back in successive half-*/
5323 /* days to at most 12 hours old. If this fails to free enough */
5324 /* slots then work backwards in half hour slots to 30 minutes. */
5325 /* If that too fails, then work backwards in 30 second intervals */
5326 /* for the last 30 minutes to at worst 30 seconds idle. */
5327 /* ------------------------------------------------------------------------ */
5328 static int nat_extraflush(which)
5329 int which;
5331 ipftq_t *ifq, *ifqnext;
5332 nat_t *nat, **natp;
5333 ipftqent_t *tqn;
5334 int removed;
5335 SPL_INT(s);
5337 removed = 0;
5339 SPL_NET(s);
5341 switch (which)
5343 case 0 :
5345 * Style 0 flush removes everything...
5347 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5348 nat_delete(nat, NL_FLUSH);
5349 removed++;
5351 break;
5353 case 1 :
5355 * Since we're only interested in things that are closing,
5356 * we can start with the appropriate timeout queue.
5358 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5359 ifq = ifq->ifq_next) {
5361 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5362 nat = tqn->tqe_parent;
5363 tqn = tqn->tqe_next;
5364 if (nat->nat_p != IPPROTO_TCP)
5365 break;
5366 nat_delete(nat, NL_EXPIRE);
5367 removed++;
5372 * Also need to look through the user defined queues.
5374 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5375 ifqnext = ifq->ifq_next;
5376 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5377 nat = tqn->tqe_parent;
5378 tqn = tqn->tqe_next;
5379 if (nat->nat_p != IPPROTO_TCP)
5380 continue;
5382 if ((nat->nat_tcpstate[0] >
5383 IPF_TCPS_ESTABLISHED) &&
5384 (nat->nat_tcpstate[1] >
5385 IPF_TCPS_ESTABLISHED)) {
5386 nat_delete(nat, NL_EXPIRE);
5387 removed++;
5391 break;
5394 * Args 5-11 correspond to flushing those particular states
5395 * for TCP connections.
5397 case IPF_TCPS_CLOSE_WAIT :
5398 case IPF_TCPS_FIN_WAIT_1 :
5399 case IPF_TCPS_CLOSING :
5400 case IPF_TCPS_LAST_ACK :
5401 case IPF_TCPS_FIN_WAIT_2 :
5402 case IPF_TCPS_TIME_WAIT :
5403 case IPF_TCPS_CLOSED :
5404 tqn = nat_tqb[which].ifq_head;
5405 while (tqn != NULL) {
5406 nat = tqn->tqe_parent;
5407 tqn = tqn->tqe_next;
5408 nat_delete(nat, NL_FLUSH);
5409 removed++;
5411 break;
5413 default :
5414 if (which < 30)
5415 break;
5418 * Take a large arbitrary number to mean the number of seconds
5419 * for which which consider to be the maximum value we'll allow
5420 * the expiration to be.
5422 which = IPF_TTLVAL(which);
5423 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5424 if (fr_ticks - nat->nat_touched > which) {
5425 nat_delete(nat, NL_FLUSH);
5426 removed++;
5427 } else
5428 natp = &nat->nat_next;
5430 break;
5433 if (which != 2) {
5434 SPL_X(s);
5435 return removed;
5439 * Asked to remove inactive entries because the table is full.
5441 if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5442 nat_last_force_flush = fr_ticks;
5443 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5446 SPL_X(s);
5447 return removed;
5451 /* ------------------------------------------------------------------------ */
5452 /* Function: nat_flush_entry */
5453 /* Returns: 0 - always succeeds */
5454 /* Parameters: entry(I) - pointer to NAT entry */
5455 /* Write Locks: ipf_nat */
5456 /* */
5457 /* This function is a stepping stone between ipf_queueflush() and */
5458 /* nat_dlete(). It is used so we can provide a uniform interface via the */
5459 /* ipf_queueflush() function. Since the nat_delete() function returns void */
5460 /* we translate that to mean it always succeeds in deleting something. */
5461 /* ------------------------------------------------------------------------ */
5462 static int nat_flush_entry(entry)
5463 void *entry;
5465 nat_delete(entry, NL_FLUSH);
5466 return 0;
5470 /* ------------------------------------------------------------------------ */
5471 /* Function: nat_gettable */
5472 /* Returns: int - 0 = success, else error */
5473 /* Parameters: data(I) - pointer to ioctl data */
5474 /* */
5475 /* This function handles ioctl requests for tables of nat information. */
5476 /* At present the only table it deals with is the hash bucket statistics. */
5477 /* ------------------------------------------------------------------------ */
5478 static int nat_gettable(data)
5479 char *data;
5481 ipftable_t table;
5482 int error;
5484 error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5485 if (error != 0)
5486 return error;
5488 switch (table.ita_type)
5490 case IPFTABLE_BUCKETS_NATIN :
5491 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5492 ipf_nattable_sz * sizeof(u_long));
5493 break;
5495 case IPFTABLE_BUCKETS_NATOUT :
5496 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5497 ipf_nattable_sz * sizeof(u_long));
5498 break;
5500 default :
5501 return EINVAL;
5504 if (error != 0) {
5505 error = EFAULT;
5507 return error;
5511 /* ------------------------------------------------------------------------ */
5512 /* Function: nat_uncreate */
5513 /* Returns: Nil */
5514 /* Parameters: fin(I) - pointer to packet information */
5515 /* */
5516 /* This function is used to remove a NAT entry from the NAT table when we */
5517 /* decide that the create was actually in error. It is thus assumed that */
5518 /* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */
5519 /* with the translated packet (not the original), we have to reverse the */
5520 /* lookup. Although doing the lookup is expensive (relatively speaking), it */
5521 /* is not anticipated that this will be a frequent occurance for normal */
5522 /* traffic patterns. */
5523 /* ------------------------------------------------------------------------ */
5524 void nat_uncreate(fin)
5525 fr_info_t *fin;
5527 int nflags;
5528 nat_t *nat;
5530 switch (fin->fin_p)
5532 case IPPROTO_TCP :
5533 nflags = IPN_TCP;
5534 break;
5535 case IPPROTO_UDP :
5536 nflags = IPN_UDP;
5537 break;
5538 default :
5539 nflags = 0;
5540 break;
5543 WRITE_ENTER(&ipf_nat);
5545 if (fin->fin_out == 0) {
5546 nat = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
5547 fin->fin_dst, fin->fin_src);
5548 } else {
5549 nat = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
5550 fin->fin_src, fin->fin_dst);
5553 if (nat != NULL) {
5554 nat_stats.ns_uncreate[fin->fin_out][0]++;
5555 nat_delete(nat, NL_DESTROY);
5556 } else {
5557 nat_stats.ns_uncreate[fin->fin_out][1]++;
5560 RWLOCK_EXIT(&ipf_nat);