1 /* $NetBSD: ip_state.c,v 1.34 2009/08/19 08:36:13 darrenr Exp $ */
4 * Copyright (C) 1995-2003 by Darren Reed.
6 * See the IPFILTER.LICENCE file for details on licencing.
8 * Copyright 2008 Sun Microsystems, Inc.
10 #if defined(KERNEL) || defined(_KERNEL)
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22 # if (__NetBSD_Version__ < 399001400)
23 # include "opt_ipfilter_log.h"
25 # include "opt_ipfilter.h"
28 #if defined(_KERNEL) && defined(__FreeBSD_version) && \
29 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
30 #include "opt_inet6.h"
32 #if !defined(_KERNEL) && !defined(__KERNEL__)
43 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
44 # include <sys/filio.h>
45 # include <sys/fcntl.h>
46 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
47 # include "opt_ipfilter.h"
50 # include <sys/ioctl.h>
54 # include <sys/protosw.h>
56 #include <sys/socket.h>
58 # include <sys/systm.h>
59 # if !defined(__SVR4) && !defined(__svr4__)
60 # include <sys/mbuf.h>
63 #if defined(__SVR4) || defined(__svr4__)
64 # include <sys/filio.h>
65 # include <sys/byteorder.h>
67 # include <sys/dditypes.h>
69 # include <sys/stream.h>
70 # include <sys/kmem.h>
77 #include <netinet/in.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/ip.h>
80 #include <netinet/tcp.h>
82 # include <netinet/ip_var.h>
84 #if !defined(__hpux) && !defined(linux)
85 # include <netinet/tcp_fsm.h>
87 #include <netinet/udp.h>
88 #include <netinet/ip_icmp.h>
89 #include "netinet/ip_compat.h"
90 #include <netinet/tcpip.h>
91 #include "netinet/ip_fil.h"
92 #include "netinet/ip_nat.h"
93 #include "netinet/ip_frag.h"
94 #include "netinet/ip_state.h"
95 #include "netinet/ip_proxy.h"
97 #include "netinet/ip_sync.h"
100 #include "netinet/ip_scan.h"
103 #include <netinet/icmp6.h>
105 #if (__FreeBSD_version >= 300000)
106 # include <sys/malloc.h>
107 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
108 # include <sys/libkern.h>
109 # include <sys/systm.h>
112 /* END OF INCLUDES */
116 #if defined(__NetBSD__)
117 #include <sys/cdefs.h>
118 __KERNEL_RCSID(0, "$NetBSD: ip_state.c,v 1.34 2009/08/19 08:36:13 darrenr Exp $");
120 static const char sccsid
[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed";
121 static const char rcsid
[] = "@(#)Id: ip_state.c,v 2.186.2.98 2009/07/21 09:40:56 darrenr Exp";
125 static ipstate_t
**ips_table
= NULL
;
126 static u_long
*ips_seed
= NULL
;
127 static int ips_num
= 0;
128 static u_long ips_last_force_flush
= 0;
129 ips_stat_t ips_stats
;
132 static ipstate_t
*fr_checkicmp6matchingstate
__P((fr_info_t
*));
134 static ipstate_t
*fr_matchsrcdst
__P((fr_info_t
*, ipstate_t
*, i6addr_t
*,
135 i6addr_t
*, tcphdr_t
*, u_32_t
));
136 static ipstate_t
*fr_checkicmpmatchingstate
__P((fr_info_t
*));
137 static int fr_state_flush_entry
__P((void *));
138 static ips_stat_t
*fr_statetstats
__P((void));
139 static int fr_delstate
__P((ipstate_t
*, int));
140 static int fr_state_remove
__P((void *));
141 static int ipf_state_match
__P((ipstate_t
*is1
, ipstate_t
*is2
));
142 static int ipf_state_matchaddresses
__P((ipstate_t
*is1
, ipstate_t
*is2
));
143 static int ipf_state_matchipv4addrs
__P((ipstate_t
*is1
, ipstate_t
*is2
));
144 static int ipf_state_matchipv6addrs
__P((ipstate_t
*is1
, ipstate_t
*is2
));
145 static int ipf_state_matchisps
__P((ipstate_t
*is1
, ipstate_t
*is2
));
146 static int ipf_state_matchports
__P((udpinfo_t
*is1
, udpinfo_t
*is2
));
147 static void fr_ipsmove
__P((ipstate_t
*, u_int
));
148 static int fr_tcpstate
__P((fr_info_t
*, tcphdr_t
*, ipstate_t
*));
149 static int fr_tcpoptions
__P((fr_info_t
*, tcphdr_t
*, tcpdata_t
*));
150 static ipstate_t
*fr_stclone
__P((fr_info_t
*, tcphdr_t
*, ipstate_t
*));
151 static void fr_fixinisn
__P((fr_info_t
*, ipstate_t
*));
152 static void fr_fixoutisn
__P((fr_info_t
*, ipstate_t
*));
153 static void fr_checknewisn
__P((fr_info_t
*, ipstate_t
*));
154 static int fr_stateiter
__P((ipftoken_t
*, ipfgeniter_t
*));
155 static int fr_stgettable
__P((char *));
157 int fr_stputent
__P((void *));
158 int fr_stgetent
__P((void *));
160 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */
161 #define FIVE_DAYS (5 * ONE_DAY)
162 #define DOUBLE_HASH(x) (((x) + ips_seed[(x) % fr_statesize]) % fr_statesize)
164 u_long fr_tcpidletimeout
= FIVE_DAYS
,
165 fr_tcpclosewait
= IPF_TTLVAL(2 * TCP_MSL
),
166 fr_tcplastack
= IPF_TTLVAL(30),
167 fr_tcptimeout
= IPF_TTLVAL(2 * TCP_MSL
),
168 fr_tcptimewait
= IPF_TTLVAL(2 * TCP_MSL
),
169 fr_tcpclosed
= IPF_TTLVAL(30),
170 fr_tcphalfclosed
= IPF_TTLVAL(2 * 3600), /* 2 hours */
171 fr_udptimeout
= IPF_TTLVAL(120),
172 fr_udpacktimeout
= IPF_TTLVAL(12),
173 fr_icmptimeout
= IPF_TTLVAL(60),
174 fr_icmpacktimeout
= IPF_TTLVAL(6),
175 fr_iptimeout
= IPF_TTLVAL(60);
176 int fr_statemax
= IPSTATE_MAX
,
177 fr_statesize
= IPSTATE_SIZE
;
178 int fr_state_doflush
= 0,
180 fr_state_maxbucket
= 0,
181 fr_state_maxbucket_reset
= 1,
183 ipftq_t ips_tqtqb
[IPF_TCP_NSTATES
],
192 int ipstate_logging
= 1;
194 int ipstate_logging
= 0;
196 ipstate_t
*ips_list
= NULL
;
199 /* ------------------------------------------------------------------------ */
200 /* Function: fr_stateinit */
201 /* Returns: int - 0 == success, -1 == failure */
202 /* Parameters: Nil */
204 /* Initialise all the global variables used within the state code. */
205 /* This action also includes initialising locks. */
206 /* ------------------------------------------------------------------------ */
209 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
214 KMALLOCS(ips_table
, ipstate_t
**, fr_statesize
* sizeof(ipstate_t
*));
215 if (ips_table
== NULL
)
217 bzero((char *)ips_table
, fr_statesize
* sizeof(ipstate_t
*));
219 KMALLOCS(ips_seed
, u_long
*, fr_statesize
* sizeof(*ips_seed
));
220 if (ips_seed
== NULL
)
222 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
226 for (i
= 0; i
< fr_statesize
; i
++) {
228 * XXX - ips_seed[X] should be a random number of sorts.
230 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL)
231 ips_seed
[i
] = arc4random();
233 ips_seed
[i
] = ((u_long
)ips_seed
+ i
) * fr_statesize
;
234 ips_seed
[i
] += tv
.tv_sec
;
235 ips_seed
[i
] *= (u_long
)ips_seed
;
236 ips_seed
[i
] ^= 0x5a5aa5a5;
237 ips_seed
[i
] *= fr_statemax
;
240 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
241 ipf_rand_push(ips_seed
, fr_statesize
* sizeof(*ips_seed
));
244 /* fill icmp reply type table */
245 for (i
= 0; i
<= ICMP_MAXTYPE
; i
++)
246 icmpreplytype4
[i
] = -1;
247 icmpreplytype4
[ICMP_ECHO
] = ICMP_ECHOREPLY
;
248 icmpreplytype4
[ICMP_TSTAMP
] = ICMP_TSTAMPREPLY
;
249 icmpreplytype4
[ICMP_IREQ
] = ICMP_IREQREPLY
;
250 icmpreplytype4
[ICMP_MASKREQ
] = ICMP_MASKREPLY
;
252 /* fill icmp reply type table */
253 for (i
= 0; i
<= ICMP6_MAXTYPE
; i
++)
254 icmpreplytype6
[i
] = -1;
255 icmpreplytype6
[ICMP6_ECHO_REQUEST
] = ICMP6_ECHO_REPLY
;
256 icmpreplytype6
[ICMP6_MEMBERSHIP_QUERY
] = ICMP6_MEMBERSHIP_REPORT
;
257 icmpreplytype6
[ICMP6_NI_QUERY
] = ICMP6_NI_REPLY
;
258 icmpreplytype6
[ND_ROUTER_SOLICIT
] = ND_ROUTER_ADVERT
;
259 icmpreplytype6
[ND_NEIGHBOR_SOLICIT
] = ND_NEIGHBOR_ADVERT
;
262 KMALLOCS(ips_stats
.iss_bucketlen
, u_long
*,
263 fr_statesize
* sizeof(u_long
));
264 if (ips_stats
.iss_bucketlen
== NULL
)
266 bzero((char *)ips_stats
.iss_bucketlen
, fr_statesize
* sizeof(u_long
));
268 if (fr_state_maxbucket
== 0) {
269 for (i
= fr_statesize
; i
> 0; i
>>= 1)
270 fr_state_maxbucket
++;
271 fr_state_maxbucket
*= 2;
274 ips_stats
.iss_tcptab
= ips_tqtqb
;
275 fr_sttab_init(ips_tqtqb
);
276 ips_tqtqb
[IPF_TCP_NSTATES
- 1].ifq_next
= &ips_udptq
;
277 ips_udptq
.ifq_ttl
= (u_long
)fr_udptimeout
;
278 ips_udptq
.ifq_ref
= 1;
279 ips_udptq
.ifq_head
= NULL
;
280 ips_udptq
.ifq_tail
= &ips_udptq
.ifq_head
;
281 MUTEX_INIT(&ips_udptq
.ifq_lock
, "ipftq udp tab");
282 ips_udptq
.ifq_next
= &ips_udpacktq
;
283 ips_udpacktq
.ifq_ttl
= (u_long
)fr_udpacktimeout
;
284 ips_udpacktq
.ifq_ref
= 1;
285 ips_udpacktq
.ifq_head
= NULL
;
286 ips_udpacktq
.ifq_tail
= &ips_udpacktq
.ifq_head
;
287 MUTEX_INIT(&ips_udpacktq
.ifq_lock
, "ipftq udpack tab");
288 ips_udpacktq
.ifq_next
= &ips_icmptq
;
289 ips_icmptq
.ifq_ttl
= (u_long
)fr_icmptimeout
;
290 ips_icmptq
.ifq_ref
= 1;
291 ips_icmptq
.ifq_head
= NULL
;
292 ips_icmptq
.ifq_tail
= &ips_icmptq
.ifq_head
;
293 MUTEX_INIT(&ips_icmptq
.ifq_lock
, "ipftq icmp tab");
294 ips_icmptq
.ifq_next
= &ips_icmpacktq
;
295 ips_icmpacktq
.ifq_ttl
= (u_long
)fr_icmpacktimeout
;
296 ips_icmpacktq
.ifq_ref
= 1;
297 ips_icmpacktq
.ifq_head
= NULL
;
298 ips_icmpacktq
.ifq_tail
= &ips_icmpacktq
.ifq_head
;
299 MUTEX_INIT(&ips_icmpacktq
.ifq_lock
, "ipftq icmpack tab");
300 ips_icmpacktq
.ifq_next
= &ips_iptq
;
301 ips_iptq
.ifq_ttl
= (u_long
)fr_iptimeout
;
302 ips_iptq
.ifq_ref
= 1;
303 ips_iptq
.ifq_head
= NULL
;
304 ips_iptq
.ifq_tail
= &ips_iptq
.ifq_head
;
305 MUTEX_INIT(&ips_iptq
.ifq_lock
, "ipftq ip tab");
306 ips_iptq
.ifq_next
= &ips_deletetq
;
307 ips_deletetq
.ifq_ttl
= (u_long
)1;
308 ips_deletetq
.ifq_ref
= 1;
309 ips_deletetq
.ifq_head
= NULL
;
310 ips_deletetq
.ifq_tail
= &ips_deletetq
.ifq_head
;
311 MUTEX_INIT(&ips_deletetq
.ifq_lock
, "state delete queue");
312 ips_deletetq
.ifq_next
= NULL
;
314 RWLOCK_INIT(&ipf_state
, "ipf IP state rwlock");
315 MUTEX_INIT(&ipf_stinsert
, "ipf state insert mutex");
318 ips_last_force_flush
= fr_ticks
;
323 /* ------------------------------------------------------------------------ */
324 /* Function: fr_stateunload */
326 /* Parameters: Nil */
328 /* Release and destroy any resources acquired or initialised so that */
329 /* IPFilter can be unloaded or re-initialised. */
330 /* ------------------------------------------------------------------------ */
331 void fr_stateunload()
333 ipftq_t
*ifq
, *ifqnext
;
336 while ((is
= ips_list
) != NULL
)
337 fr_delstate(is
, ISL_UNLOAD
);
340 * Proxy timeout queues are not cleaned here because although they
341 * exist on the state list, appr_unload is called after fr_stateunload
342 * and the proxies actually are responsible for them being created.
343 * Should the proxy timeouts have their own list? There's no real
344 * justification as this is the only complication.
346 for (ifq
= ips_utqe
; ifq
!= NULL
; ifq
= ifqnext
) {
347 ifqnext
= ifq
->ifq_next
;
348 if (((ifq
->ifq_flags
& IFQF_PROXY
) == 0) &&
349 (fr_deletetimeoutqueue(ifq
) == 0))
350 fr_freetimeoutqueue(ifq
);
353 ips_stats
.iss_inuse
= 0;
356 if (fr_state_init
== 1) {
357 fr_sttab_destroy(ips_tqtqb
);
358 MUTEX_DESTROY(&ips_udptq
.ifq_lock
);
359 MUTEX_DESTROY(&ips_icmptq
.ifq_lock
);
360 MUTEX_DESTROY(&ips_udpacktq
.ifq_lock
);
361 MUTEX_DESTROY(&ips_icmpacktq
.ifq_lock
);
362 MUTEX_DESTROY(&ips_iptq
.ifq_lock
);
363 MUTEX_DESTROY(&ips_deletetq
.ifq_lock
);
366 if (ips_table
!= NULL
) {
367 KFREES(ips_table
, fr_statesize
* sizeof(*ips_table
));
371 if (ips_seed
!= NULL
) {
372 KFREES(ips_seed
, fr_statesize
* sizeof(*ips_seed
));
376 if (ips_stats
.iss_bucketlen
!= NULL
) {
377 KFREES(ips_stats
.iss_bucketlen
, fr_statesize
* sizeof(u_long
));
378 ips_stats
.iss_bucketlen
= NULL
;
381 if (fr_state_maxbucket_reset
== 1)
382 fr_state_maxbucket
= 0;
384 if (fr_state_init
== 1) {
386 RW_DESTROY(&ipf_state
);
387 MUTEX_DESTROY(&ipf_stinsert
);
392 /* ------------------------------------------------------------------------ */
393 /* Function: fr_statetstats */
394 /* Returns: ips_stat_t* - pointer to state stats structure */
395 /* Parameters: Nil */
397 /* Put all the current numbers and pointers into a single struct and return */
398 /* a pointer to it. */
399 /* ------------------------------------------------------------------------ */
400 static ips_stat_t
*fr_statetstats()
402 ips_stats
.iss_active
= ips_num
;
403 ips_stats
.iss_statesize
= fr_statesize
;
404 ips_stats
.iss_statemax
= fr_statemax
;
405 ips_stats
.iss_table
= ips_table
;
406 ips_stats
.iss_list
= ips_list
;
407 ips_stats
.iss_ticks
= fr_ticks
;
411 /* ------------------------------------------------------------------------ */
412 /* Function: fr_state_remove */
413 /* Returns: int - 0 == success, != 0 == failure */
414 /* Parameters: data(I) - pointer to state structure to delete from table */
416 /* Search the state list for an entry that matches the one passed in (by */
417 /* protocol, IP version, addresses and protocol specific data) and delete it.*/
418 /* ------------------------------------------------------------------------ */
/*
 * fr_state_remove: copy the caller's template state in with fr_inobj() and
 * walk ips_list under the ipf_state write lock looking for an entry with the
 * same protocol, IP version, source address, destination address and
 * per-protocol data.  A matching entry is handed to fr_delstate() with
 * ISL_REMOVE.  The lock is released on both the match and no-match paths.
 * NOTE(review): the local declarations, the fr_inobj() error check and the
 * return statements are not visible in this listing - confirm against the
 * full file.
 */
419 static int fr_state_remove(data
)
426 error
= fr_inobj(data
, &st
, IPFOBJ_IPSTATE
);
430 WRITE_ENTER(&ipf_state
);
431 for (sp
= ips_list
; sp
; sp
= sp
->is_next
)
432 if ((sp
->is_p
== st
.is_p
) && (sp
->is_v
== st
.is_v
) &&
433 !bcmp((void *)&sp
->is_src
, (void *)&st
.is_src
,
434 sizeof(st
.is_src
)) &&
/*
 * The destination of the table entry must be compared with the destination
 * of the template.  Comparing it with the template's source meant an entry
 * could only match when its source and destination addresses were equal.
 */
435 !bcmp((void *)&sp
->is_dst
, (void *)&st
.is_dst
,
436 sizeof(st
.is_dst
)) &&
437 !bcmp((void *)&sp
->is_ps
, (void *)&st
.is_ps
,
439 fr_delstate(sp
, ISL_REMOVE
);
440 RWLOCK_EXIT(&ipf_state
);
443 RWLOCK_EXIT(&ipf_state
);
448 /* ------------------------------------------------------------------------ */
449 /* Function: fr_state_ioctl */
450 /* Returns: int - 0 == success, != 0 == failure */
451 /* Parameters: data(I) - pointer to ioctl data */
452 /* cmd(I) - ioctl command integer */
453 /* mode(I) - file mode bits used with open */
455 /* Processes an ioctl call made to operate on the IP Filter state device. */
456 /* ------------------------------------------------------------------------ */
457 int fr_state_ioctl(data
, cmd
, mode
, uid
, ctx
)
463 int arg
, ret
, error
= 0;
469 * Delete an entry from the state table.
472 error
= fr_state_remove(data
);
476 * Flush the state table
479 error
= BCOPYIN(data
, (char *)&arg
, sizeof(arg
));
483 WRITE_ENTER(&ipf_state
);
484 ret
= fr_state_flush(arg
, 4);
485 RWLOCK_EXIT(&ipf_state
);
486 error
= BCOPYOUT((char *)&ret
, data
, sizeof(ret
));
494 error
= BCOPYIN(data
, (char *)&arg
, sizeof(arg
));
498 WRITE_ENTER(&ipf_state
);
499 ret
= fr_state_flush(arg
, 6);
500 RWLOCK_EXIT(&ipf_state
);
501 error
= BCOPYOUT((char *)&ret
, data
, sizeof(ret
));
509 * Flush the state log.
512 if (!(mode
& FWRITE
))
517 tmp
= ipflog_clear(IPL_LOGSTATE
);
518 error
= BCOPYOUT((char *)&tmp
, data
, sizeof(tmp
));
525 * Turn logging of state information on/off.
528 if (!(mode
& FWRITE
))
531 error
= BCOPYIN((char *)data
, (char *)&ipstate_logging
,
532 sizeof(ipstate_logging
));
539 * Return the current state of logging.
542 error
= BCOPYOUT((char *)&ipstate_logging
, (char *)data
,
543 sizeof(ipstate_logging
));
549 * Return the number of bytes currently waiting to be read.
552 arg
= iplused
[IPL_LOGSTATE
]; /* returned in an int */
553 error
= BCOPYOUT((char *)&arg
, data
, sizeof(arg
));
560 * Get the current state statistics.
563 error
= fr_outobj(data
, fr_statetstats(), IPFOBJ_STATESTAT
);
567 * Lock/Unlock the state table. (Locking prevents any changes, which
568 * means no packets match).
571 if (!(mode
& FWRITE
)) {
574 error
= fr_lock(data
, &fr_state_lock
);
579 * Add an entry to the current state table.
582 if (!fr_state_lock
|| !(mode
&FWRITE
)) {
586 error
= fr_stputent(data
);
590 * Get a state table entry.
593 if (!fr_state_lock
) {
597 error
= fr_stgetent(data
);
601 * Return a copy of the hash table bucket lengths
604 error
= BCOPYOUT(ips_stats
.iss_bucketlen
, data
,
605 fr_statesize
* sizeof(u_long
));
615 error
= fr_inobj(data
, &iter
, IPFOBJ_GENITER
);
620 token
= ipf_findtoken(IPFGENITER_STATE
, uid
, ctx
);
622 error
= fr_stateiter(token
, &iter
);
623 WRITE_ENTER(&ipf_tokens
);
624 if (token
->ipt_data
== NULL
)
625 ipf_freetoken(token
);
627 ipf_dereftoken(token
);
628 RWLOCK_EXIT(&ipf_tokens
);
637 error
= fr_stgettable(data
);
641 error
= BCOPYIN(data
, (char *)&arg
, sizeof(arg
));
646 error
= ipf_deltoken(arg
, uid
, ctx
);
652 error
= fr_outobj(data
, ips_tqtqb
, IPFOBJ_STATETQTAB
);
663 /* ------------------------------------------------------------------------ */
664 /* Function: fr_stgetent */
665 /* Returns: int - 0 == success, != 0 == failure */
666 /* Parameters: data(I) - pointer to state structure to retrieve from table */
668 /* Copy out state information from the kernel to a user space process. If */
669 /* there is a filter rule associated with the state entry, copy that out */
670 /* as well. The entry to copy out is taken from the value of "ips_next" in */
671 /* the struct passed in and if not null and not found in the list of current*/
672 /* state entries, the retrieval fails. */
673 /* ------------------------------------------------------------------------ */
674 int fr_stgetent(data
)
681 error
= fr_inobj(data
, &ips
, IPFOBJ_STATESAVE
);
685 READ_ENTER(&ipf_state
);
690 RWLOCK_EXIT(&ipf_state
);
691 if (ips
.ips_next
== NULL
)
697 * Make sure the pointer we're copying from exists in the
698 * current list of entries. Security precaution to prevent
699 * copying of random kernel data.
701 for (is
= ips_list
; is
; is
= is
->is_next
)
705 RWLOCK_EXIT(&ipf_state
);
709 ips
.ips_next
= isn
->is_next
;
710 bcopy((char *)isn
, (char *)&ips
.ips_is
, sizeof(ips
.ips_is
));
711 ips
.ips_rule
= isn
->is_rule
;
712 if (isn
->is_rule
!= NULL
)
713 bcopy((char *)isn
->is_rule
, (char *)&ips
.ips_fr
,
715 RWLOCK_EXIT(&ipf_state
);
716 error
= fr_outobj(data
, &ips
, IPFOBJ_STATESAVE
);
721 /* ------------------------------------------------------------------------ */
722 /* Function: fr_stputent */
723 /* Returns: int - 0 == success, != 0 == failure */
724 /* Parameters: data(I) - pointer to state information struct */
726 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */
727 /* the state table. If the state info. includes a pointer to a filter rule */
728 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
730 /* ------------------------------------------------------------------------ */
731 int fr_stputent(data
)
740 error
= fr_inobj(data
, &ips
, IPFOBJ_STATESAVE
);
744 KMALLOC(isn
, ipstate_t
*);
748 bcopy((char *)&ips
.ips_is
, (char *)isn
, sizeof(*isn
));
749 bzero((char *)isn
, offsetof(struct ipstate
, is_pkts
));
750 isn
->is_sti
.tqe_pnext
= NULL
;
751 isn
->is_sti
.tqe_next
= NULL
;
752 isn
->is_sti
.tqe_ifq
= NULL
;
753 isn
->is_sti
.tqe_parent
= isn
;
754 isn
->is_ifp
[0] = NULL
;
755 isn
->is_ifp
[1] = NULL
;
756 isn
->is_ifp
[2] = NULL
;
757 isn
->is_ifp
[3] = NULL
;
762 READ_ENTER(&ipf_state
);
764 MUTEX_EXIT(&isn
->is_lock
);
765 RWLOCK_EXIT(&ipf_state
);
769 if (isn
->is_flags
& SI_NEWFR
) {
770 KMALLOC(fr
, frentry_t
*);
775 bcopy((char *)&ips
.ips_fr
, (char *)fr
, sizeof(*fr
));
776 out
= fr
->fr_flags
& FR_OUTQUE
? 1 : 0;
778 ips
.ips_is
.is_rule
= fr
;
779 MUTEX_NUKE(&fr
->fr_lock
);
780 MUTEX_INIT(&fr
->fr_lock
, "state filter rule lock");
783 * Look up all the interface names in the rule.
785 for (i
= 0; i
< 4; i
++) {
786 name
= fr
->fr_ifnames
[i
];
787 fr
->fr_ifas
[i
] = fr_resolvenic(name
, fr
->fr_v
);
788 name
= isn
->is_ifname
[i
];
789 isn
->is_ifp
[i
] = fr_resolvenic(name
, isn
->is_v
);
795 fr
->fr_type
= FR_T_NONE
;
797 fr_resolvedest(&fr
->fr_tifs
[0], fr
->fr_v
);
798 fr_resolvedest(&fr
->fr_tifs
[1], fr
->fr_v
);
799 fr_resolvedest(&fr
->fr_dif
, fr
->fr_v
);
802 * send a copy back to userland of what we ended up
803 * to allow for verification.
805 error
= fr_outobj(data
, &ips
, IPFOBJ_STATESAVE
);
808 MUTEX_DESTROY(&fr
->fr_lock
);
812 READ_ENTER(&ipf_state
);
814 MUTEX_EXIT(&isn
->is_lock
);
815 RWLOCK_EXIT(&ipf_state
);
818 READ_ENTER(&ipf_state
);
819 for (is
= ips_list
; is
; is
= is
->is_next
)
820 if (is
->is_rule
== fr
) {
822 MUTEX_EXIT(&isn
->is_lock
);
830 RWLOCK_EXIT(&ipf_state
);
832 return (isn
== NULL
) ? ESRCH
: 0;
839 /* ------------------------------------------------------------------------ */
840 /* Function: fr_stinsert */
842 /* Parameters: is(I) - pointer to state structure */
843 /* rev(I) - flag indicating forward/reverse direction of packet */
845 /* Inserts a state structure into the hash table (for lookups) and the list */
846 /* of state entries (for enumeration). Resolves all of the interface names */
847 /* to pointers and adjusts running stats for the hash table as appropriate. */
849 /* Locking: it is assumed that some kind of lock on ipf_state is held. */
850 /* Exits with is_lock initialised and held. */
851 /* ------------------------------------------------------------------------ */
852 void fr_stinsert(is
, rev
)
860 MUTEX_INIT(&is
->is_lock
, "ipf state entry");
864 MUTEX_ENTER(&fr
->fr_lock
);
867 MUTEX_EXIT(&fr
->fr_lock
);
871 * Look up all the interface names in the state entry.
873 for (i
= 0; i
< 4; i
++) {
874 if (is
->is_ifp
[i
] != NULL
)
876 is
->is_ifp
[i
] = fr_resolvenic(is
->is_ifname
[i
], is
->is_v
);
880 * If we could trust is_hv, then the modulus would not be needed, but
881 * when running with IPFILTER_SYNC, this stops bad values.
883 hv
= is
->is_hv
% fr_statesize
;
887 * We need to get both of these locks...the first because it is
888 * possible that once the insert is complete another packet might
889 * come along, match the entry and want to update it.
891 MUTEX_ENTER(&is
->is_lock
);
892 MUTEX_ENTER(&ipf_stinsert
);
895 * add into list table.
897 if (ips_list
!= NULL
)
898 ips_list
->is_pnext
= &is
->is_next
;
899 is
->is_pnext
= &ips_list
;
900 is
->is_next
= ips_list
;
903 if (ips_table
[hv
] != NULL
)
904 ips_table
[hv
]->is_phnext
= &is
->is_hnext
;
906 ips_stats
.iss_inuse
++;
907 is
->is_phnext
= ips_table
+ hv
;
908 is
->is_hnext
= ips_table
[hv
];
910 ips_stats
.iss_bucketlen
[hv
]++;
912 MUTEX_EXIT(&ipf_stinsert
);
914 fr_setstatequeue(is
, rev
);
918 /* ------------------------------------------------------------------------ */
919 /* Function: ipf_state_matchipv4addrs */
920 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
922 /* Parameters: is1, is2 pointers to states we are checking */
924 /* Matches IPv4 addresses. For the ICMP protocol a strong match is returned */
925 /* even if there is only a reverse match. */
926 /* ------------------------------------------------------------------------ */
928 ipf_state_matchipv4addrs(is1
, is2
)
929 ipstate_t
*is1
, *is2
;
933 if (is1
->is_saddr
== is2
->is_saddr
&& is1
->is_daddr
== is2
->is_daddr
)
935 else if (is1
->is_saddr
== is2
->is_daddr
&&
936 is1
->is_daddr
== is2
->is_saddr
) {
937 /* force strong match for ICMP protocol */
938 rv
= (is1
->is_p
== IPPROTO_ICMP
) ? 2 : 1;
947 /* ------------------------------------------------------------------------ */
948 /* Function: ipf_state_matchipv6addrs */
949 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
951 /* Parameters: is1, is2 pointers to states we are checking */
953 /* Matches IPv6 addresses. For the ICMPv6 protocol a strong match is */
954 /* returned even if there is only a reverse match. */
955 /* ------------------------------------------------------------------------ */
957 ipf_state_matchipv6addrs(is1
, is2
)
958 ipstate_t
*is1
, *is2
;
962 if (IP6_EQ(&is1
->is_src
, &is2
->is_src
) &&
963 IP6_EQ(&is1
->is_dst
, &is2
->is_dst
))
965 else if (IP6_EQ(&is1
->is_src
, &is2
->is_dst
) &&
966 IP6_EQ(&is1
->is_dst
, &is2
->is_src
)) {
967 /* force strong match for ICMPv6 protocol */
968 rv
= (is1
->is_p
== IPPROTO_ICMPV6
) ? 2 : 1;
977 /* ------------------------------------------------------------------------ */
978 /* Function: ipf_state_matchaddresses */
979 /* Returns: int - 2 addresses match, 1 reverse match, zero no match */
980 /* Parameters: is1, is2 pointers to states we are checking */
982 /* The function returns non-zero if two pairs of addresses belong to a single */
983 /* connection. suppose there are two endpoints: */
984 /* endpoint1 1.1.1.1 */
985 /* endpoint2 1.1.1.2 */
987 /* the state is established by packet flying from .1 to .2 so we see: */
988 /* is1->src = 1.1.1.1 */
989 /* is1->dst = 1.1.1.2 */
990 /* now endpoint 1.1.1.2 sends answer */
991 /* retrieves is1 record created by first packet and compares it with is2 */
992 /* temporal record, is2 is initialized as follows: */
993 /* is2->src = 1.1.1.2 */
994 /* is2->dst = 1.1.1.1 */
995 /* in this case 1 will be returned */
997 /* ipf_state_matchaddresses() treats those two records as the same. Of course */
998 /* ipf_state_matchaddresses() also treats records as the same when you pass */
999 /* identical arguments (i.e. ipf_state_matchaddresses(is1, is1) returns 2). */
1000 /* ------------------------------------------------------------------------ */
1002 ipf_state_matchaddresses(is1
, is2
)
1003 ipstate_t
*is1
, *is2
;
1007 if (is1
->is_v
== 4) {
1008 rv
= ipf_state_matchipv4addrs(is1
, is2
);
1011 rv
= ipf_state_matchipv6addrs(is1
, is2
);
1018 /* ------------------------------------------------------------------------ */
1019 /* Function: ipf_state_matchports */
1020 /* Returns: int - 2 match, 1 reverse match, 0 no match */
1021 /* Parameters: ppairs1, ppairs2 - src, dst ports we want to match */
1023 /* performs the same match for isps members as for addresses */
1024 /* ------------------------------------------------------------------------ */
1026 ipf_state_matchports(ppairs1
, ppairs2
)
1027 udpinfo_t
*ppairs1
, *ppairs2
;
1031 if (ppairs1
->us_sport
== ppairs2
->us_sport
&&
1032 ppairs1
->us_dport
== ppairs2
->us_dport
)
1034 else if (ppairs1
->us_sport
== ppairs2
->us_dport
&&
1035 ppairs1
->us_dport
== ppairs2
->us_sport
)
1044 /* ------------------------------------------------------------------------ */
1045 /* Function: ipf_state_matchisps */
1046 /* Returns: int - nonzero if isps members match, 0 nomatch */
1047 /* Parameters: is1, is2 - states we want to match */
1049 /* performs the same match for isps members as for addresses */
1050 /* ------------------------------------------------------------------------ */
1052 ipf_state_matchisps(is1
, is2
)
1053 ipstate_t
*is1
, *is2
;
1057 if (is1
->is_p
== is2
->is_p
) {
1062 /* greinfo_t can also be interpreted as a port pair */
1063 rv
= ipf_state_matchports(&is1
->is_ps
.is_us
,
1068 case IPPROTO_ICMPV6
:
1069 /* force strong match for ICMP datagram. */
1070 if (bcmp(&is1
->is_ps
, &is2
->is_ps
,
1071 sizeof(icmpinfo_t
)) == 0) {
1089 /* ------------------------------------------------------------------------ */
1090 /* Function: ipf_state_match */
1091 /* Returns: int - nonzero match, zero no match */
1092 /* Parameters: is1, is2 - states we want to match */
1094 /* ------------------------------------------------------------------------ */
1095 static int ipf_state_match(is1
, is2
)
1096 ipstate_t
*is1
, *is2
;
1102 if (bcmp(&is1
->is_pass
, &is2
->is_pass
,
1103 offsetof(struct ipstate
, is_authmsk
) -
1104 offsetof(struct ipstate
, is_pass
)) == 0) {
1106 portmatch
= ipf_state_matchisps(is1
, is2
);
1107 addrmatch
= ipf_state_matchaddresses(is1
, is2
);
1108 rv
= (addrmatch
!= 0) && (addrmatch
== portmatch
);
1117 /* ------------------------------------------------------------------------ */
1118 /* Function: fr_addstate */
1119 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */
1120 /* Parameters: fin(I) - pointer to packet information */
1121 /* stsave(O) - pointer to place to save pointer to created */
1122 /* state structure. */
1123 /* flags(I) - flags to use when creating the structure */
1125 /* Creates a new IP state structure from the packet information collected. */
1126 /* Inserts it into the state table and appends to the bottom of the active */
1127 /* list. If the capacity of the table has reached the maximum allowed then */
1128 /* the call will fail and a flush is scheduled for the next timeout call. */
1130 /* NOTE: The use of stsave to point to nat_state will result in memory */
1131 /* corruption. It should only be used to point to objects that will */
1132 /* either outlive this (not expired) or will deref the ip_state_t */
1133 /* when they are deleted. */
1134 /* ------------------------------------------------------------------------ */
1135 ipstate_t
*fr_addstate(fin
, stsave
, flags
)
1149 * If a packet that was created locally is trying to go out but we
1150 * do not match here because of this lock, it is likely that
1151 * the policy will block it and return network unreachable back up
1152 * the stack. To mitigate this error, EAGAIN is returned instead,
1153 * telling the IP stack to try sending this packet again later.
1155 if (fr_state_lock
) {
1156 fin
->fin_error
= EAGAIN
;
1160 if (fin
->fin_flx
& (FI_SHORT
|FI_STATE
|FI_FRAGBODY
|FI_BAD
))
1163 if ((fin
->fin_flx
& FI_OOW
) && !(fin
->fin_tcpf
& TH_SYN
))
1167 * If a "keep state" rule has reached the maximum number of references
1168 * to it, then schedule an automatic flush in case we can clear out
1169 * some "dead old wood". Note that because the lock isn't held on
1170 * fr it is possible that we could overflow. The cost of overflowing
1171 * is being ignored here as the number by which it can overflow is
1172 * a product of the number of simultaneous threads that could be
1173 * executing in here, so a limit of 100 won't result in 200, but could
1174 * result in 101 or 102.
1178 if ((ips_num
>= fr_statemax
) && (fr
->fr_statemax
== 0)) {
1179 ATOMIC_INCL(ips_stats
.iss_max
);
1180 fr_state_doflush
= 1;
1183 if ((fr
->fr_statemax
!= 0) &&
1184 (fr
->fr_statecnt
>= fr
->fr_statemax
)) {
1185 ATOMIC_INCL(ips_stats
.iss_maxref
);
1190 pass
= (fr
== NULL
) ? 0 : fr
->fr_flags
;
1196 bzero((char *)is
, sizeof(*is
));
1197 is
->is_die
= 1 + fr_ticks
;
1200 * Copy and calculate...
1202 hv
= (is
->is_p
= fin
->fin_fi
.fi_p
);
1203 is
->is_src
= fin
->fin_fi
.fi_src
;
1205 is
->is_dst
= fin
->fin_fi
.fi_dst
;
1208 if (fin
->fin_v
== 6) {
1210 * For ICMPv6, we check to see if the destination address is
1211 * a multicast address. If it is, do not include it in the
1212 * calculation of the hash because the correct reply will come
1213 * back from a real address, not a multicast address.
1215 if ((is
->is_p
== IPPROTO_ICMPV6
) &&
1216 IN6_IS_ADDR_MULTICAST(&is
->is_dst
.in6
)) {
1218 * So you can do keep state with neighbour discovery.
1220 * Here we could use the address from the neighbour
1221 * solicit message to put in the state structure and
1222 * we could use that without a wildcard flag too...
1224 flags
|= SI_W_DADDR
;
1227 hv
+= is
->is_dst
.i6
[1];
1228 hv
+= is
->is_dst
.i6
[2];
1229 hv
+= is
->is_dst
.i6
[3];
1231 hv
+= is
->is_src
.i6
[1];
1232 hv
+= is
->is_src
.i6
[2];
1233 hv
+= is
->is_src
.i6
[3];
1236 if ((fin
->fin_v
== 4) &&
1237 (fin
->fin_flx
& (FI_MULTICAST
|FI_BROADCAST
|FI_MBCAST
))) {
1238 if (fin
->fin_out
== 0) {
1239 flags
|= SI_W_DADDR
|SI_CLONE
;
1242 flags
|= SI_W_SADDR
|SI_CLONE
;
1250 case IPPROTO_ICMPV6
:
1253 switch (ic
->icmp_type
)
1255 case ICMP6_ECHO_REQUEST
:
1256 is
->is_icmp
.ici_type
= ic
->icmp_type
;
1257 hv
+= (is
->is_icmp
.ici_id
= ic
->icmp_id
);
1259 case ICMP6_MEMBERSHIP_QUERY
:
1260 case ND_ROUTER_SOLICIT
:
1261 case ND_NEIGHBOR_SOLICIT
:
1262 case ICMP6_NI_QUERY
:
1263 is
->is_icmp
.ici_type
= ic
->icmp_type
;
1268 ATOMIC_INCL(ips_stats
.iss_icmp
);
1274 switch (ic
->icmp_type
)
1280 is
->is_icmp
.ici_type
= ic
->icmp_type
;
1281 hv
+= (is
->is_icmp
.ici_id
= ic
->icmp_id
);
1286 ATOMIC_INCL(ips_stats
.iss_icmp
);
1292 is
->is_gre
.gs_flags
= gre
->gr_flags
;
1293 is
->is_gre
.gs_ptype
= gre
->gr_ptype
;
1294 if (GRE_REV(is
->is_gre
.gs_flags
) == 1) {
1295 is
->is_call
[0] = fin
->fin_data
[0];
1296 is
->is_call
[1] = fin
->fin_data
[1];
1303 if (tcp
->th_flags
& TH_RST
)
1306 * The endian of the ports doesn't matter, but the ack and
1307 * sequence numbers do as we do mathematics on them later.
1309 is
->is_sport
= htons(fin
->fin_data
[0]);
1310 is
->is_dport
= htons(fin
->fin_data
[1]);
1311 if ((flags
& (SI_W_DPORT
|SI_W_SPORT
)) == 0) {
1317 * If this is a real packet then initialise fields in the
1318 * state information structure from the TCP header information.
1322 is
->is_maxswin
= ntohs(tcp
->th_win
);
1323 if (is
->is_maxswin
== 0)
1326 if ((fin
->fin_flx
& FI_IGNORE
) == 0) {
1327 is
->is_send
= ntohl(tcp
->th_seq
) + fin
->fin_dlen
-
1328 (TCP_OFF(tcp
) << 2) +
1329 ((tcp
->th_flags
& TH_SYN
) ? 1 : 0) +
1330 ((tcp
->th_flags
& TH_FIN
) ? 1 : 0);
1331 is
->is_maxsend
= is
->is_send
;
1334 * Window scale option is only present in
1335 * SYN/SYN-ACK packet.
1337 if ((tcp
->th_flags
& ~(TH_FIN
|TH_ACK
|TH_ECNALL
)) ==
1339 (TCP_OFF(tcp
) > (sizeof(tcphdr_t
) >> 2))) {
1340 if (fr_tcpoptions(fin
, tcp
,
1341 &is
->is_tcp
.ts_data
[0]) == -1) {
1342 fin
->fin_flx
|= FI_BAD
;
1346 if ((fin
->fin_out
!= 0) && (pass
& FR_NEWISN
) != 0) {
1347 fr_checknewisn(fin
, is
);
1348 fr_fixoutisn(fin
, is
);
1351 if ((tcp
->th_flags
& TH_OPENING
) == TH_SYN
)
1354 is
->is_maxdwin
= is
->is_maxswin
* 2;
1355 is
->is_dend
= ntohl(tcp
->th_ack
);
1356 is
->is_maxdend
= ntohl(tcp
->th_ack
);
1357 is
->is_maxdwin
*= 2;
1362 * If we're creating state for a starting connection, start the
1363 * timer on it as we'll never see an error if it fails to
1366 ATOMIC_INCL(ips_stats
.iss_tcp
);
1372 is
->is_sport
= htons(fin
->fin_data
[0]);
1373 is
->is_dport
= htons(fin
->fin_data
[1]);
1374 if ((flags
& (SI_W_DPORT
|SI_W_SPORT
)) == 0) {
1375 hv
+= tcp
->th_dport
;
1376 hv
+= tcp
->th_sport
;
1378 ATOMIC_INCL(ips_stats
.iss_udp
);
1384 hv
= DOUBLE_HASH(hv
);
1387 is
->is_flags
= flags
& IS_INHERITED
;
1390 * Look for identical state.
1392 for (is
= ips_table
[is
->is_hv
% fr_statesize
]; is
!= NULL
;
1393 is
= is
->is_hnext
) {
1394 if (ipf_state_match(&ips
, is
) == 1) {
1401 if (ips_stats
.iss_bucketlen
[hv
] >= fr_state_maxbucket
) {
1402 ATOMIC_INCL(ips_stats
.iss_bucketfull
);
1405 KMALLOC(is
, ipstate_t
*);
1407 ATOMIC_INCL(ips_stats
.iss_nomem
);
1410 bcopy((char *)&ips
, (char *)is
, sizeof(*is
));
1412 * Do not do the modulus here, it is done in fr_stinsert().
1415 (void) strncpy(is
->is_group
, fr
->fr_group
, FR_GROUPLEN
);
1416 if (fr
->fr_age
[0] != 0) {
1417 is
->is_tqehead
[0] = fr_addtimeoutqueue(&ips_utqe
,
1419 is
->is_sti
.tqe_flags
|= TQE_RULEBASED
;
1421 if (fr
->fr_age
[1] != 0) {
1422 is
->is_tqehead
[1] = fr_addtimeoutqueue(&ips_utqe
,
1424 is
->is_sti
.tqe_flags
|= TQE_RULEBASED
;
1427 is
->is_tag
= fr
->fr_logtag
;
1430 * The name '-' is special for network interfaces and causes
1431 * a NULL name to be present, always, allowing packets to
1432 * match it, regardless of their interface.
1434 if ((fin
->fin_ifp
== NULL
) ||
1435 (fr
->fr_ifnames
[out
<< 1][0] == '-' &&
1436 fr
->fr_ifnames
[out
<< 1][1] == '\0')) {
1437 is
->is_ifp
[out
<< 1] = fr
->fr_ifas
[0];
1438 strncpy(is
->is_ifname
[out
<< 1], fr
->fr_ifnames
[0],
1439 sizeof(fr
->fr_ifnames
[0]));
1441 is
->is_ifp
[out
<< 1] = fin
->fin_ifp
;
1442 COPYIFNAME(is
->is_v
, fin
->fin_ifp
,
1443 is
->is_ifname
[out
<< 1]);
1446 is
->is_ifp
[(out
<< 1) + 1] = fr
->fr_ifas
[1];
1447 strncpy(is
->is_ifname
[(out
<< 1) + 1], fr
->fr_ifnames
[1],
1448 sizeof(fr
->fr_ifnames
[1]));
1450 is
->is_ifp
[(1 - out
) << 1] = fr
->fr_ifas
[2];
1451 strncpy(is
->is_ifname
[((1 - out
) << 1)], fr
->fr_ifnames
[2],
1452 sizeof(fr
->fr_ifnames
[2]));
1454 is
->is_ifp
[((1 - out
) << 1) + 1] = fr
->fr_ifas
[3];
1455 strncpy(is
->is_ifname
[((1 - out
) << 1) + 1], fr
->fr_ifnames
[3],
1456 sizeof(fr
->fr_ifnames
[3]));
1459 is
->is_tag
= FR_NOLOGTAG
;
1461 if (fin
->fin_ifp
!= NULL
) {
1462 is
->is_ifp
[out
<< 1] = fin
->fin_ifp
;
1463 COPYIFNAME(is
->is_v
, fin
->fin_ifp
,
1464 is
->is_ifname
[out
<< 1]);
1470 is
->is_pkts
[0] = 0, is
->is_bytes
[0] = 0;
1471 is
->is_pkts
[1] = 0, is
->is_bytes
[1] = 0;
1472 is
->is_pkts
[2] = 0, is
->is_bytes
[2] = 0;
1473 is
->is_pkts
[3] = 0, is
->is_bytes
[3] = 0;
1474 if ((fin
->fin_flx
& FI_IGNORE
) == 0) {
1475 is
->is_pkts
[out
] = 1;
1476 is
->is_bytes
[out
] = fin
->fin_plen
;
1477 is
->is_flx
[out
][0] = fin
->fin_flx
& FI_CMP
;
1478 is
->is_flx
[out
][0] &= ~FI_OOW
;
1481 if (pass
& FR_STSTRICT
)
1482 is
->is_flags
|= IS_STRICT
;
1484 if (pass
& FR_STATESYNC
)
1485 is
->is_flags
|= IS_STATESYNC
;
1488 * We want to check everything that is a property of this packet,
1489 * but we don't (automatically) care about its fragment status as
1492 is
->is_v
= fin
->fin_v
;
1493 is
->is_opt
[0] = fin
->fin_optmsk
;
1494 is
->is_optmsk
[0] = 0xffffffff;
1495 is
->is_optmsk
[1] = 0xffffffff;
1496 if (is
->is_v
== 6) {
1497 is
->is_opt
[0] &= ~0x8;
1498 is
->is_optmsk
[0] &= ~0x8;
1499 is
->is_optmsk
[1] &= ~0x8;
1502 is
->is_sec
= fin
->fin_secmsk
;
1503 is
->is_secmsk
= 0xffff;
1504 is
->is_auth
= fin
->fin_auth
;
1505 is
->is_authmsk
= 0xffff;
1506 if (flags
& (SI_WILDP
|SI_WILDA
)) {
1507 ATOMIC_INCL(ips_stats
.iss_wild
);
1509 is
->is_rulen
= fin
->fin_rule
;
1512 if (pass
& FR_LOGFIRST
)
1513 is
->is_pass
&= ~(FR_LOGFIRST
|FR_LOG
);
1515 READ_ENTER(&ipf_state
);
1517 fr_stinsert(is
, fin
->fin_rev
);
1519 if (fin
->fin_p
== IPPROTO_TCP
) {
1521 * If we're creating state for a starting connection, start the
1522 * timer on it as we'll never see an error if it fails to
1525 (void) fr_tcp_age(&is
->is_sti
, fin
, ips_tqtqb
, is
->is_flags
);
1526 MUTEX_EXIT(&is
->is_lock
);
1527 #ifdef IPFILTER_SCAN
1528 if ((is
->is_flags
& SI_CLONE
) == 0)
1529 (void) ipsc_attachis(is
);
1532 MUTEX_EXIT(&is
->is_lock
);
1534 #ifdef IPFILTER_SYNC
1535 if ((is
->is_flags
& IS_STATESYNC
) && ((is
->is_flags
& SI_CLONE
) == 0))
1536 is
->is_sync
= ipfsync_new(SMC_STATE
, fin
, is
);
1538 if (ipstate_logging
)
1539 ipstate_log(is
, ISL_NEW
);
1541 RWLOCK_EXIT(&ipf_state
);
1542 fin
->fin_rev
= IP6_NEQ(&is
->is_dst
, &fin
->fin_daddr
);
1543 fin
->fin_flx
|= FI_STATE
;
1544 if (fin
->fin_flx
& FI_FRAG
)
1545 (void) fr_newfrag(fin
, pass
^ FR_KEEPSTATE
);
1551 /* ------------------------------------------------------------------------ */
1552 /* Function: fr_tcpoptions */
1553 /* Returns: int - 1 == packet matches state entry, 0 == it does not, */
1554 /* -1 == packet has bad TCP options data */
1555 /* Parameters: fin(I) - pointer to packet information */
1556 /* tcp(I) - pointer to TCP packet header */
1557 /* td(I) - pointer to TCP data held as part of the state */
1559 /* Look after the TCP header for any options and deal with those that are */
1560 /* present. Record details about those that we recognise. */
1561 /* ------------------------------------------------------------------------ */
1562 static int fr_tcpoptions(fin
, tcp
, td
)
1567 int off
, mlen
, ol
, i
, len
, retval
;
1568 char buf
[64], *s
, opt
;
1571 len
= (TCP_OFF(tcp
) << 2);
1572 if (fin
->fin_dlen
< len
)
1574 len
-= sizeof(*tcp
);
1576 off
= fin
->fin_plen
- fin
->fin_dlen
+ sizeof(*tcp
) + fin
->fin_ipoff
;
1579 mlen
= MSGDSIZE(m
) - off
;
1587 COPYDATA(m
, off
, len
, buf
);
1589 for (s
= buf
; len
> 0; ) {
1591 if (opt
== TCPOPT_EOL
)
1593 else if (opt
== TCPOPT_NOP
)
1599 if (ol
< 2 || ol
> len
)
1603 * Extract the TCP options we are interested in out of
1604 * the header and store them in the tcpdata struct.
1608 case TCPOPT_WINDOW
:
1609 if (ol
== TCPOLEN_WINDOW
) {
1611 if (i
> TCP_WSCALE_MAX
)
1615 td
->td_winscale
= i
;
1616 td
->td_winflags
|= TCP_WSCALE_SEEN
|
1621 case TCPOPT_MAXSEG
:
1623 * So, if we wanted to set the TCP MAXSEG,
1624 * it should be done here...
1626 if (ol
== TCPOLEN_MAXSEG
) {
1634 case TCPOPT_SACK_PERMITTED
:
1635 if (ol
== TCPOLEN_SACK_PERMITTED
)
1636 td
->td_winflags
|= TCP_SACK_PERMIT
;
1649 /* ------------------------------------------------------------------------ */
1650 /* Function: fr_tcpstate */
1651 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1652 /* Parameters: fin(I) - pointer to packet information */
1653 /* tcp(I) - pointer to TCP packet header */
1654 /* is(I) - pointer to master state structure */
1656 /* Check to see if a packet with TCP headers fits within the TCP window. */
1657 /* Change timeout depending on whether new packet is a SYN-ACK returning */
1658 /* for a SYN or a RST or FIN which indicate time to close up shop. */
1659 /* ------------------------------------------------------------------------ */
1660 static int fr_tcpstate(fin
, tcp
, is
)
1665 int source
, ret
= 0, flags
;
1666 tcpdata_t
*fdata
, *tdata
;
1668 source
= !fin
->fin_rev
;
1669 if (((is
->is_flags
& IS_TCPFSM
) != 0) && (source
== 1) &&
1670 (ntohs(is
->is_sport
) != fin
->fin_data
[0]))
1672 fdata
= &is
->is_tcp
.ts_data
[!source
];
1673 tdata
= &is
->is_tcp
.ts_data
[source
];
1675 MUTEX_ENTER(&is
->is_lock
);
1678 * If a SYN packet is received for a connection that is on the way out
1679 * but hasn't yet departed then advance this session along the way.
1681 if ((tcp
->th_flags
& TH_OPENING
) == TH_SYN
) {
1682 if ((is
->is_state
[0] > IPF_TCPS_ESTABLISHED
) &&
1683 (is
->is_state
[1] > IPF_TCPS_ESTABLISHED
)) {
1684 is
->is_state
[!source
] = IPF_TCPS_CLOSED
;
1685 fr_movequeue(&is
->is_sti
, is
->is_sti
.tqe_ifq
,
1687 MUTEX_EXIT(&is
->is_lock
);
1692 ret
= fr_tcpinwindow(fin
, fdata
, tdata
, tcp
, is
->is_flags
);
1694 #ifdef IPFILTER_SCAN
1695 if (is
->is_flags
& (IS_SC_CLIENT
|IS_SC_SERVER
)) {
1696 ipsc_packet(fin
, is
);
1697 if (FR_ISBLOCK(is
->is_pass
)) {
1698 MUTEX_EXIT(&is
->is_lock
);
1705 * Nearing end of connection, start timeout.
1707 ret
= fr_tcp_age(&is
->is_sti
, fin
, ips_tqtqb
, is
->is_flags
);
1709 MUTEX_EXIT(&is
->is_lock
);
1714 * set s0's as appropriate. Use syn-ack packet as it
1715 * contains both pieces of required information.
1718 * Window scale option is only present in SYN/SYN-ACK packet.
1719 * Compare with ~TH_FIN to mask out T/TCP setups.
1721 flags
= tcp
->th_flags
& ~(TH_FIN
|TH_ECNALL
);
1722 if (flags
== (TH_SYN
|TH_ACK
)) {
1723 is
->is_s0
[source
] = ntohl(tcp
->th_ack
);
1724 is
->is_s0
[!source
] = ntohl(tcp
->th_seq
) + 1;
1725 if ((TCP_OFF(tcp
) > (sizeof(tcphdr_t
) >> 2))) {
1726 if (fr_tcpoptions(fin
, tcp
, fdata
) == -1)
1727 fin
->fin_flx
|= FI_BAD
;
1729 if ((fin
->fin_out
!= 0) && (is
->is_pass
& FR_NEWISN
))
1730 fr_checknewisn(fin
, is
);
1731 } else if (flags
== TH_SYN
) {
1732 is
->is_s0
[source
] = ntohl(tcp
->th_seq
) + 1;
1733 if ((TCP_OFF(tcp
) > (sizeof(tcphdr_t
) >> 2))) {
1734 if (fr_tcpoptions(fin
, tcp
, fdata
) == -1)
1735 fin
->fin_flx
|= FI_BAD
;
1738 if ((fin
->fin_out
!= 0) && (is
->is_pass
& FR_NEWISN
))
1739 fr_checknewisn(fin
, is
);
1744 fin
->fin_flx
|= FI_OOW
;
1746 MUTEX_EXIT(&is
->is_lock
);
1751 /* ------------------------------------------------------------------------ */
1752 /* Function: fr_checknewisn */
1754 /* Parameters: fin(I) - pointer to packet information */
1755 /* is(I) - pointer to master state structure */
1757 /* Check to see if this TCP connection is expecting and needs a new */
1758 /* sequence number for a particular direction of the connection. */
1760 /* NOTE: This does not actually change the sequence numbers, only gets new */
1762 /* ------------------------------------------------------------------------ */
1763 static void fr_checknewisn(fin
, is
)
1767 u_32_t sumd
, old
, new;
1774 if (((i
== 0) && !(is
->is_flags
& IS_ISNSYN
)) ||
1775 ((i
== 1) && !(is
->is_flags
& IS_ISNACK
))) {
1776 old
= ntohl(tcp
->th_seq
);
1777 new = fr_newisn(fin
);
1778 is
->is_isninc
[i
] = new - old
;
1779 CALC_SUMD(old
, new, sumd
);
1780 is
->is_sumd
[i
] = (sumd
& 0xffff) + (sumd
>> 16);
1782 is
->is_flags
|= ((i
== 0) ? IS_ISNSYN
: IS_ISNACK
);
1787 /* ------------------------------------------------------------------------ */
1788 /* Function: fr_tcpinwindow */
1789 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside, */
1790 /* 2 == packet seq number matches next expected */
1791 /* Parameters: fin(I) - pointer to packet information */
1792 /* fdata(I) - pointer to tcp state information (forward) */
1793 /* tdata(I) - pointer to tcp state information (reverse) */
1794 /* tcp(I) - pointer to TCP packet header */
1796 /* Given a packet has matched addresses and ports, check to see if it is */
1797 /* within the TCP data window. In a show of generosity, allow packets that */
1798 /* are within the window space behind the current sequence # as well. */
1799 /* ------------------------------------------------------------------------ */
1800 int fr_tcpinwindow(fin
, fdata
, tdata
, tcp
, flags
)
1802 tcpdata_t
*fdata
, *tdata
;
1806 tcp_seq seq
, ack
, end
;
1807 int ackskew
, tcpflags
;
1812 * Find difference between last checked packet and this packet.
1814 tcpflags
= tcp
->th_flags
;
1815 seq
= ntohl(tcp
->th_seq
);
1816 ack
= ntohl(tcp
->th_ack
);
1817 if (tcpflags
& TH_SYN
)
1818 win
= ntohs(tcp
->th_win
);
1820 win
= ntohs(tcp
->th_win
) << fdata
->td_winscale
;
1823 * A window of 0 produces undesirable behaviour from this function.
1828 dsize
= fin
->fin_dlen
- (TCP_OFF(tcp
) << 2) +
1829 ((tcpflags
& TH_SYN
) ? 1 : 0) + ((tcpflags
& TH_FIN
) ? 1 : 0);
1832 * if window scaling is present, the scaling is only allowed
1833 * for windows not in the first SYN packet. In that packet the
1834 * window is 65535 to specify the largest window possible
1835 * for receivers not implementing the window scale option.
1836 * Currently, we do not assume TTCP here. That means that
1837 * if we see a second packet from a host (after the initial
1838 * SYN), we can assume that the receiver of the SYN did
1839 * already send back the SYN/ACK (and thus that we know if
1840 * the receiver also does window scaling)
1842 if (!(tcpflags
& TH_SYN
) && (fdata
->td_winflags
& TCP_WSCALE_FIRST
)) {
1843 fdata
->td_winflags
&= ~TCP_WSCALE_FIRST
;
1844 fdata
->td_maxwin
= win
;
1849 if ((fdata
->td_end
== 0) &&
1850 (!(flags
& IS_TCPFSM
) ||
1851 ((tcpflags
& TH_OPENING
) == TH_OPENING
))) {
1853 * Must be a (outgoing) SYN-ACK in reply to a SYN.
1855 fdata
->td_end
= end
- 1;
1856 fdata
->td_maxwin
= 1;
1857 fdata
->td_maxend
= end
+ win
;
1860 if (!(tcpflags
& TH_ACK
)) { /* Pretend an ack was sent */
1861 ack
= tdata
->td_end
;
1862 } else if (((tcpflags
& (TH_ACK
|TH_RST
)) == (TH_ACK
|TH_RST
)) &&
1864 /* gross hack to get around certain broken tcp stacks */
1865 ack
= tdata
->td_end
;
1868 maxwin
= tdata
->td_maxwin
;
1869 ackskew
= tdata
->td_end
- ack
;
1872 * Strict sequencing only allows in-order delivery.
1874 if (seq
!= fdata
->td_end
) {
1875 if ((flags
& IS_STRICT
) != 0) {
1881 if ((SEQ_GE(fdata
->td_maxend
, end
)) &&
1882 (SEQ_GE(seq
, fdata
->td_end
- maxwin
)) &&
1883 /* XXX what about big packets */
1884 #define MAXACKWINDOW 66000
1885 (-ackskew
<= (MAXACKWINDOW
)) &&
1886 ( ackskew
<= (MAXACKWINDOW
<< fdata
->td_winscale
))) {
1889 * Microsoft Windows will send the next packet to the right of the
1890 * window if SACK is in use.
1892 } else if ((seq
== fdata
->td_maxend
) && (ackskew
== 0) &&
1893 (fdata
->td_winflags
& TCP_SACK_PERMIT
) &&
1894 (tdata
->td_winflags
& TCP_SACK_PERMIT
)) {
1897 * Sometimes a TCP RST will be generated with only the ACK field
1900 } else if ((seq
== 0) && (tcpflags
== (TH_RST
|TH_ACK
)) &&
1901 (ackskew
>= -1) && (ackskew
<= 1)) {
1903 } else if (!(flags
& IS_TCPFSM
)) {
1906 i
= (fin
->fin_rev
<< 1) + fin
->fin_out
;
1909 if (is_pkts
[i
]0 == 0) {
1911 * Picking up a connection in the middle, the "next"
1912 * packet seen from a direction that is new should be
1913 * accepted, even if it appears out of sequence.
1918 if (!(fdata
->td_winflags
&
1919 (TCP_WSCALE_SEEN
|TCP_WSCALE_FIRST
))) {
1921 * No TCPFSM and no window scaling, so make some
1924 if ((seq
== fdata
->td_maxend
) && (ackskew
== 0))
1926 else if (SEQ_GE(seq
+ maxwin
, fdata
->td_end
- maxwin
))
1931 /* TRACE(inseq, fdata, tdata, seq, end, ack, ackskew, win, maxwin) */
1934 /* if ackskew < 0 then this should be due to fragmented
1935 * packets. There is no way to know the length of the
1936 * total packet in advance.
1937 * We do know the total length from the fragment cache though.
1938 * Note however that there might be more sessions with
1939 * exactly the same source and destination parameters in the
1940 * state cache (and source and destination is the only stuff
1941 * that is saved in the fragment cache). Note further that
1942 * some TCP connections in the state cache are hashed with
1943 * sport and dport as well which makes it not worthwhile to
1945 * Thus, when ackskew is negative but still seems to belong
1946 * to this session, we bump up the destinations end value.
1949 tdata
->td_end
= ack
;
1951 /* update max window seen */
1952 if (fdata
->td_maxwin
< win
)
1953 fdata
->td_maxwin
= win
;
1954 if (SEQ_GT(end
, fdata
->td_end
))
1955 fdata
->td_end
= end
;
1956 if (SEQ_GE(ack
+ win
, tdata
->td_maxend
))
1957 tdata
->td_maxend
= ack
+ win
;
1964 /* ------------------------------------------------------------------------ */
1965 /* Function: fr_stclone */
1966 /* Returns: ipstate_t* - NULL == cloning failed, */
1967 /* else pointer to new state structure */
1968 /* Parameters: fin(I) - pointer to packet information */
1969 /* tcp(I) - pointer to TCP/UDP header */
1970 /* is(I) - pointer to master state structure */
1972 /* Create a "duplicate" state table entry from the master. */
1973 /* ------------------------------------------------------------------------ */
1974 static ipstate_t
*fr_stclone(fin
, tcp
, is
)
1982 if (ips_num
== fr_statemax
) {
1983 ATOMIC_INCL(ips_stats
.iss_max
);
1984 fr_state_doflush
= 1;
1987 KMALLOC(clone
, ipstate_t
*);
1990 bcopy((char *)is
, (char *)clone
, sizeof(*clone
));
1992 MUTEX_NUKE(&clone
->is_lock
);
1994 * It has not yet been placed on any timeout queue, so make sure
1995 * all of that data is zero'd out.
1997 clone
->is_sti
.tqe_pnext
= NULL
;
1998 clone
->is_sti
.tqe_next
= NULL
;
1999 clone
->is_sti
.tqe_ifq
= NULL
;
2000 clone
->is_sti
.tqe_parent
= clone
;
2002 clone
->is_die
= ONE_DAY
+ fr_ticks
;
2003 clone
->is_state
[0] = 0;
2004 clone
->is_state
[1] = 0;
2005 send
= ntohl(tcp
->th_seq
) + fin
->fin_dlen
- (TCP_OFF(tcp
) << 2) +
2006 ((tcp
->th_flags
& TH_SYN
) ? 1 : 0) +
2007 ((tcp
->th_flags
& TH_FIN
) ? 1 : 0);
2009 if (fin
->fin_rev
== 1) {
2010 clone
->is_dend
= send
;
2011 clone
->is_maxdend
= send
;
2013 clone
->is_maxswin
= 1;
2014 clone
->is_maxdwin
= ntohs(tcp
->th_win
);
2015 if (clone
->is_maxdwin
== 0)
2016 clone
->is_maxdwin
= 1;
2018 clone
->is_send
= send
;
2019 clone
->is_maxsend
= send
;
2021 clone
->is_maxdwin
= 1;
2022 clone
->is_maxswin
= ntohs(tcp
->th_win
);
2023 if (clone
->is_maxswin
== 0)
2024 clone
->is_maxswin
= 1;
2027 clone
->is_flags
&= ~SI_CLONE
;
2028 clone
->is_flags
|= SI_CLONED
;
2029 fr_stinsert(clone
, fin
->fin_rev
);
2031 if (clone
->is_p
== IPPROTO_TCP
) {
2032 (void) fr_tcp_age(&clone
->is_sti
, fin
, ips_tqtqb
,
2035 MUTEX_EXIT(&clone
->is_lock
);
2036 #ifdef IPFILTER_SCAN
2037 (void) ipsc_attachis(is
);
2039 #ifdef IPFILTER_SYNC
2040 if (is
->is_flags
& IS_STATESYNC
)
2041 clone
->is_sync
= ipfsync_new(SMC_STATE
, fin
, clone
);
2047 /* ------------------------------------------------------------------------ */
2048 /* Function: fr_matchsrcdst */
2050 /* Parameters: fin(I) - pointer to packet information */
2051 /* is(I) - pointer to state structure */
2052 /* src(I) - pointer to source address */
2053 /* dst(I) - pointer to destination address */
2054 /* tcp(I) - pointer to TCP/UDP header */
2056 /* Match a state table entry against an IP packet. The logic below is that */
2057 /* ret gets set to one if the match succeeds, else remains 0. If it is */
2058 /* still 0 after the test, there is no match. */
2059 /* ------------------------------------------------------------------------ */
2060 static ipstate_t
*fr_matchsrcdst(fin
, is
, src
, dst
, tcp
, cmask
)
2063 i6addr_t
*src
, *dst
;
2067 int ret
= 0, rev
, out
, flags
, flx
= 0, idx
;
2073 * If a connection is about to be deleted, no packets
2074 * are allowed to match it.
2076 if (is
->is_sti
.tqe_ifq
== &ips_deletetq
)
2079 rev
= IP6_NEQ(&is
->is_dst
, dst
);
2082 flags
= is
->is_flags
;
2087 sp
= htons(fin
->fin_sport
);
2088 dp
= ntohs(fin
->fin_dport
);
2092 if (!(flags
& SI_W_SPORT
) && (sp
!= is
->is_sport
))
2094 else if (!(flags
& SI_W_DPORT
) && (dp
!= is
->is_dport
))
2099 idx
= (out
<< 1) + rev
;
2102 * If the interface for this 'direction' is set, make sure it matches.
2103 * An interface name that is not set matches any, as does a name of *.
2105 if ((is
->is_ifp
[idx
] == ifp
) || (is
->is_ifp
[idx
] == NULL
&&
2106 (*is
->is_ifname
[idx
] == '\0' || *is
->is_ifname
[idx
] == '-' ||
2107 *is
->is_ifname
[idx
] == '*')))
2115 * Match addresses and ports.
2118 if ((IP6_EQ(&is
->is_dst
, dst
) || (flags
& SI_W_DADDR
)) &&
2119 (IP6_EQ(&is
->is_src
, src
) || (flags
& SI_W_SADDR
))) {
2121 if ((sp
== is
->is_sport
|| flags
& SI_W_SPORT
)&&
2122 (dp
== is
->is_dport
|| flags
& SI_W_DPORT
))
2129 if ((IP6_EQ(&is
->is_dst
, src
) || (flags
& SI_W_DADDR
)) &&
2130 (IP6_EQ(&is
->is_src
, dst
) || (flags
& SI_W_SADDR
))) {
2132 if ((dp
== is
->is_sport
|| flags
& SI_W_SPORT
)&&
2133 (sp
== is
->is_dport
|| flags
& SI_W_DPORT
))
2145 * Whether or not this should be here, is questionable, but the aim
2146 * is to get this out of the main line.
2149 flags
= is
->is_flags
& ~(SI_WILDP
|SI_NEWFR
|SI_CLONE
|SI_CLONED
);
2152 * Only one of the source or destination address can be flagged as a
2153 * wildcard. Fill in the missing address, if set.
2154 * For IPv6, if the address being copied in is multicast, then
2155 * don't reset the wild flag - multicast causes it to be set in the
2158 if ((flags
& (SI_W_SADDR
|SI_W_DADDR
))) {
2159 fr_ip_t
*fi
= &fin
->fin_fi
;
2161 if ((flags
& SI_W_SADDR
) != 0) {
2164 if (is
->is_v
== 6 &&
2165 IN6_IS_ADDR_MULTICAST(&fi
->fi_src
.in6
))
2170 is
->is_src
= fi
->fi_src
;
2171 is
->is_flags
&= ~SI_W_SADDR
;
2175 if (is
->is_v
== 6 &&
2176 IN6_IS_ADDR_MULTICAST(&fi
->fi_dst
.in6
))
2181 is
->is_src
= fi
->fi_dst
;
2182 is
->is_flags
&= ~SI_W_SADDR
;
2185 } else if ((flags
& SI_W_DADDR
) != 0) {
2188 if (is
->is_v
== 6 &&
2189 IN6_IS_ADDR_MULTICAST(&fi
->fi_dst
.in6
))
2194 is
->is_dst
= fi
->fi_dst
;
2195 is
->is_flags
&= ~SI_W_DADDR
;
2199 if (is
->is_v
== 6 &&
2200 IN6_IS_ADDR_MULTICAST(&fi
->fi_src
.in6
))
2205 is
->is_dst
= fi
->fi_src
;
2206 is
->is_flags
&= ~SI_W_DADDR
;
2210 if ((is
->is_flags
& (SI_WILDA
|SI_WILDP
)) == 0) {
2211 ATOMIC_DECL(ips_stats
.iss_wild
);
2215 flx
= fin
->fin_flx
& cmask
;
2216 cflx
= is
->is_flx
[out
][rev
];
2219 * Match up any flags set from IP options.
2221 if ((cflx
&& (flx
!= (cflx
& cmask
))) ||
2222 ((fin
->fin_optmsk
& is
->is_optmsk
[rev
]) != is
->is_opt
[rev
]) ||
2223 ((fin
->fin_secmsk
& is
->is_secmsk
) != is
->is_sec
) ||
2224 ((fin
->fin_auth
& is
->is_authmsk
) != is
->is_auth
))
2228 * Only one of the source or destination port can be flagged as a
2229 * wildcard. When filling it in, fill in a copy of the matched entry
2230 * if it has the cloning flag set.
2232 if ((fin
->fin_flx
& FI_IGNORE
) != 0) {
2237 if ((flags
& (SI_W_SPORT
|SI_W_DPORT
))) {
2238 if ((flags
& SI_CLONE
) != 0) {
2241 clone
= fr_stclone(fin
, tcp
, is
);
2246 ATOMIC_DECL(ips_stats
.iss_wild
);
2249 if ((flags
& SI_W_SPORT
) != 0) {
2252 is
->is_send
= ntohl(tcp
->th_seq
);
2255 is
->is_send
= ntohl(tcp
->th_ack
);
2257 is
->is_maxsend
= is
->is_send
+ 1;
2258 } else if ((flags
& SI_W_DPORT
) != 0) {
2261 is
->is_dend
= ntohl(tcp
->th_ack
);
2264 is
->is_dend
= ntohl(tcp
->th_seq
);
2266 is
->is_maxdend
= is
->is_dend
+ 1;
2268 is
->is_flags
&= ~(SI_W_SPORT
|SI_W_DPORT
);
2269 if ((flags
& SI_CLONED
) && ipstate_logging
)
2270 ipstate_log(is
, ISL_CLONE
);
2275 if (is
->is_flx
[out
][rev
] == 0) {
2276 is
->is_flx
[out
][rev
] = flx
;
2277 is
->is_opt
[rev
] = fin
->fin_optmsk
;
2278 if (is
->is_v
== 6) {
2279 is
->is_opt
[rev
] &= ~0x8;
2280 is
->is_optmsk
[rev
] &= ~0x8;
2285 * Check if the interface name for this "direction" is set and if not,
2288 if (is
->is_ifp
[idx
] == NULL
&&
2289 (*is
->is_ifname
[idx
] == '\0' || *is
->is_ifname
[idx
] == '*')) {
2290 is
->is_ifp
[idx
] = ifp
;
2291 COPYIFNAME(is
->is_v
, ifp
, is
->is_ifname
[idx
]);
2298 /* ------------------------------------------------------------------------ */
2299 /* Function: fr_checkicmpmatchingstate */
2301 /* Parameters: fin(I) - pointer to packet information */
2303 /* If we've got an ICMP error message, using the information stored in the */
2304 /* ICMP packet, look for a matching state table entry. */
2306 /* If we return NULL then no lock on ipf_state is held. */
2307 /* If we return non-null then a read-lock on ipf_state is held. */
2308 /* ------------------------------------------------------------------------ */
2309 static ipstate_t
*fr_checkicmpmatchingstate(fin
)
2312 ipstate_t
*is
, **isp
;
2313 u_short sport
, dport
;
2315 int backward
, i
, oi
;
2327 * Does it at least have the return (basic) IP header ?
2328 * Is it an actual recognised ICMP error type?
2329 * Only a basic IP header (no options) should be with
2330 * an ICMP error header.
2332 if ((fin
->fin_v
!= 4) || (fin
->fin_hlen
!= sizeof(ip_t
)) ||
2333 (fin
->fin_plen
< ICMPERR_MINPKTLEN
) ||
2334 !(fin
->fin_flx
& FI_ICMPERR
))
2337 type
= ic
->icmp_type
;
2339 oip
= (ip_t
*)((char *)ic
+ ICMPERR_ICMPHLEN
);
2341 * Check if at least the old IP header (with options) and
2342 * 8 bytes of payload are present.
2344 if (fin
->fin_plen
< ICMPERR_MAXPKTLEN
+ ((IP_HL(oip
) - 5) << 2))
2350 len
= fin
->fin_dlen
- ICMPERR_ICMPHLEN
;
2351 if ((len
<= 0) || ((IP_HL(oip
) << 2) > len
))
2355 * Is the buffer big enough for all of it ? It's the size of the IP
2356 * header claimed in the encapsulated part which is of concern. It
2357 * may be too big to be in this buffer but not so big that it's
2358 * outside the ICMP packet, leading to TCP deref's causing problems.
2359 * This is possible because we don't know how big oip_hl is when we
2360 * do the pullup early in fr_check() and thus can't guarantee it is
2368 # if defined(MENTAT)
2369 if ((char *)oip
+ len
> (char *)m
->b_wptr
)
2372 if ((char *)oip
+ len
> (char *)fin
->fin_ip
+ m
->m_len
)
2377 bcopy((char *)fin
, (char *)&ofin
, sizeof(*fin
));
2380 * in the IPv4 case we must zero the i6addr union otherwise
2381 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
2382 * of the 'junk' in the unused part of the union
2384 bzero((char *)&src
, sizeof(src
));
2385 bzero((char *)&dst
, sizeof(dst
));
2388 * we make an fin entry to be able to feed it to
2389 * matchsrcdst. Note that not all fields are necessary
2390 * but this is the cleanest way. Note further we fill
2391 * in fin_mp such that if someone uses it we'll get
2392 * a kernel panic. fr_matchsrcdst does not use this.
2394 * watch out here, as ip is in host order and oip in network
2395 * order. Any change we make must be undone afterwards, like
2396 * oip->ip_off - it is still in network byte order so fix it.
2398 savelen
= oip
->ip_len
;
2400 oip
->ip_off
= ntohs(oip
->ip_off
);
2402 ofin
.fin_flx
= FI_NOCKSUM
;
2405 ofin
.fin_m
= NULL
; /* if dereferenced, panic XXX */
2406 ofin
.fin_mp
= NULL
; /* if dereferenced, panic XXX */
2407 (void) fr_makefrip(IP_HL(oip
) << 2, oip
, &ofin
);
2408 ofin
.fin_ifp
= fin
->fin_ifp
;
2409 ofin
.fin_out
= !fin
->fin_out
;
2411 * Reset the short and bad flag here because in fr_matchsrcdst()
2412 * the flags for the current packet (fin_flx) are compared against
2413 * those for the existing session.
2415 ofin
.fin_flx
&= ~(FI_BAD
|FI_SHORT
);
2418 * Put old values of ip_len and ip_off back as we don't know
2419 * if we have to forward the packet (or process it again).
2421 oip
->ip_len
= savelen
;
2422 oip
->ip_off
= htons(oip
->ip_off
);
2428 * an ICMP error can only be generated as a result of an
2429 * ICMP query, not as the response on an ICMP error
2431 * XXX theoretically ICMP_ECHOREP and the other replies are
2432 * ICMP queries as well, but adding them here seems strange XXX
2434 if ((ofin
.fin_flx
& FI_ICMPERR
) != 0)
2438 * perform a lookup of the ICMP packet in the state table
2440 icmp
= (icmphdr_t
*)((char *)oip
+ (IP_HL(oip
) << 2));
2441 hv
= (pr
= oip
->ip_p
);
2442 src
.in4
= oip
->ip_src
;
2443 hv
+= src
.in4
.s_addr
;
2444 dst
.in4
= oip
->ip_dst
;
2445 hv
+= dst
.in4
.s_addr
;
2446 hv
+= icmp
->icmp_id
;
2447 hv
= DOUBLE_HASH(hv
);
2449 READ_ENTER(&ipf_state
);
2450 for (isp
= &ips_table
[hv
]; ((is
= *isp
) != NULL
); ) {
2451 isp
= &is
->is_hnext
;
2452 if ((is
->is_p
!= pr
) || (is
->is_v
!= 4))
2454 if (is
->is_pass
& FR_NOICMPERR
)
2456 is
= fr_matchsrcdst(&ofin
, is
, &src
, &dst
,
2460 * i : the index of this packet (the icmp
2462 * oi : the index of the original packet found
2463 * in the icmp header (i.e. the packet
2464 * causing this icmp)
2465 * backward : original packet was backward
2466 * compared to the state
2468 backward
= IP6_NEQ(&is
->is_src
, &src
);
2469 fin
->fin_rev
= !backward
;
2470 i
= (!backward
<< 1) + fin
->fin_out
;
2471 oi
= (backward
<< 1) + ofin
.fin_out
;
2472 if (is
->is_icmppkts
[i
] > is
->is_pkts
[oi
])
2474 ips_stats
.iss_hits
++;
2475 is
->is_icmppkts
[i
]++;
2479 RWLOCK_EXIT(&ipf_state
);
2488 tcp
= (tcphdr_t
*)((char *)oip
+ (IP_HL(oip
) << 2));
2489 dport
= tcp
->th_dport
;
2490 sport
= tcp
->th_sport
;
2492 hv
= (pr
= oip
->ip_p
);
2493 src
.in4
= oip
->ip_src
;
2494 hv
+= src
.in4
.s_addr
;
2495 dst
.in4
= oip
->ip_dst
;
2496 hv
+= dst
.in4
.s_addr
;
2499 hv
= DOUBLE_HASH(hv
);
2501 READ_ENTER(&ipf_state
);
2502 for (isp
= &ips_table
[hv
]; ((is
= *isp
) != NULL
); ) {
2503 isp
= &is
->is_hnext
;
2505 * Only allow this icmp through if the
2506 * encapsulated packet was allowed through the
2507 * other way around. Note that the minimal amount
2508 * of info present does not allow for checking against
2509 * tcp internals such as seq and ack numbers. Only the
2510 * ports are known to be present and can be even if the
2511 * short flag is set.
2513 if ((is
->is_p
== pr
) && (is
->is_v
== 4) &&
2514 (is
= fr_matchsrcdst(&ofin
, is
, &src
, &dst
,
2515 tcp
, FI_ICMPCMP
))) {
2517 * i : the index of this packet (the icmp unreachable)
2518 * oi : the index of the original packet found in the
2519 * icmp header (i.e. the packet causing this icmp)
2520 * backward : original packet was backward compared to
2523 backward
= IP6_NEQ(&is
->is_src
, &src
);
2524 fin
->fin_rev
= !backward
;
2525 i
= (!backward
<< 1) + fin
->fin_out
;
2526 oi
= (backward
<< 1) + ofin
.fin_out
;
2528 if (((is
->is_pass
& FR_NOICMPERR
) != 0) ||
2529 (is
->is_icmppkts
[i
] > is
->is_pkts
[oi
]))
2531 ips_stats
.iss_hits
++;
2532 is
->is_icmppkts
[i
]++;
2534 * we deliberately do not touch the timeouts
2535 * for the accompanying state table entry.
2536 * It remains to be seen if that is correct. XXX
2541 RWLOCK_EXIT(&ipf_state
);
2546 /* ------------------------------------------------------------------------ */
2547 /* Function: fr_ipsmove */
2549 /* Parameters: is(I) - pointer to state table entry */
2550 /* hv(I) - new hash value for state table entry */
2551 /* Write Locks: ipf_state */
2553 /* Move a state entry from one position in the hash table to another. */
2554 /* ------------------------------------------------------------------------ */
2555 static void fr_ipsmove(is
, hv
)
2564 * Remove the hash from the old location...
2566 isp
= is
->is_phnext
;
2568 is
->is_hnext
->is_phnext
= isp
;
2569 *isp
= is
->is_hnext
;
2570 if (ips_table
[hvm
] == NULL
)
2571 ips_stats
.iss_inuse
--;
2572 ips_stats
.iss_bucketlen
[hvm
]--;
2575 * ...and put the hash in the new one.
2577 hvm
= DOUBLE_HASH(hv
);
2579 isp
= &ips_table
[hvm
];
2581 (*isp
)->is_phnext
= &is
->is_hnext
;
2583 ips_stats
.iss_inuse
++;
2584 ips_stats
.iss_bucketlen
[hvm
]++;
2585 is
->is_phnext
= isp
;
2586 is
->is_hnext
= *isp
;
2591 /* ------------------------------------------------------------------------ */
2592 /* Function: fr_stlookup */
2593 /* Returns: ipstate_t* - NULL == no matching state found, */
2594 /* else pointer to state information is returned */
2595 /* Parameters: fin(I) - pointer to packet information */
2596 /* tcp(I) - pointer to TCP/UDP header. */
2598 /* Search the state table for a matching entry to the packet described by */
2599 /* the contents of *fin. */
2601 /* If we return NULL then no lock on ipf_state is held. */
2602 /* If we return non-null then a read-lock on ipf_state is held. */
2603 /* ------------------------------------------------------------------------ */
2604 ipstate_t
*fr_stlookup(fin
, tcp
, ifqp
)
2609 u_int hv
, hvm
, pr
, v
, tryagain
;
2610 ipstate_t
*is
, **isp
;
2611 u_short dport
, sport
;
2620 ic
= (struct icmp
*)tcp
;
2621 hv
= (pr
= fin
->fin_fi
.fi_p
);
2622 src
= fin
->fin_fi
.fi_src
;
2623 dst
= fin
->fin_fi
.fi_dst
;
2624 hv
+= src
.in4
.s_addr
;
2625 hv
+= dst
.in4
.s_addr
;
2627 v
= fin
->fin_fi
.fi_v
;
2630 hv
+= fin
->fin_fi
.fi_src
.i6
[1];
2631 hv
+= fin
->fin_fi
.fi_src
.i6
[2];
2632 hv
+= fin
->fin_fi
.fi_src
.i6
[3];
2634 if ((fin
->fin_p
== IPPROTO_ICMPV6
) &&
2635 IN6_IS_ADDR_MULTICAST(&fin
->fin_fi
.fi_dst
.in6
)) {
2636 hv
-= dst
.in4
.s_addr
;
2638 hv
+= fin
->fin_fi
.fi_dst
.i6
[1];
2639 hv
+= fin
->fin_fi
.fi_dst
.i6
[2];
2640 hv
+= fin
->fin_fi
.fi_dst
.i6
[3];
2645 (fin
->fin_flx
& (FI_MULTICAST
|FI_BROADCAST
|FI_MBCAST
))) {
2646 if (fin
->fin_out
== 0) {
2647 hv
-= src
.in4
.s_addr
;
2649 hv
-= dst
.in4
.s_addr
;
2654 * Search the hash table for matching packet header info.
2659 case IPPROTO_ICMPV6
:
2662 if ((ic
->icmp_type
== ICMP6_ECHO_REQUEST
) ||
2663 (ic
->icmp_type
== ICMP6_ECHO_REPLY
)) {
2667 READ_ENTER(&ipf_state
);
2669 hvm
= DOUBLE_HASH(hv
);
2670 for (isp
= &ips_table
[hvm
]; ((is
= *isp
) != NULL
); ) {
2671 isp
= &is
->is_hnext
;
2672 if ((is
->is_p
!= pr
) || (is
->is_v
!= v
))
2674 is
= fr_matchsrcdst(fin
, is
, &src
, &dst
, NULL
, FI_CMP
);
2676 fr_matchicmpqueryreply(v
, &is
->is_icmp
,
2677 ic
, fin
->fin_rev
)) {
2679 ifq
= &ips_icmpacktq
;
2687 if ((tryagain
!= 0) && !(is
->is_flags
& SI_W_DADDR
)) {
2688 hv
+= fin
->fin_fi
.fi_src
.i6
[0];
2689 hv
+= fin
->fin_fi
.fi_src
.i6
[1];
2690 hv
+= fin
->fin_fi
.fi_src
.i6
[2];
2691 hv
+= fin
->fin_fi
.fi_src
.i6
[3];
2693 MUTEX_DOWNGRADE(&ipf_state
);
2697 RWLOCK_EXIT(&ipf_state
);
2700 * No matching icmp state entry. Perhaps this is a
2701 * response to another state entry.
2703 * XXX With some ICMP6 packets, the "other" address is already
2704 * in the packet, after the ICMP6 header, and this could be
2705 * used in place of the multicast address. However, taking
2706 * advantage of this requires some significant code changes
2707 * to handle the specific types where that is the case.
2709 if ((ips_stats
.iss_wild
!= 0) && (v
== 6) && (tryagain
== 0) &&
2710 !IN6_IS_ADDR_MULTICAST(&fin
->fin_fi
.fi_src
.in6
)) {
2711 hv
-= fin
->fin_fi
.fi_src
.i6
[0];
2712 hv
-= fin
->fin_fi
.fi_src
.i6
[1];
2713 hv
-= fin
->fin_fi
.fi_src
.i6
[2];
2714 hv
-= fin
->fin_fi
.fi_src
.i6
[3];
2716 WRITE_ENTER(&ipf_state
);
2720 is
= fr_checkicmp6matchingstate(fin
);
2730 hv
= DOUBLE_HASH(hv
);
2731 READ_ENTER(&ipf_state
);
2732 for (isp
= &ips_table
[hv
]; ((is
= *isp
) != NULL
); ) {
2733 isp
= &is
->is_hnext
;
2734 if ((is
->is_p
!= pr
) || (is
->is_v
!= v
))
2736 is
= fr_matchsrcdst(fin
, is
, &src
, &dst
, NULL
, FI_CMP
);
2738 (ic
->icmp_id
== is
->is_icmp
.ici_id
) &&
2739 fr_matchicmpqueryreply(v
, &is
->is_icmp
,
2740 ic
, fin
->fin_rev
)) {
2742 ifq
= &ips_icmpacktq
;
2749 RWLOCK_EXIT(&ipf_state
);
2756 sport
= htons(fin
->fin_data
[0]);
2758 dport
= htons(fin
->fin_data
[1]);
2762 READ_ENTER(&ipf_state
);
2764 hvm
= DOUBLE_HASH(hv
);
2765 for (isp
= &ips_table
[hvm
]; ((is
= *isp
) != NULL
); ) {
2766 isp
= &is
->is_hnext
;
2767 if ((is
->is_p
!= pr
) || (is
->is_v
!= v
))
2769 fin
->fin_flx
&= ~FI_OOW
;
2770 is
= fr_matchsrcdst(fin
, is
, &src
, &dst
, tcp
, FI_CMP
);
2772 if (pr
== IPPROTO_TCP
) {
2773 if (!fr_tcpstate(fin
, tcp
, is
)) {
2774 oow
|= fin
->fin_flx
& FI_OOW
;
2783 !(is
->is_flags
& (SI_CLONE
|SI_WILDP
|SI_WILDA
))) {
2787 MUTEX_DOWNGRADE(&ipf_state
);
2791 RWLOCK_EXIT(&ipf_state
);
2793 if (ips_stats
.iss_wild
) {
2794 if (tryagain
== 0) {
2797 } else if (tryagain
== 1) {
2798 hv
= fin
->fin_fi
.fi_p
;
2800 * If we try to pretend this is a reply to a
2801 * multicast/broadcast packet then we need to
2802 * exclude part of the address from the hash
2805 if (fin
->fin_out
== 0) {
2806 hv
+= src
.in4
.s_addr
;
2808 hv
+= dst
.in4
.s_addr
;
2814 if (tryagain
<= 2) {
2815 WRITE_ENTER(&ipf_state
);
2819 fin
->fin_flx
|= oow
;
2825 if (GRE_REV(gre
->gr_flags
) == 1) {
2832 hvm
= DOUBLE_HASH(hv
);
2833 READ_ENTER(&ipf_state
);
2834 for (isp
= &ips_table
[hvm
]; ((is
= *isp
) != NULL
); ) {
2835 isp
= &is
->is_hnext
;
2836 if ((is
->is_p
!= pr
) || (is
->is_v
!= v
))
2838 is
= fr_matchsrcdst(fin
, is
, &src
, &dst
, NULL
, FI_CMP
);
2845 RWLOCK_EXIT(&ipf_state
);
2851 if (((is
->is_sti
.tqe_flags
& TQE_RULEBASED
) != 0) &&
2852 (is
->is_tqehead
[fin
->fin_rev
] != NULL
))
2853 ifq
= is
->is_tqehead
[fin
->fin_rev
];
2854 if (ifq
!= NULL
&& ifqp
!= NULL
)
2861 /* ------------------------------------------------------------------------ */
2862 /* Function: fr_updatestate */
2864 /* Parameters: fin(I) - pointer to packet information */
2865 /* is(I) - pointer to state table entry */
2866 /* Read Locks: ipf_state */
2868 /* Updates packet and byte counters for a newly received packet. Seeds the */
2869 /* fragment cache with a new entry as required. */
2870 /* ------------------------------------------------------------------------ */
2871 void fr_updatestate(fin
, is
, ifq
)
2879 i
= (fin
->fin_rev
<< 1) + fin
->fin_out
;
2882 * For TCP packets, ifq == NULL. For all others, check if this new
2883 * queue is different to the last one it was on and move it if so.
2886 MUTEX_ENTER(&is
->is_lock
);
2887 if ((tqe
->tqe_flags
& TQE_RULEBASED
) != 0)
2888 ifq
= is
->is_tqehead
[fin
->fin_rev
];
2891 fr_movequeue(tqe
, tqe
->tqe_ifq
, ifq
);
2894 fin
->fin_pktnum
= is
->is_pkts
[i
] + is
->is_icmppkts
[i
];
2895 is
->is_bytes
[i
] += fin
->fin_plen
;
2896 MUTEX_EXIT(&is
->is_lock
);
2898 #ifdef IPFILTER_SYNC
2899 if (is
->is_flags
& IS_STATESYNC
)
2900 ipfsync_update(SMC_STATE
, fin
, is
->is_sync
);
2903 ATOMIC_INCL(ips_stats
.iss_hits
);
2905 fin
->fin_fr
= is
->is_rule
;
2908 * If this packet is a fragment and the rule says to track fragments,
2909 * then create a new fragment cache entry.
2912 if ((fin
->fin_flx
& FI_FRAG
) && FR_ISPASS(pass
))
2913 (void) fr_newfrag(fin
, pass
^ FR_KEEPSTATE
);
2917 /* ------------------------------------------------------------------------ */
2918 /* Function: fr_checkstate */
2919 /* Returns: frentry_t* - NULL == search failed, */
2920 /* else pointer to rule for matching state */
2921 /* Parameters: ifp(I) - pointer to interface */
2922 /* passp(I) - pointer to filtering result flags */
2924 /* Check if a packet is associated with an entry in the state table. */
2925 /* ------------------------------------------------------------------------ */
2926 frentry_t
*fr_checkstate(fin
, passp
)
2936 if (fr_state_lock
|| (ips_list
== NULL
) ||
2937 (fin
->fin_flx
& (FI_SHORT
|FI_STATE
|FI_FRAGBODY
|FI_BAD
)))
2941 if ((fin
->fin_flx
& FI_TCPUDP
) ||
2942 (fin
->fin_fi
.fi_p
== IPPROTO_ICMP
)
2944 || (fin
->fin_fi
.fi_p
== IPPROTO_ICMPV6
)
2952 * Search the hash table for matching packet header info.
2955 is
= fr_stlookup(fin
, tcp
, &ifq
);
2959 case IPPROTO_ICMPV6
:
2962 if (fin
->fin_v
== 6) {
2963 is
= fr_checkicmp6matchingstate(fin
);
2973 * No matching icmp state entry. Perhaps this is a
2974 * response to another state entry.
2976 is
= fr_checkicmpmatchingstate(fin
);
2984 if (is
->is_pass
& FR_NEWISN
) {
2985 if (fin
->fin_out
== 0)
2986 fr_fixinisn(fin
, is
);
2987 else if (fin
->fin_out
== 1)
2988 fr_fixoutisn(fin
, is
);
2993 ifq
= &ips_udpacktq
;
2999 ATOMIC_INCL(ips_stats
.iss_miss
);
3006 if ((fin
->fin_out
== 0) && (fr
->fr_nattag
.ipt_num
[0] != 0)) {
3007 if (fin
->fin_nattag
== NULL
)
3009 if (fr_matchtag(&fr
->fr_nattag
, fin
->fin_nattag
) != 0)
3012 (void) strncpy(fin
->fin_group
, fr
->fr_group
, FR_GROUPLEN
);
3013 fin
->fin_icode
= fr
->fr_icode
;
3016 fin
->fin_rule
= is
->is_rulen
;
3018 fr_updatestate(fin
, is
, ifq
);
3020 RWLOCK_EXIT(&ipf_state
);
3021 fin
->fin_flx
|= FI_STATE
;
3022 if ((pass
& FR_LOGFIRST
) != 0)
3023 pass
&= ~(FR_LOGFIRST
|FR_LOG
);
3029 /* ------------------------------------------------------------------------ */
3030 /* Function: fr_fixoutisn */
3032 /* Parameters: fin(I) - pointer to packet information */
3033 /* is(I) - pointer to master state structure */
3035 /* Called only for outbound packets, adjusts the sequence number and the */
3036 /* TCP checksum to match that change. */
3037 /* ------------------------------------------------------------------------ */
3038 static void fr_fixoutisn(fin
, is
)
3048 if ((is
->is_flags
& IS_ISNSYN
) != 0) {
3050 seq
= ntohl(tcp
->th_seq
);
3051 seq
+= is
->is_isninc
[0];
3052 tcp
->th_seq
= htonl(seq
);
3053 fix_outcksum(fin
, &tcp
->th_sum
, is
->is_sumd
[0]);
3056 if ((is
->is_flags
& IS_ISNACK
) != 0) {
3058 seq
= ntohl(tcp
->th_seq
);
3059 seq
+= is
->is_isninc
[1];
3060 tcp
->th_seq
= htonl(seq
);
3061 fix_outcksum(fin
, &tcp
->th_sum
, is
->is_sumd
[1]);
3067 /* ------------------------------------------------------------------------ */
3068 /* Function: fr_fixinisn */
3070 /* Parameters: fin(I) - pointer to packet information */
3071 /* is(I) - pointer to master state structure */
3073 /* Called only for inbound packets, adjusts the acknowledge number and the */
3074 /* TCP checksum to match that change. */
3075 /* ------------------------------------------------------------------------ */
3076 static void fr_fixinisn(fin
, is
)
3086 if ((is
->is_flags
& IS_ISNSYN
) != 0) {
3088 ack
= ntohl(tcp
->th_ack
);
3089 ack
-= is
->is_isninc
[0];
3090 tcp
->th_ack
= htonl(ack
);
3091 fix_incksum(fin
, &tcp
->th_sum
, is
->is_sumd
[0]);
3094 if ((is
->is_flags
& IS_ISNACK
) != 0) {
3096 ack
= ntohl(tcp
->th_ack
);
3097 ack
-= is
->is_isninc
[1];
3098 tcp
->th_ack
= htonl(ack
);
3099 fix_incksum(fin
, &tcp
->th_sum
, is
->is_sumd
[1]);
3105 /* ------------------------------------------------------------------------ */
3106 /* Function: fr_statesync */
3108 /* Parameters: ifp(I) - pointer to interface */
3110 /* Walk through all state entries and if an interface pointer match is */
3111 /* found then look it up again, based on its name in case the pointer has */
3112 /* changed since last time. */
3114 /* If ifp is passed in as being non-null then we are only doing updates for */
3115 /* existing, matching, uses of it. */
3116 /* ------------------------------------------------------------------------ */
3117 void fr_statesync(ifp
)
3123 if (fr_running
<= 0)
3126 WRITE_ENTER(&ipf_state
);
3128 if (fr_running
<= 0) {
3129 RWLOCK_EXIT(&ipf_state
);
3133 for (is
= ips_list
; is
; is
= is
->is_next
) {
3135 * Look up all the interface names in the state entry.
3137 for (i
= 0; i
< 4; i
++) {
3138 if (ifp
== NULL
|| ifp
== is
->is_ifp
[i
])
3139 is
->is_ifp
[i
] = fr_resolvenic(is
->is_ifname
[i
],
3143 RWLOCK_EXIT(&ipf_state
);
3147 /* ------------------------------------------------------------------------ */
3148 /* Function: fr_delstate */
3149 /* Returns: int - 0 = entry deleted, else reference count on struct */
3150 /* Parameters: is(I) - pointer to state structure to delete */
3151 /* why(I) - if not 0, log reason why it was deleted */
3152 /* Write Locks: ipf_state */
3154 /* Deletes a state entry from the enumerated list as well as the hash table */
3155 /* and timeout queue lists. Make adjustments to hash table statistics and */
3156 /* global counters as required. */
3157 /* ------------------------------------------------------------------------ */
3158 static int fr_delstate(is
, why
)
3164 * Since we want to delete this, remove it from the state table,
3165 * where it can be found & used, first.
3167 if (is
->is_phnext
!= NULL
) {
3168 *is
->is_phnext
= is
->is_hnext
;
3169 if (is
->is_hnext
!= NULL
)
3170 is
->is_hnext
->is_phnext
= is
->is_phnext
;
3171 if (ips_table
[is
->is_hv
] == NULL
)
3172 ips_stats
.iss_inuse
--;
3173 ips_stats
.iss_bucketlen
[is
->is_hv
]--;
3175 is
->is_phnext
= NULL
;
3176 is
->is_hnext
= NULL
;
3180 * Because ips_stats.iss_wild is a count of entries in the state
3181 * table that have wildcard flags set, only decerement it once
3184 if (is
->is_flags
& (SI_WILDP
|SI_WILDA
)) {
3185 if (!(is
->is_flags
& SI_CLONED
)) {
3186 ATOMIC_DECL(ips_stats
.iss_wild
);
3188 is
->is_flags
&= ~(SI_WILDP
|SI_WILDA
);
3192 * Next, remove it from the timeout queue it is in.
3194 if (is
->is_sti
.tqe_ifq
!= NULL
)
3195 fr_deletequeueentry(&is
->is_sti
);
3197 if (is
->is_me
!= NULL
) {
3203 * If it is still in use by something else, do not go any further,
3204 * but note that at this point it is now an orphan. How can this
3205 * be? fr_state_flush() calls fr_delete() directly because it wants
3206 * to empty the table out and if something has a hold on a state
3207 * entry (such as ipfstat), it'll do the deref path that'll bring
3208 * us back here to do the real delete & free.
3210 MUTEX_ENTER(&is
->is_lock
);
3211 if (is
->is_ref
> 1) {
3213 MUTEX_EXIT(&is
->is_lock
);
3216 MUTEX_EXIT(&is
->is_lock
);
3220 if (is
->is_tqehead
[0] != NULL
) {
3221 (void) fr_deletetimeoutqueue(is
->is_tqehead
[0]);
3223 if (is
->is_tqehead
[1] != NULL
) {
3224 (void) fr_deletetimeoutqueue(is
->is_tqehead
[1]);
3227 #ifdef IPFILTER_SYNC
3229 ipfsync_del(is
->is_sync
);
3231 #ifdef IPFILTER_SCAN
3232 (void) ipsc_detachis(is
);
3236 * Now remove it from the linked list of known states
3238 if (is
->is_pnext
!= NULL
) {
3239 *is
->is_pnext
= is
->is_next
;
3241 if (is
->is_next
!= NULL
)
3242 is
->is_next
->is_pnext
= is
->is_pnext
;
3244 is
->is_pnext
= NULL
;
3248 if (ipstate_logging
!= 0 && why
!= 0)
3249 ipstate_log(is
, why
);
3251 if (is
->is_p
== IPPROTO_TCP
)
3252 ips_stats
.iss_fin
++;
3254 ips_stats
.iss_expire
++;
3256 if (is
->is_rule
!= NULL
) {
3257 is
->is_rule
->fr_statecnt
--;
3258 (void) fr_derefrule(&is
->is_rule
);
3261 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
3262 ipf_rand_push(is
, sizeof(*is
));
3265 MUTEX_DESTROY(&is
->is_lock
);
3273 /* ------------------------------------------------------------------------ */
3274 /* Function: fr_timeoutstate */
3276 /* Parameters: Nil */
3278 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */
3279 /* used here is to keep the queue sorted with the oldest things at the top */
3280 /* and the youngest at the bottom. So if the top one doesn't need to be */
3281 /* expired then neither will any under it. */
3282 /* ------------------------------------------------------------------------ */
3283 void fr_timeoutstate()
3285 ipftq_t
*ifq
, *ifqnext
;
3286 ipftqent_t
*tqe
, *tqn
;
3291 WRITE_ENTER(&ipf_state
);
3292 for (ifq
= ips_tqtqb
; ifq
!= NULL
; ifq
= ifq
->ifq_next
)
3293 for (tqn
= ifq
->ifq_head
; ((tqe
= tqn
) != NULL
); ) {
3294 if (tqe
->tqe_die
> fr_ticks
)
3296 tqn
= tqe
->tqe_next
;
3297 is
= tqe
->tqe_parent
;
3298 fr_delstate(is
, ISL_EXPIRE
);
3301 for (ifq
= ips_utqe
; ifq
!= NULL
; ifq
= ifq
->ifq_next
) {
3302 for (tqn
= ifq
->ifq_head
; ((tqe
= tqn
) != NULL
); ) {
3303 if (tqe
->tqe_die
> fr_ticks
)
3305 tqn
= tqe
->tqe_next
;
3306 is
= tqe
->tqe_parent
;
3307 fr_delstate(is
, ISL_EXPIRE
);
3311 for (ifq
= ips_utqe
; ifq
!= NULL
; ifq
= ifqnext
) {
3312 ifqnext
= ifq
->ifq_next
;
3314 if (((ifq
->ifq_flags
& IFQF_DELETE
) != 0) &&
3315 (ifq
->ifq_ref
== 0)) {
3316 fr_freetimeoutqueue(ifq
);
3320 if (fr_state_doflush
) {
3321 (void) fr_state_flush(2, 0);
3322 fr_state_doflush
= 0;
3325 RWLOCK_EXIT(&ipf_state
);
3330 /* ------------------------------------------------------------------------ */
3331 /* Function: fr_state_flush */
3332 /* Returns: int - 0 == success, -1 == failure */
3333 /* Parameters: Nil */
3334 /* Write Locks: ipf_state */
3336 /* Flush state tables. Three actions currently defined: */
3337 /* which == 0 : flush all state table entries */
3338 /* which == 1 : flush TCP connections which have started to close but are */
3339 /* stuck for some reason. */
3340 /* which == 2 : flush TCP connections which have been idle for a long time, */
3341 /* starting at > 4 days idle and working back in successive half-*/
3342 /* days to at most 12 hours old. If this fails to free enough */
3343 /* slots then work backwards in half hour slots to 30 minutes. */
3344 /* If that too fails, then work backwards in 30 second intervals */
3345 /* for the last 30 minutes to at worst 30 seconds idle. */
3346 /* ------------------------------------------------------------------------ */
3347 int fr_state_flush(which
, proto
)
3350 ipftq_t
*ifq
, *ifqnext
;
3351 ipftqent_t
*tqe
, *tqn
;
3352 ipstate_t
*is
, **isp
;
3364 * Style 0 flush removes everything...
3366 for (isp
= &ips_list
; ((is
= *isp
) != NULL
); ) {
3367 if ((proto
!= 0) && (is
->is_v
!= proto
)) {
3371 if (fr_delstate(is
, ISL_FLUSH
) == 0)
3380 * Since we're only interested in things that are closing,
3381 * we can start with the appropriate timeout queue.
3383 for (ifq
= ips_tqtqb
+ IPF_TCPS_CLOSE_WAIT
; ifq
!= NULL
;
3384 ifq
= ifq
->ifq_next
) {
3386 for (tqn
= ifq
->ifq_head
; ((tqe
= tqn
) != NULL
); ) {
3387 tqn
= tqe
->tqe_next
;
3388 is
= tqe
->tqe_parent
;
3389 if (is
->is_p
!= IPPROTO_TCP
)
3391 if (fr_delstate(is
, ISL_EXPIRE
) == 0)
3397 * Also need to look through the user defined queues.
3399 for (ifq
= ips_utqe
; ifq
!= NULL
; ifq
= ifqnext
) {
3400 ifqnext
= ifq
->ifq_next
;
3401 for (tqn
= ifq
->ifq_head
; ((tqe
= tqn
) != NULL
); ) {
3402 tqn
= tqe
->tqe_next
;
3403 is
= tqe
->tqe_parent
;
3404 if (is
->is_p
!= IPPROTO_TCP
)
3407 if ((is
->is_state
[0] > IPF_TCPS_ESTABLISHED
) &&
3408 (is
->is_state
[1] > IPF_TCPS_ESTABLISHED
)) {
3409 if (fr_delstate(is
, ISL_EXPIRE
) == 0)
3420 * Args 5-11 correspond to flushing those particular states
3421 * for TCP connections.
3423 case IPF_TCPS_CLOSE_WAIT
:
3424 case IPF_TCPS_FIN_WAIT_1
:
3425 case IPF_TCPS_CLOSING
:
3426 case IPF_TCPS_LAST_ACK
:
3427 case IPF_TCPS_FIN_WAIT_2
:
3428 case IPF_TCPS_TIME_WAIT
:
3429 case IPF_TCPS_CLOSED
:
3430 tqn
= ips_tqtqb
[which
].ifq_head
;
3431 while (tqn
!= NULL
) {
3433 tqn
= tqe
->tqe_next
;
3434 is
= tqe
->tqe_parent
;
3435 if (fr_delstate(is
, ISL_FLUSH
) == 0)
3445 * Take a large arbitrary number to mean the number of seconds
3446 * for which which consider to be the maximum value we'll allow
3447 * the expiration to be.
3449 which
= IPF_TTLVAL(which
);
3450 for (isp
= &ips_list
; ((is
= *isp
) != NULL
); ) {
3451 if ((proto
== 0) || (is
->is_v
== proto
)) {
3452 if (fr_ticks
- is
->is_touched
> which
) {
3453 if (fr_delstate(is
, ISL_FLUSH
) == 0) {
3470 * Asked to remove inactive entries because the table is full.
3472 if (fr_ticks
- ips_last_force_flush
> IPF_TTLVAL(5)) {
3473 ips_last_force_flush
= fr_ticks
;
3474 removed
= ipf_queueflush(fr_state_flush_entry
, ips_tqtqb
,
3483 /* ------------------------------------------------------------------------ */
3484 /* Function: fr_state_flush_entry */
3485 /* Returns: int - 0 = entry deleted, else not deleted */
3486 /* Parameters: entry(I) - pointer to state structure to delete */
3487 /* Write Locks: ipf_state */
3489 /* This function is a stepping stone between ipf_queueflush() and */
3490 /* fr_delstate(). It is used so we can provide a uniform interface via the */
3491 /* ipf_queueflush() function. */
3492 /* ------------------------------------------------------------------------ */
3493 static int fr_state_flush_entry(entry
)
3496 return fr_delstate(entry
, ISL_FLUSH
);
3500 /* ------------------------------------------------------------------------ */
3501 /* Function: fr_tcp_age */
3502 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */
3503 /* Parameters: tq(I) - pointer to timeout queue information */
3504 /* fin(I) - pointer to packet information */
3505 /* tqtab(I) - TCP timeout queue table this is in */
3506 /* flags(I) - flags from state/NAT entry */
3508 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */
3510 /* - (try to) base state transitions on real evidence only, */
3511 /* i.e. packets that are sent and have been received by ipfilter; */
3512 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */
3514 /* - deal with half-closed connections correctly; */
3516 /* - store the state of the source in state[0] such that ipfstat */
3517 /* displays the state as source/dest instead of dest/source; the calls */
3518 /* to fr_tcp_age have been changed accordingly. */
3520 /* Internal Parameters: */
3522 /* state[0] = state of source (host that initiated connection) */
3523 /* state[1] = state of dest (host that accepted the connection) */
3525 /* dir == 0 : a packet from source to dest */
3526 /* dir == 1 : a packet from dest to source */
3528 /* A typical procession for a connection is as follows: */
3530 /* +--------------+-------------------+ */
3531 /* | Side '0' | Side '1' | */
3532 /* +--------------+-------------------+ */
3533 /* | 0 -> 1 (SYN) | | */
3534 /* | | 0 -> 2 (SYN-ACK) | */
3535 /* | 1 -> 3 (ACK) | | */
3536 /* | | 2 -> 4 (ACK-PUSH) | */
3537 /* | 3 -> 4 (ACK) | | */
3539 /* | | 4 -> 6 (FIN-ACK) | */
3540 /* | 4 -> 5 (ACK) | | */
3541 /* | | 6 -> 6 (ACK-PUSH) | */
3542 /* | 5 -> 5 (ACK) | | */
3543 /* | 5 -> 8 (FIN) | | */
3544 /* | | 6 -> 10 (ACK) | */
3545 /* +--------------+-------------------+ */
3547 /* Locking: it is assumed that the parent of the tqe structure is locked. */
3548 /* ------------------------------------------------------------------------ */
3549 int fr_tcp_age(tqe
, fin
, tqtab
, flags
)
3555 int dlen
, ostate
, nstate
, rval
, dir
;
3563 tcpflags
= tcp
->th_flags
;
3564 dlen
= fin
->fin_dlen
- (TCP_OFF(tcp
) << 2);
3565 ostate
= tqe
->tqe_state
[1 - dir
];
3566 nstate
= tqe
->tqe_state
[dir
];
3568 if (tcpflags
& TH_RST
) {
3569 if (!(tcpflags
& TH_PUSH
) && !dlen
)
3570 nstate
= IPF_TCPS_CLOSED
;
3572 nstate
= IPF_TCPS_CLOSE_WAIT
;
3574 if (ostate
<= IPF_TCPS_ESTABLISHED
) {
3575 tqe
->tqe_state
[1 - dir
] = IPF_TCPS_CLOSE_WAIT
;
3581 case IPF_TCPS_LISTEN
: /* 0 */
3582 if ((tcpflags
& TH_OPENING
) == TH_OPENING
) {
3584 * 'dir' received an S and sends SA in
3585 * response, LISTEN -> SYN_RECEIVED
3587 nstate
= IPF_TCPS_SYN_RECEIVED
;
3589 } else if ((tcpflags
& TH_OPENING
) == TH_SYN
) {
3590 /* 'dir' sent S, LISTEN -> SYN_SENT */
3591 nstate
= IPF_TCPS_SYN_SENT
;
3595 * the next piece of code makes it possible to get
3596 * already established connections into the state table
3597 * after a restart or reload of the filter rules; this
3598 * does not work when a strict 'flags S keep state' is
3599 * used for tcp connections of course
3601 if (((flags
& IS_TCPFSM
) == 0) &&
3602 ((tcpflags
& TH_ACKMASK
) == TH_ACK
)) {
3604 * we saw an A, guess 'dir' is in ESTABLISHED
3609 case IPF_TCPS_LISTEN
:
3610 case IPF_TCPS_SYN_RECEIVED
:
3611 nstate
= IPF_TCPS_HALF_ESTAB
;
3614 case IPF_TCPS_HALF_ESTAB
:
3615 case IPF_TCPS_ESTABLISHED
:
3616 nstate
= IPF_TCPS_ESTABLISHED
;
3624 * TODO: besides regular ACK packets we can have other
3625 * packets as well; it is yet to be determined how we
3626 * should initialize the states in those cases
3630 case IPF_TCPS_SYN_SENT
: /* 1 */
3631 if ((tcpflags
& ~(TH_ECN
|TH_CWR
)) == TH_SYN
) {
3633 * A retransmitted SYN packet. We do not reset
3634 * the timeout here to fr_tcptimeout because a
3635 * connection connect timeout does not renew
3636 * after every packet that is sent. We need to
3637 * set rval so as to indicate the packet has
3638 * passed the check for its flags being valid
3639 * in the TCP FSM. Setting rval to 2 has the
3640 * result of not resetting the timeout.
3643 } else if ((tcpflags
& (TH_SYN
|TH_FIN
|TH_ACK
)) ==
3646 * we see an A from 'dir' which is in SYN_SENT
3647 * state: 'dir' sent an A in response to an SA
3648 * which it received, SYN_SENT -> ESTABLISHED
3650 nstate
= IPF_TCPS_ESTABLISHED
;
3652 } else if (tcpflags
& TH_FIN
) {
3654 * we see an F from 'dir' which is in SYN_SENT
3655 * state and wants to close its side of the
3656 * connection; SYN_SENT -> FIN_WAIT_1
3658 nstate
= IPF_TCPS_FIN_WAIT_1
;
3660 } else if ((tcpflags
& TH_OPENING
) == TH_OPENING
) {
3662 * we see an SA from 'dir' which is already in
3663 * SYN_SENT state, this means we have a
3664 * simultaneous open; SYN_SENT -> SYN_RECEIVED
3666 nstate
= IPF_TCPS_SYN_RECEIVED
;
3671 case IPF_TCPS_SYN_RECEIVED
: /* 2 */
3672 if ((tcpflags
& (TH_SYN
|TH_FIN
|TH_ACK
)) == TH_ACK
) {
3674 * we see an A from 'dir' which was in
3675 * SYN_RECEIVED state so it must now be in
3676 * established state, SYN_RECEIVED ->
3679 nstate
= IPF_TCPS_ESTABLISHED
;
3681 } else if ((tcpflags
& ~(TH_ECN
|TH_CWR
)) ==
3684 * We see an SA from 'dir' which is already in
3685 * SYN_RECEIVED state.
3688 } else if (tcpflags
& TH_FIN
) {
3690 * we see an F from 'dir' which is in
3691 * SYN_RECEIVED state and wants to close its
3692 * side of the connection; SYN_RECEIVED ->
3695 nstate
= IPF_TCPS_FIN_WAIT_1
;
3700 case IPF_TCPS_HALF_ESTAB
: /* 3 */
3701 if (tcpflags
& TH_FIN
) {
3702 nstate
= IPF_TCPS_FIN_WAIT_1
;
3704 } else if ((tcpflags
& TH_ACKMASK
) == TH_ACK
) {
3706 * If we've picked up a connection in mid
3707 * flight, we could be looking at a follow on
3708 * packet from the same direction as the one
3709 * that created this state. Recognise it but
3710 * do not advance the entire connection's
3715 case IPF_TCPS_LISTEN
:
3716 case IPF_TCPS_SYN_SENT
:
3717 case IPF_TCPS_SYN_RECEIVED
:
3720 case IPF_TCPS_HALF_ESTAB
:
3721 case IPF_TCPS_ESTABLISHED
:
3722 nstate
= IPF_TCPS_ESTABLISHED
;
3731 case IPF_TCPS_ESTABLISHED
: /* 4 */
3733 if (tcpflags
& TH_FIN
) {
3735 * 'dir' closed its side of the connection;
3736 * this gives us a half-closed connection;
3737 * ESTABLISHED -> FIN_WAIT_1
3739 if (ostate
== IPF_TCPS_FIN_WAIT_1
) {
3740 nstate
= IPF_TCPS_CLOSING
;
3742 nstate
= IPF_TCPS_FIN_WAIT_1
;
3744 } else if (tcpflags
& TH_ACK
) {
3746 * an ACK, should we exclude other flags here?
3748 if (ostate
== IPF_TCPS_FIN_WAIT_1
) {
3750 * We know the other side did an active
3751 * close, so we are ACKing the recvd
3752 * FIN packet (does the window matching
3753 * code guarantee this?) and go into
3754 * CLOSE_WAIT state; this gives us a
3755 * half-closed connection
3757 nstate
= IPF_TCPS_CLOSE_WAIT
;
3758 } else if (ostate
< IPF_TCPS_CLOSE_WAIT
) {
3760 * still a fully established
3761 * connection reset timeout
3763 nstate
= IPF_TCPS_ESTABLISHED
;
3768 case IPF_TCPS_CLOSE_WAIT
: /* 5 */
3770 if (tcpflags
& TH_FIN
) {
3772 * application closed and 'dir' sent a FIN,
3773 * we're now going into LAST_ACK state
3775 nstate
= IPF_TCPS_LAST_ACK
;
3778 * we remain in CLOSE_WAIT because the other
3779 * side has closed already and we did not
3780 * close our side yet; reset timeout
3782 nstate
= IPF_TCPS_CLOSE_WAIT
;
3786 case IPF_TCPS_FIN_WAIT_1
: /* 6 */
3788 if ((tcpflags
& TH_ACK
) &&
3789 ostate
> IPF_TCPS_CLOSE_WAIT
) {
3791 * if the other side is not active anymore
3792 * it has sent us a FIN packet that we are
3793 * ack'ing now with an ACK; this means both
3794 * sides have now closed the connection and
3795 * we go into TIME_WAIT
3798 * XXX: how do we know we really are ACKing
3799 * the FIN packet here? does the window code
3802 nstate
= IPF_TCPS_TIME_WAIT
;
3805 * we closed our side of the connection
3806 * already but the other side is still active
3807 * (ESTABLISHED/CLOSE_WAIT); continue with
3808 * this half-closed connection
3810 nstate
= IPF_TCPS_FIN_WAIT_1
;
3814 case IPF_TCPS_CLOSING
: /* 7 */
3815 if ((tcpflags
& (TH_FIN
|TH_ACK
)) == TH_ACK
) {
3816 nstate
= IPF_TCPS_TIME_WAIT
;
3821 case IPF_TCPS_LAST_ACK
: /* 8 */
3822 if (tcpflags
& TH_ACK
) {
3823 if ((tcpflags
& TH_PUSH
) || dlen
)
3825 * there is still data to be delivered,
3833 * we cannot detect when we go out of LAST_ACK state to
3834 * CLOSED because that is based on the reception of ACK
3835 * packets; ipfilter can only detect that a packet
3836 * has been sent by a host
3840 case IPF_TCPS_FIN_WAIT_2
: /* 9 */
3844 case IPF_TCPS_TIME_WAIT
: /* 10 */
3845 /* we're in 2MSL timeout now */
3846 if (ostate
== IPF_TCPS_LAST_ACK
) {
3847 nstate
= IPF_TCPS_CLOSED
;
3852 case IPF_TCPS_CLOSED
: /* 11 */
3857 #if defined(_KERNEL)
3860 "tcp %lx flags %x si %lx nstate %d ostate %d\n",
3861 (u_long
)tcp
, tcpflags
, (u_long
)tqe
,
3864 printf("tcp %lx flags %x si %lx nstate %d ostate %d\n",
3865 (u_long
)tcp
, tcpflags
, (u_long
)tqe
,
3876 * If rval == 2 then do not update the queue position, but treat the
3877 * packet as being ok.
3881 else if (rval
== 1) {
3882 tqe
->tqe_state
[dir
] = nstate
;
3883 if ((tqe
->tqe_flags
& TQE_RULEBASED
) == 0)
3884 fr_movequeue(tqe
, tqe
->tqe_ifq
, tqtab
+ nstate
);
3891 /* ------------------------------------------------------------------------ */
3892 /* Function: ipstate_log */
3894 /* Parameters: is(I) - pointer to state structure */
3895 /* type(I) - type of log entry to create */
3897 /* Creates a state table log entry using the state structure and type info. */
3898 /* passed in. Log packet/byte counts, source/destination address and other */
3899 /* protocol specific information. */
3900 /* ------------------------------------------------------------------------ */
3901 void ipstate_log(is
, type
)
3912 * Copy information out of the ipstate_t structure and into the
3913 * structure used for logging.
3915 ipsl
.isl_type
= type
;
3916 ipsl
.isl_pkts
[0] = is
->is_pkts
[0] + is
->is_icmppkts
[0];
3917 ipsl
.isl_bytes
[0] = is
->is_bytes
[0];
3918 ipsl
.isl_pkts
[1] = is
->is_pkts
[1] + is
->is_icmppkts
[1];
3919 ipsl
.isl_bytes
[1] = is
->is_bytes
[1];
3920 ipsl
.isl_pkts
[2] = is
->is_pkts
[2] + is
->is_icmppkts
[2];
3921 ipsl
.isl_bytes
[2] = is
->is_bytes
[2];
3922 ipsl
.isl_pkts
[3] = is
->is_pkts
[3] + is
->is_icmppkts
[3];
3923 ipsl
.isl_bytes
[3] = is
->is_bytes
[3];
3924 ipsl
.isl_src
= is
->is_src
;
3925 ipsl
.isl_dst
= is
->is_dst
;
3926 ipsl
.isl_p
= is
->is_p
;
3927 ipsl
.isl_v
= is
->is_v
;
3928 ipsl
.isl_flags
= is
->is_flags
;
3929 ipsl
.isl_tag
= is
->is_tag
;
3930 ipsl
.isl_rulen
= is
->is_rulen
;
3931 (void) strncpy(ipsl
.isl_group
, is
->is_group
, FR_GROUPLEN
);
3933 if (ipsl
.isl_p
== IPPROTO_TCP
|| ipsl
.isl_p
== IPPROTO_UDP
) {
3934 ipsl
.isl_sport
= is
->is_sport
;
3935 ipsl
.isl_dport
= is
->is_dport
;
3936 if (ipsl
.isl_p
== IPPROTO_TCP
) {
3937 ipsl
.isl_state
[0] = is
->is_state
[0];
3938 ipsl
.isl_state
[1] = is
->is_state
[1];
3940 } else if (ipsl
.isl_p
== IPPROTO_ICMP
) {
3941 ipsl
.isl_itype
= is
->is_icmp
.ici_type
;
3942 } else if (ipsl
.isl_p
== IPPROTO_ICMPV6
) {
3943 ipsl
.isl_itype
= is
->is_icmp
.ici_type
;
3945 ipsl
.isl_ps
.isl_filler
[0] = 0;
3946 ipsl
.isl_ps
.isl_filler
[1] = 0;
3950 sizes
[0] = sizeof(ipsl
);
3953 if (ipllog(IPL_LOGSTATE
, NULL
, items
, sizes
, types
, 1)) {
3954 ATOMIC_INCL(ips_stats
.iss_logged
);
3956 ATOMIC_INCL(ips_stats
.iss_logfail
);
3963 /* ------------------------------------------------------------------------ */
3964 /* Function: fr_checkicmp6matchingstate */
3965 /* Returns: ipstate_t* - NULL == no match found, */
3966 /* else pointer to matching state entry */
3967 /* Parameters: fin(I) - pointer to packet information */
3968 /* Locks: NULL == no locks, else Read Lock on ipf_state */
3970 /* If we've got an ICMPv6 error message, using the information stored in */
3971 /* the ICMPv6 packet, look for a matching state table entry. */
/*                                                                          */
/* A scratch fr_info_t (ofin) is built from the embedded IPv6 header so     */
/* that it can be handed to fr_matchsrcdst(). While fr_makefrip() runs, the */
/* embedded ip6_plen is overwritten with fin_dlen - ICMPERR_ICMPHLEN and is */
/* restored from savelen straight afterwards.                               */
/*                                                                          */
/* Two bucket walks over ips_table[hv] are visible, each bracketed by       */
/* READ_ENTER/RWLOCK_EXIT on ipf_state:                                     */
/*  - embedded ICMPv6: hash of next-header, the in4.s_addr view of both     */
/*    addresses and the ICMPv6 id/seq, passed through DOUBLE_HASH;          */
/*  - the later walk, which reads ports through a tcphdr_t overlay when the */
/*    embedded protocol is TCP or UDP.                                      */
/* NOTE(review): several statements of this function (declarations,        */
/* returns, braces) are not visible in this excerpt - confirm against the   */
/* full source before relying on the exact control flow.                    */
3972 /* ------------------------------------------------------------------------ */
3973 static ipstate_t
*fr_checkicmp6matchingstate(fin
)
3976 struct icmp6_hdr
*ic6
, *oic
;
3977 int type
, backward
, i
;
3978 ipstate_t
*is
, **isp
;
3979 u_short sport
, dport
;
/*
 * Precondition test: the packet must be IPv6, have fin_plen of at least
 * ICMP6ERR_MINPKTLEN and carry FI_ICMPERR in fin_flx.
 */
3990 * Does it at least have the return (basic) IP header ?
3991 * Is it an actual recognised ICMP error type?
3992 * Only a basic IP header (no options) should be with
3993 * an ICMP error header.
3995 if ((fin
->fin_v
!= 6) || (fin
->fin_plen
< ICMP6ERR_MINPKTLEN
) ||
3996 !(fin
->fin_flx
& FI_ICMPERR
))
4000 type
= ic6
->icmp6_type
;
/* The embedded IPv6 header is located ICMPERR_ICMPHLEN bytes past ic6. */
4002 oip6
= (ip6_t
*)((char *)ic6
+ ICMPERR_ICMPHLEN
);
4003 if (fin
->fin_plen
< sizeof(*oip6
))
/*
 * ofin starts as a byte copy of fin; the direction is then inverted and
 * both mbuf pointers are cleared.
 */
4006 bcopy((char *)fin
, (char *)&ofin
, sizeof(*fin
));
4008 ofin
.fin_ifp
= fin
->fin_ifp
;
4009 ofin
.fin_out
= !fin
->fin_out
;
4010 ofin
.fin_m
= NULL
; /* if dereferenced, panic XXX */
4011 ofin
.fin_mp
= NULL
; /* if dereferenced, panic XXX */
4014 * We make a fin entry to be able to feed it to
4015 * matchsrcdst. Note that not all fields are necessary
4016 * but this is the cleanest way. Note further we fill
4017 * in fin_mp such that if someone uses it we'll get
4018 * a kernel panic. fr_matchsrcdst does not use this.
4020 * watch out here, as ip is in host order and oip6 in network
4021 * order. Any change we make must be undone afterwards.
4023 savelen
= oip6
->ip6_plen
;
4024 oip6
->ip6_plen
= fin
->fin_dlen
- ICMPERR_ICMPHLEN
;
4025 ofin
.fin_flx
= FI_NOCKSUM
;
4026 ofin
.fin_ip
= (ip_t
*)oip6
;
4027 (void) fr_makefrip(sizeof(*oip6
), (ip_t
*)oip6
, &ofin
);
4028 ofin
.fin_flx
&= ~(FI_BAD
|FI_SHORT
);
4029 oip6
->ip6_plen
= savelen
;
4031 if (oip6
->ip6_nxt
== IPPROTO_ICMPV6
) {
4032 oic
= (struct icmp6_hdr
*)(oip6
+ 1);
4034 * an ICMP error can only be generated as a result of an
4035 * ICMP query, not as the response on an ICMP error
4037 * XXX theoretically ICMP_ECHOREP and the other reply's are
4038 * ICMP query's as well, but adding them here seems strange XXX
4040 if (!(oic
->icmp6_type
& ICMP6_INFOMSG_MASK
))
4044 * perform a lookup of the ICMP packet in the state table
4046 hv
= (pr
= oip6
->ip6_nxt
);
4047 src
.in6
= oip6
->ip6_src
;
4048 hv
+= src
.in4
.s_addr
;
4049 dst
.in6
= oip6
->ip6_dst
;
4050 hv
+= dst
.in4
.s_addr
;
4051 hv
+= oic
->icmp6_id
;
4052 hv
+= oic
->icmp6_seq
;
4053 hv
= DOUBLE_HASH(hv
);
4055 READ_ENTER(&ipf_state
);
4056 for (isp
= &ips_table
[hv
]; ((is
= *isp
) != NULL
); ) {
4058 isp
= &is
->is_hnext
;
4059 if ((is
->is_p
== pr
) &&
4060 !(is
->is_pass
& FR_NOICMPERR
) &&
4061 (oic
->icmp6_id
== ic
->ici_id
) &&
4062 (oic
->icmp6_seq
== ic
->ici_seq
) &&
4063 (is
= fr_matchsrcdst(&ofin
, is
, &src
,
4064 &dst
, NULL
, FI_ICMPCMP
))) {
4066 * in the state table ICMP query's are stored
4067 * with the type of the corresponding ICMP
4068 * response. Correct here
4070 if (((ic
->ici_type
== ICMP6_ECHO_REPLY
) &&
4071 (oic
->icmp6_type
== ICMP6_ECHO_REQUEST
)) ||
4072 (ic
->ici_type
- 1 == oic
->icmp6_type
)) {
4073 ips_stats
.iss_hits
++;
4074 backward
= IP6_NEQ(&is
->is_dst
, &src
);
4075 fin
->fin_rev
= !backward
;
4076 i
= (backward
<< 1) + fin
->fin_out
;
4077 is
->is_icmppkts
[i
]++;
4082 RWLOCK_EXIT(&ipf_state
);
/*
 * Second lookup: the hash is restarted from the embedded next-header
 * value and the embedded source/destination addresses are reloaded.
 */
4086 hv
= (pr
= oip6
->ip6_nxt
);
4087 src
.in6
= oip6
->ip6_src
;
4092 dst
.in6
= oip6
->ip6_dst
;
/*
 * For TCP and UDP the ports are read through a tcphdr_t overlay placed
 * directly after the embedded IPv6 header.
 */
4098 if ((oip6
->ip6_nxt
== IPPROTO_TCP
) || (oip6
->ip6_nxt
== IPPROTO_UDP
)) {
4099 tcp
= (tcphdr_t
*)(oip6
+ 1);
4100 dport
= tcp
->th_dport
;
4101 sport
= tcp
->th_sport
;
4106 hv
= DOUBLE_HASH(hv
);
4108 READ_ENTER(&ipf_state
);
4109 for (isp
= &ips_table
[hv
]; ((is
= *isp
) != NULL
); ) {
4110 isp
= &is
->is_hnext
;
4112 * Only allow this icmp though if the
4113 * encapsulated packet was allowed through the
4114 * other way around. Note that the minimal amount
4115 * of info present does not allow for checking against
4116 * tcp internals such as seq and ack numbers.
/*
 * Test for entries that cannot match: different protocol, not an IPv6
 * entry (is_v != 6), or the entry carries FR_NOICMPERR.
 */
4118 if ((is
->is_p
!= pr
) || (is
->is_v
!= 6) ||
4119 (is
->is_pass
& FR_NOICMPERR
))
4121 is
= fr_matchsrcdst(&ofin
, is
, &src
, &dst
, tcp
, FI_ICMPCMP
);
/*
 * Hit accounting: backward is set when the entry destination differs
 * from the embedded source; the counter index combines it with fin_out.
 */
4123 ips_stats
.iss_hits
++;
4124 backward
= IP6_NEQ(&is
->is_dst
, &src
);
4125 fin
->fin_rev
= !backward
;
4126 i
= (backward
<< 1) + fin
->fin_out
;
4127 is
->is_icmppkts
[i
]++;
4129 * we deliberately do not touch the timeouts
4130 * for the accompanying state table entry.
4131 * It remains to be seen if that is correct. XXX
4136 RWLOCK_EXIT(&ipf_state
);
4142 /* ------------------------------------------------------------------------ */
4143 /* Function: fr_sttab_init */
/* Returns: nothing (the function is declared void) */
4145 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
4147 /* Initialise the array of timeout queues for TCP. */
/* Every one of the IPF_TCP_NSTATES entries is emptied, linked to the entry */
/* that follows it and given its own lock; the last entry terminates the    */
/* chain with NULL. Each TCP state then receives its timeout from one of    */
/* the fr_tcp* variables.                                                   */
4148 /* ------------------------------------------------------------------------ */
4149 void fr_sttab_init(tqp
)
/* Walk the array from the highest state index down to 0. */
4154 for (i
= IPF_TCP_NSTATES
- 1; i
>= 0; i
--) {
/* Empty queue: no head, and the tail pointer refers back to the head slot. */
4157 tqp
[i
].ifq_head
= NULL
;
4158 tqp
[i
].ifq_tail
= &tqp
[i
].ifq_head
;
/* Chain to the following array element. */
4159 tqp
[i
].ifq_next
= tqp
+ i
+ 1;
4160 MUTEX_INIT(&tqp
[i
].ifq_lock
, "ipftq tcp tab");
/*
 * The loop left the last entry pointing one element past the array;
 * overwrite that link with NULL.
 */
4162 tqp
[IPF_TCP_NSTATES
- 1].ifq_next
= NULL
;
/* Timeout assigned to each TCP state queue. */
4163 tqp
[IPF_TCPS_CLOSED
].ifq_ttl
= fr_tcpclosed
;
4164 tqp
[IPF_TCPS_LISTEN
].ifq_ttl
= fr_tcptimeout
;
4165 tqp
[IPF_TCPS_SYN_SENT
].ifq_ttl
= fr_tcptimeout
;
4166 tqp
[IPF_TCPS_SYN_RECEIVED
].ifq_ttl
= fr_tcptimeout
;
4167 tqp
[IPF_TCPS_ESTABLISHED
].ifq_ttl
= fr_tcpidletimeout
;
4168 tqp
[IPF_TCPS_CLOSE_WAIT
].ifq_ttl
= fr_tcphalfclosed
;
4169 tqp
[IPF_TCPS_FIN_WAIT_1
].ifq_ttl
= fr_tcphalfclosed
;
4170 tqp
[IPF_TCPS_CLOSING
].ifq_ttl
= fr_tcptimeout
;
4171 tqp
[IPF_TCPS_LAST_ACK
].ifq_ttl
= fr_tcplastack
;
4172 tqp
[IPF_TCPS_FIN_WAIT_2
].ifq_ttl
= fr_tcpclosewait
;
4173 tqp
[IPF_TCPS_TIME_WAIT
].ifq_ttl
= fr_tcptimewait
;
4174 tqp
[IPF_TCPS_HALF_ESTAB
].ifq_ttl
= fr_tcptimeout
;
4178 /* ------------------------------------------------------------------------ */
4179 /* Function: fr_sttab_destroy */
/* Returns: nothing (the function is declared void) */
4181 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
4183 /* Do whatever is necessary to "destroy" each of the entries in the array */
4184 /* of timeout queues for TCP. */
/* In the code visible here that amounts to destroying the ifq_lock of each */
/* of the IPF_TCP_NSTATES entries; the queue contents are not touched.      */
4185 /* ------------------------------------------------------------------------ */
4186 void fr_sttab_destroy(tqp
)
/* Visit every queue, from the highest state index down to 0. */
4191 for (i
= IPF_TCP_NSTATES
- 1; i
>= 0; i
--)
4192 MUTEX_DESTROY(&tqp
[i
].ifq_lock
);
4196 /* ------------------------------------------------------------------------ */
4197 /* Function: fr_statederef */
/* Returns: nothing (the function is declared void) */
4199 /* Parameters: isp(I) - pointer to pointer to state table entry */
4201 /* Decrement the reference counter for this state table entry and free it */
4202 /* if there are no more things using it. */
4204 /* This function is only called when cleaning up after increasing is_ref by */
4205 /* one earlier in the 'code path' so if is_ref is 1 when entering, we do */
4206 /* have an orphan, otherwise not. However there is a possible race between */
4207 /* the entry being deleted via flushing with an ioctl call (that calls the */
4208 /* delete function directly) and the tail end of packet processing so we */
4209 /* need to grab is_lock before doing the check to synchronise the two code paths. */
4212 /* When operating in userland (ipftest), we have no timers to clear a state */
4213 /* entry. Therefore, we make a few simple tests before deleting an entry */
4214 /* outright. We compare states on each side looking for a combination of */
4215 /* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK. Then we factor */
4216 /* in packet direction with the interface list to make sure we don't */
4217 /* prematurely delete an entry on a final inbound packet that we're also */
4218 /* supposed to route elsewhere. */
/* NOTE(review): the userland state comparison described above is not      */
/* visible in this excerpt - confirm that it still exists in the full code. */
4220 /* Internal parameters: */
4221 /* state[0] = state of source (host that initiated connection) */
4222 /* state[1] = state of dest (host that accepted the connection) */
4224 /* dir == 0 : a packet from source to dest */
4225 /* dir == 1 : a packet from dest to source */
4226 /* ------------------------------------------------------------------------ */
4227 void fr_statederef(isp
)
/* is_ref is only examined while is_lock is held. */
4235 MUTEX_ENTER(&is
->is_lock
);
/*
 * More than one reference: the lock is released inside this branch.
 * NOTE(review): the remaining statements of the branch are not visible
 * in this excerpt.
 */
4236 if (is
->is_ref
> 1) {
4238 MUTEX_EXIT(&is
->is_lock
);
/*
 * Otherwise is_lock is dropped first and the entry is then removed with
 * fr_delstate(is, ISL_EXPIRE) while holding the ipf_state write lock.
 */
4241 MUTEX_EXIT(&is
->is_lock
);
4243 WRITE_ENTER(&ipf_state
);
4244 fr_delstate(is
, ISL_EXPIRE
);
4245 RWLOCK_EXIT(&ipf_state
);
4249 /* ------------------------------------------------------------------------ */
4250 /* Function: fr_setstatequeue */
/* Returns: nothing (the function is declared void) */
4252 /* Parameters: is(I) - pointer to state structure */
4253 /* rev(I) - forward(0) or reverse(1) direction */
4254 /* Locks: ipf_state (read or write) */
4256 /* Put the state entry on its default queue entry, using rev as a helper in */
4257 /* determining which queue it should be placed on. */
/* The chosen queue (nifq) is compared against the queue currently recorded */
/* in is_sti.tqe_ifq (oifq): the entry is either moved with fr_movequeue()  */
/* or appended with fr_queueappend().                                       */
4258 /* ------------------------------------------------------------------------ */
4259 void fr_setstatequeue(is
, rev
)
4263 ipftq_t
*oifq
, *nifq
;
/*
 * Entries flagged TQE_RULEBASED take the per-direction queue head stored
 * in the entry itself (is_tqehead[rev]).
 */
4266 if ((is
->is_sti
.tqe_flags
& TQE_RULEBASED
) != 0)
4267 nifq
= is
->is_tqehead
[rev
];
/*
 * Otherwise one of the built-in queues is selected. The assignments
 * visible here use ips_icmpacktq, ips_udpacktq and, indexed by
 * is_state[rev], the ips_tqtqb array.
 * NOTE(review): the selecting switch/conditions are not fully visible in
 * this excerpt.
 */
4275 case IPPROTO_ICMPV6
:
4277 nifq
= &ips_icmpacktq
;
4284 nifq
= &ips_icmpacktq
;
4289 nifq
= ips_tqtqb
+ is
->is_state
[rev
];
4294 nifq
= &ips_udpacktq
;
/* Queue the entry is on at the moment. */
4305 oifq
= is
->is_sti
.tqe_ifq
;
4307 * If it's currently on a timeout queue, move it from one queue to
4308 * another, else put it on the end of the newly determined queue.
4311 fr_movequeue(&is
->is_sti
, oifq
, nifq
);
4313 fr_queueappend(&is
->is_sti
, nifq
, is
);
4318 /* ------------------------------------------------------------------------ */
4319 /* Function: fr_stateiter */
4320 /* Returns: int - 0 == success, else error */
4321 /* Parameters: token(I) - pointer to ipftoken structure */
4322 /* itp(I) - pointer to ipfgeniter structure */
4324 /* This function handles the SIOCGENITER ioctl for the state tables and */
4325 /* walks through the list of entries in the state table list (ips_list.) */
/*                                                                          */
/* The iteration position is kept in token->ipt_data. Up to igi_nitems      */
/* entries are copied out to igi_data, one sizeof(ipstate_t) slot each.     */
/* ipf_state is read-locked while the next entry is located and released    */
/* before each COPYOUT.                                                     */
/* NOTE(review): several statements (returns, reference handling, braces)   */
/* are not visible in this excerpt - confirm against the full source.       */
4326 /* ------------------------------------------------------------------------ */
4327 static int fr_stateiter(token
, itp
)
4331 ipstate_t
*is
, *next
, zero
;
/*
 * Request checks: a destination buffer must be supplied, at least one
 * item must be requested and the iterator type must be IPFGENITER_STATE.
 */
4335 if (itp
->igi_data
== NULL
)
4338 if (itp
->igi_nitems
< 1)
4341 if (itp
->igi_type
!= IPFGENITER_STATE
)
4346 READ_ENTER(&ipf_state
);
4349 * Get "previous" entry from the token, and find the next entry
4352 is
= token
->ipt_data
;
/* dst tracks the current output slot in the caller supplied buffer. */
4359 dst
= itp
->igi_data
;
4360 for (count
= itp
->igi_nitems
; count
> 0; count
--) {
4362 * If we found an entry, add a reference and update the token.
4363 * Otherwise, zero out data to be returned and NULL out token.
4366 MUTEX_ENTER(&next
->is_lock
);
4368 MUTEX_EXIT(&next
->is_lock
);
4369 token
->ipt_data
= next
;
/* No entry: an all-zero ipstate_t is prepared and the token is cleared. */
4371 bzero(&zero
, sizeof(zero
));
4373 token
->ipt_data
= NULL
;
4377 * Safe to release lock now the we have a reference.
4379 RWLOCK_EXIT(&ipf_state
);
4382 * Copy out data and clean up references and tokens.
4384 error
= COPYOUT(next
, dst
, sizeof(*next
));
4391 if (token
->ipt_data
!= NULL
) {
/* The entry just copied has no successor, so the token is cleared. */
4394 if (next
->is_next
== NULL
) {
4395 token
->ipt_data
= NULL
;
/*
 * Test for the last requested item or a copy error; when iteration goes
 * on, the read lock is taken again and dst advances by one entry.
 */
4400 if ((count
== 1) || (error
!= 0))
4403 READ_ENTER(&ipf_state
);
4404 dst
+= sizeof(*next
);
4413 /* ------------------------------------------------------------------------ */
4414 /* Function: fr_stgettable */
4415 /* Returns: int - 0 = success, else error */
4416 /* Parameters: data(I) - pointer to ioctl data */
4418 /* This function handles ioctl requests for tables of state information. */
4419 /* At present the only table it deals with is the hash bucket statistics. */
4420 /* ------------------------------------------------------------------------ */
4421 static int fr_stgettable(data
)
4427 error
= fr_inobj(data
, &table
, IPFOBJ_GTABLE
);
4431 if (table
.ita_type
!= IPFTABLE_BUCKETS
)
4434 error
= COPYOUT(ips_stats
.iss_bucketlen
, table
.ita_table
,
4435 fr_statesize
* sizeof(u_long
));