/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/sysmacros.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/taskq.h>
#include <sys/cmn_err.h>
#include <sys/strsun.h>
#include <sys/atomic.h>
#include <netinet/in.h>
#include <inet/udp_impl.h>
#include <inet/kstatcom.h>
#include <inet/ilb_ip.h>
/* ILB kmem cache flag */
int ilb_kmem_flags = 0;

/*
 * The default size for the different hash tables.  Global for all stacks.
 * But each stack has its own table, just that their sizes are the same.
 */
static size_t ilb_rule_hash_size = 2048;

static size_t ilb_conn_hash_size = 262144;

static size_t ilb_sticky_hash_size = 262144;

/* This should be a prime number. */
static size_t ilb_nat_src_hash_size = 97;

/* Default NAT cache entry expiry time. */
static uint32_t ilb_conn_tcp_expiry = 120;
static uint32_t ilb_conn_udp_expiry = 60;

/* Default sticky entry expiry time. */
static uint32_t ilb_sticky_expiry = 60;
/* addr is assumed to be a uint8_t * to an ipaddr_t. */
#define	ILB_RULE_HASH(addr, hash_size) \
	((*((addr) + 3) * 29791 + *((addr) + 2) * 961 + *((addr) + 1) * 31 + \
	*(addr)) & ((hash_size) - 1))
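
/*
 * Worked example (added for clarity, not in the original source): the macro
 * is a base-31 polynomial over the four bytes of the address (31^3 = 29791,
 * 31^2 = 961), masked with hash_size - 1, which assumes hash_size is a power
 * of 2.  For the v4 address 192.168.1.10 in network byte order,
 * addr[0..3] = {192, 168, 1, 10}:
 *
 *	10 * 29791 + 1 * 961 + 168 * 31 + 192 = 304271
 *	304271 & (2048 - 1) = 1167
 *
 * so the rule lands in bucket 1167 of the default 2048-entry table.
 */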
/*
 * Note on ILB delayed processing
 *
 * To avoid in line removal on some of the data structures, such as rules,
 * servers and ilb_conn_hash entries, ILB delays such processing to a taskq.
 * There are three types of ILB taskq:
 *
 * 1. rule handling: created at stack initialization time, ilb_stack_init()
 * 2. conn hash handling: created at conn hash initialization time,
 *    ilb_conn_hash_init()
 * 3. sticky hash handling: created at sticky hash initialization time,
 *    ilb_sticky_hash_init()
 *
 * The rule taskq is for processing rule and server removal.  When a user
 * land rule/server removal request comes in, a taskq is dispatched after
 * removing the rule/server from all related hashes.  This taskq will wait
 * until all references to the rule/server are gone before removing it.
 * So the user land thread requesting the removal does not need to wait
 * for the removal completion.
 *
 * The conn hash/sticky hash taskq is for processing ilb_conn_hash and
 * ilb_sticky_hash table entry removal.  There are ilb_conn_timer_size timers
 * and ilb_sticky_timer_size timers running for ilb_conn_hash and
 * ilb_sticky_hash cleanup respectively.  Each timer is responsible for one
 * portion (same size) of the hash table.  When a timer fires, it dispatches
 * a conn hash taskq to clean up its portion of the table.  This avoids in
 * line processing of the removal.
 *
 * There is another delayed processing, the clean up of the NAT source
 * address table.  We just use the timer to directly handle it instead of
 * using a taskq.  The reason is that the table is small so it is OK to use
 * the timer.
 */
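
/*
 * Illustrative sketch (an assumption added for clarity, not part of the
 * original file): the deferred removal described above generally has this
 * shape, with a hypothetical refcounted obj_t standing in for a rule or
 * server:
 *
 *	(remove obj from all hashes/lists so no new reference is possible)
 *	(void) taskq_dispatch(tq, obj_del_tq, obj, TQ_SLEEP);
 *
 *	static void
 *	obj_del_tq(void *arg)
 *	{
 *		obj_t *obj = arg;
 *
 *		mutex_enter(&obj->lock);
 *		while (obj->refcnt > 1)		(wait out the last reference)
 *			cv_wait(&obj->cv, &obj->lock);
 *		(free obj);
 *	}
 *
 * ilb_rule_del_tq() and ilb_server_del_tq() below are the concrete
 * instances of this pattern.
 */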
/* ILB rule taskq constants. */
#define	ILB_RULE_TASKQ_NUM_THR	20

/* Argument passed to ILB rule taskq routines. */
typedef struct ilb_rule_tq_s {
	ilb_stack_t	*ilbs;
	ilb_rule_t	*rule;
} ilb_rule_tq_t;
/* kstat handling routines. */
static kstat_t *ilb_kstat_g_init(netstackid_t, ilb_stack_t *);
static void ilb_kstat_g_fini(netstackid_t, ilb_stack_t *);
static kstat_t *ilb_rule_kstat_init(netstackid_t, ilb_rule_t *);
static kstat_t *ilb_server_kstat_init(netstackid_t, ilb_rule_t *,
    ilb_server_t *);

/* Rule hash handling routines. */
static void ilb_rule_hash_init(ilb_stack_t *);
static void ilb_rule_hash_fini(ilb_stack_t *);
static void ilb_rule_hash_add(ilb_stack_t *, ilb_rule_t *, const in6_addr_t *);
static void ilb_rule_hash_del(ilb_rule_t *);
static ilb_rule_t *ilb_rule_hash(ilb_stack_t *, int, int, in6_addr_t *,
    in_port_t, zoneid_t, uint32_t, boolean_t *);

static void ilb_rule_g_add(ilb_stack_t *, ilb_rule_t *);
static void ilb_rule_g_del(ilb_stack_t *, ilb_rule_t *);
static void ilb_del_rule_common(ilb_stack_t *, ilb_rule_t *);
static ilb_rule_t *ilb_find_rule_locked(ilb_stack_t *, zoneid_t, const char *,
    int *);
static boolean_t ilb_match_rule(ilb_stack_t *, zoneid_t, const char *, int,
    int, in_port_t, in_port_t, const in6_addr_t *);

/* Back end server handling routines. */
static void ilb_server_free(ilb_server_t *);

/* Network stack handling routines. */
static void *ilb_stack_init(netstackid_t, netstack_t *);
static void ilb_stack_shutdown(netstackid_t, void *);
static void ilb_stack_fini(netstackid_t, void *);

/* Sticky connection handling routines. */
static void ilb_rule_sticky_init(ilb_rule_t *);
static void ilb_rule_sticky_fini(ilb_rule_t *);
/* Handy macro to check for unspecified address. */
#define	IS_ADDR_UNSPEC(addr) \
	(IN6_IS_ADDR_V4MAPPED(addr) ? IN6_IS_ADDR_V4MAPPED_ANY(addr) : \
	    IN6_IS_ADDR_UNSPECIFIED(addr))
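
/*
 * Example (added for clarity): for a v4-mapped address, the plain
 * IN6_IS_ADDR_UNSPECIFIED() test would always fail because the ::ffff:
 * prefix is non-zero.  So ::ffff:0.0.0.0 (INADDR_ANY, mapped) is caught by
 * IN6_IS_ADDR_V4MAPPED_ANY(), while a native v6 address is simply compared
 * against ::.
 */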
/*
 * Global kstat instance counter.  When a rule is created, its kstat instance
 * number is assigned by ilb_kstat_instance and ilb_kstat_instance is
 * incremented.
 */
static uint_t ilb_kstat_instance = 0;

/*
 * The ILB global kstat has name ILB_G_KS_NAME and class name ILB_G_KS_CNAME.
 * A rule's kstat has ILB_RULE_KS_CNAME class name.
 */
#define	ILB_G_KS_NAME		"global"
#define	ILB_G_KS_CNAME		"kstat"
#define	ILB_RULE_KS_CNAME	"rulestat"
static kstat_t *
ilb_kstat_g_init(netstackid_t stackid, ilb_stack_t *ilbs)
{
	kstat_t *ksp;
	ilb_g_kstat_t template = {
		{ "num_rules",		KSTAT_DATA_UINT64, 0 },
		{ "ip_frag_in",		KSTAT_DATA_UINT64, 0 },
		{ "ip_frag_dropped",	KSTAT_DATA_UINT64, 0 }
	};

	ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, 0, ILB_G_KS_NAME,
	    ILB_G_KS_CNAME, KSTAT_TYPE_NAMED, NUM_OF_FIELDS(ilb_g_kstat_t),
	    KSTAT_FLAG_VIRTUAL, stackid);
	if (ksp == NULL)
		return (NULL);
	bcopy(&template, ilbs->ilbs_kstat, sizeof (template));
	ksp->ks_data = ilbs->ilbs_kstat;
	ksp->ks_private = (void *)(uintptr_t)stackid;

	kstat_install(ksp);
	return (ksp);
}
static void
ilb_kstat_g_fini(netstackid_t stackid, ilb_stack_t *ilbs)
{
	if (ilbs->ilbs_ksp != NULL) {
		ASSERT(stackid == (netstackid_t)(uintptr_t)
		    ilbs->ilbs_ksp->ks_private);
		kstat_delete_netstack(ilbs->ilbs_ksp, stackid);
		ilbs->ilbs_ksp = NULL;
	}
}
static kstat_t *
ilb_rule_kstat_init(netstackid_t stackid, ilb_rule_t *rule)
{
	kstat_t *ksp;
	ilb_rule_kstat_t template = {
		{ "num_servers",		KSTAT_DATA_UINT64, 0 },
		{ "bytes_not_processed",	KSTAT_DATA_UINT64, 0 },
		{ "pkt_not_processed",		KSTAT_DATA_UINT64, 0 },
		{ "bytes_dropped",		KSTAT_DATA_UINT64, 0 },
		{ "pkt_dropped",		KSTAT_DATA_UINT64, 0 },
		{ "nomem_bytes_dropped",	KSTAT_DATA_UINT64, 0 },
		{ "nomem_pkt_dropped",		KSTAT_DATA_UINT64, 0 },
		{ "noport_bytes_dropped",	KSTAT_DATA_UINT64, 0 },
		{ "noport_pkt_dropped",		KSTAT_DATA_UINT64, 0 },
		{ "icmp_echo_processed",	KSTAT_DATA_UINT64, 0 },
		{ "icmp_dropped",		KSTAT_DATA_UINT64, 0 },
		{ "icmp_too_big_processed",	KSTAT_DATA_UINT64, 0 },
		{ "icmp_too_big_dropped",	KSTAT_DATA_UINT64, 0 }
	};

	ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance,
	    rule->ir_name, ILB_RULE_KS_CNAME, KSTAT_TYPE_NAMED,
	    NUM_OF_FIELDS(ilb_rule_kstat_t), KSTAT_FLAG_VIRTUAL, stackid);
	if (ksp == NULL)
		return (NULL);

	bcopy(&template, &rule->ir_kstat, sizeof (template));
	ksp->ks_data = &rule->ir_kstat;
	ksp->ks_private = (void *)(uintptr_t)stackid;

	kstat_install(ksp);
	return (ksp);
}
static kstat_t *
ilb_server_kstat_init(netstackid_t stackid, ilb_rule_t *rule,
    ilb_server_t *server)
{
	kstat_t *ksp;
	ilb_server_kstat_t template = {
		{ "bytes_processed",	KSTAT_DATA_UINT64, 0 },
		{ "pkt_processed",	KSTAT_DATA_UINT64, 0 },
		{ "ip_address",		KSTAT_DATA_STRING, 0 }
	};
	char cname_buf[KSTAT_STRLEN];

	ASSERT(strlen(rule->ir_name) + 7 < KSTAT_STRLEN);
	(void) sprintf(cname_buf, "%s-sstat", rule->ir_name);
	ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance,
	    server->iser_name, cname_buf, KSTAT_TYPE_NAMED,
	    NUM_OF_FIELDS(ilb_server_kstat_t), KSTAT_FLAG_VIRTUAL, stackid);
	if (ksp == NULL)
		return (NULL);

	bcopy(&template, &server->iser_kstat, sizeof (template));
	ksp->ks_data = &server->iser_kstat;
	ksp->ks_private = (void *)(uintptr_t)stackid;

	kstat_named_setstr(&server->iser_kstat.ip_address,
	    server->iser_ip_addr);
	/* We never change the IP address */
	ksp->ks_data_size += strlen(server->iser_ip_addr) + 1;

	kstat_install(ksp);
	return (ksp);
}
/* Initialize the rule hash table. */
static void
ilb_rule_hash_init(ilb_stack_t *ilbs)
{
	int i;

	/*
	 * If ilbs->ilbs_rule_hash_size is not a power of 2, bump it up to
	 * the next power of 2.
	 */
	if (!ISP2(ilbs->ilbs_rule_hash_size)) {
		for (i = 0; i < 31; i++) {
			if (ilbs->ilbs_rule_hash_size < (1 << i))
				break;
		}
		ilbs->ilbs_rule_hash_size = 1 << i;
	}
	ilbs->ilbs_g_hash = kmem_zalloc(sizeof (ilb_hash_t) *
	    ilbs->ilbs_rule_hash_size, KM_SLEEP);
	for (i = 0; i < ilbs->ilbs_rule_hash_size; i++) {
		mutex_init(&ilbs->ilbs_g_hash[i].ilb_hash_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}
}
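
/*
 * Example (added for clarity): the default size 2048 is already a power of 2
 * and is used as is.  If the tunable were set to, say, 3000, the loop above
 * would stop at i = 12 (3000 < 4096) and the table would be sized to 4096,
 * keeping ILB_RULE_HASH's mask of (hash_size - 1) valid.
 */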
/* Clean up the rule hash table. */
static void
ilb_rule_hash_fini(ilb_stack_t *ilbs)
{
	if (ilbs->ilbs_g_hash == NULL)
		return;
	kmem_free(ilbs->ilbs_g_hash, sizeof (ilb_hash_t) *
	    ilbs->ilbs_rule_hash_size);
}
/* Add a rule to the rule hash table. */
static void
ilb_rule_hash_add(ilb_stack_t *ilbs, ilb_rule_t *rule, const in6_addr_t *addr)
{
	int i;

	i = ILB_RULE_HASH((uint8_t *)&addr->s6_addr32[3],
	    ilbs->ilbs_rule_hash_size);
	DTRACE_PROBE2(ilb__rule__hash__add, ilb_rule_t *, rule, int, i);
	mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
	rule->ir_hash_next = ilbs->ilbs_g_hash[i].ilb_hash_rule;
	if (ilbs->ilbs_g_hash[i].ilb_hash_rule != NULL)
		ilbs->ilbs_g_hash[i].ilb_hash_rule->ir_hash_prev = rule;
	rule->ir_hash_prev = NULL;
	ilbs->ilbs_g_hash[i].ilb_hash_rule = rule;

	rule->ir_hash = &ilbs->ilbs_g_hash[i];
	mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
}
/*
 * Remove a rule from the rule hash table.  Note that the rule is not freed
 * in this routine.
 */
static void
ilb_rule_hash_del(ilb_rule_t *rule)
{
	mutex_enter(&rule->ir_hash->ilb_hash_lock);
	if (rule->ir_hash->ilb_hash_rule == rule) {
		rule->ir_hash->ilb_hash_rule = rule->ir_hash_next;
		if (rule->ir_hash_next != NULL)
			rule->ir_hash_next->ir_hash_prev = NULL;
	} else {
		if (rule->ir_hash_prev != NULL)
			rule->ir_hash_prev->ir_hash_next =
			    rule->ir_hash_next;
		if (rule->ir_hash_next != NULL) {
			rule->ir_hash_next->ir_hash_prev =
			    rule->ir_hash_prev;
		}
	}
	mutex_exit(&rule->ir_hash->ilb_hash_lock);

	rule->ir_hash_next = NULL;
	rule->ir_hash_prev = NULL;
	rule->ir_hash = NULL;
}
/*
 * Given the info of a packet, look for a match in the rule hash table.
 */
static ilb_rule_t *
ilb_rule_hash(ilb_stack_t *ilbs, int l3, int l4, in6_addr_t *addr,
    in_port_t port, zoneid_t zoneid, uint32_t len, boolean_t *busy)
{
	int i;
	ilb_rule_t *rule;
	ipaddr_t v4_addr;

	*busy = B_FALSE;
	IN6_V4MAPPED_TO_IPADDR(addr, v4_addr);
	i = ILB_RULE_HASH((uint8_t *)&v4_addr, ilbs->ilbs_rule_hash_size);
	port = ntohs(port);

	mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
	for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL;
	    rule = rule->ir_hash_next) {
		if (!rule->ir_port_range) {
			if (rule->ir_min_port != port)
				continue;
		} else {
			if (port < rule->ir_min_port ||
			    port > rule->ir_max_port) {
				continue;
			}
		}
		if (rule->ir_ipver != l3 || rule->ir_proto != l4 ||
		    rule->ir_zoneid != zoneid) {
			continue;
		}

		if (l3 == IPPROTO_IP) {
			if (rule->ir_target_v4 != INADDR_ANY &&
			    rule->ir_target_v4 != v4_addr) {
				continue;
			}
		} else {
			if (!IN6_IS_ADDR_UNSPECIFIED(&rule->ir_target_v6) &&
			    !IN6_ARE_ADDR_EQUAL(addr, &rule->ir_target_v6)) {
				continue;
			}
		}

		/*
		 * Just update the stats if the rule is disabled.
		 */
		mutex_enter(&rule->ir_lock);
		if (!(rule->ir_flags & ILB_RULE_ENABLED)) {
			ILB_R_KSTAT(rule, pkt_not_processed);
			ILB_R_KSTAT_UPDATE(rule, bytes_not_processed, len);
			mutex_exit(&rule->ir_lock);
			rule = NULL;
			break;
		} else if (rule->ir_flags & ILB_RULE_BUSY) {
			/*
			 * XXX we should have a queue to postpone the
			 * packet processing.  But this requires a
			 * mechanism in IP to re-start the packet
			 * processing.  So for now, just drop the packet.
			 */
			ILB_R_KSTAT(rule, pkt_dropped);
			ILB_R_KSTAT_UPDATE(rule, bytes_dropped, len);
			mutex_exit(&rule->ir_lock);
			*busy = B_TRUE;
			rule = NULL;
			break;
		} else {
			rule->ir_refcnt++;
			ASSERT(rule->ir_refcnt != 1);
			mutex_exit(&rule->ir_lock);
			break;
		}
	}
	mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
	return (rule);
}
/*
 * Add a rule to the global rule list.  This list is for finding all rules
 * in an IP stack.  The caller is assumed to hold the ilbs_g_lock.
 */
static void
ilb_rule_g_add(ilb_stack_t *ilbs, ilb_rule_t *rule)
{
	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
	rule->ir_next = ilbs->ilbs_rule_head;
	ilbs->ilbs_rule_head = rule;
	ILB_KSTAT_UPDATE(ilbs, num_rules, 1);
}
/* The caller is assumed to hold the ilbs_g_lock. */
static void
ilb_rule_g_del(ilb_stack_t *ilbs, ilb_rule_t *rule)
{
	ilb_rule_t *tmp_rule;
	ilb_rule_t *prev_rule;

	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
	prev_rule = NULL;
	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
	    prev_rule = tmp_rule, tmp_rule = tmp_rule->ir_next) {
		if (tmp_rule == rule)
			break;
	}
	if (tmp_rule == NULL) {
		mutex_exit(&ilbs->ilbs_g_lock);
		return;
	}
	if (prev_rule == NULL)
		ilbs->ilbs_rule_head = tmp_rule->ir_next;
	else
		prev_rule->ir_next = tmp_rule->ir_next;
	ILB_KSTAT_UPDATE(ilbs, num_rules, -1);
}
/*
 * Helper routine to calculate how many source addresses are in a given
 * range.
 */
static int64_t
num_nat_src_v6(const in6_addr_t *a1, const in6_addr_t *a2)
{
	int64_t ret;
	uint32_t addr1, addr2;

	/*
	 * Here we assume that the max number of NAT sources cannot be
	 * large, such that the most significant 2 s6_addr32 must be
	 * equal.
	 */
	addr1 = ntohl(a1->s6_addr32[3]);
	addr2 = ntohl(a2->s6_addr32[3]);
	if (a1->s6_addr32[0] != a2->s6_addr32[0] ||
	    a1->s6_addr32[1] != a2->s6_addr32[1] ||
	    a1->s6_addr32[2] > a2->s6_addr32[2] ||
	    (a1->s6_addr32[2] == a2->s6_addr32[2] && addr1 > addr2)) {
		return (-1);
	}
	if (a1->s6_addr32[2] == a2->s6_addr32[2]) {
		return (addr2 - addr1 + 1);
	}
	ret = (ntohl(a2->s6_addr32[2]) - ntohl(a1->s6_addr32[2]));
	ret <<= 32;
	ret = ret + addr1 - addr2;
	return (ret + 1);
}
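
/*
 * Worked example (added for clarity): for the range 2001:db8::10 to
 * 2001:db8::19, the top two 32-bit words are equal and s6_addr32[2] is the
 * same on both sides, so the count is 0x19 - 0x10 + 1 = 10 source
 * addresses.  A range given backwards (start > end) returns -1.
 */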
int
ilb_rule_add(ilb_stack_t *ilbs, zoneid_t zoneid, const ilb_rule_cmd_t *cmd)
{
	ilb_rule_t *rule;
	netstackid_t stackid;
	int ret;
	in_port_t min_port, max_port;
	int64_t num_src;

	if (cmd->ip_ver != IPPROTO_IP && cmd->ip_ver != IPPROTO_IPV6)
		return (EINVAL);

	/* Need to support SCTP... */
	if (cmd->proto != IPPROTO_TCP && cmd->proto != IPPROTO_UDP)
		return (EINVAL);

	/* For full NAT, the NAT source must be supplied. */
	if (cmd->topo == ILB_TOPO_IMPL_NAT) {
		if (IS_ADDR_UNSPEC(&cmd->nat_src_start) ||
		    IS_ADDR_UNSPEC(&cmd->nat_src_end)) {
			return (EINVAL);
		}
	}

	/* Check invalid mask */
	if ((cmd->flags & ILB_RULE_STICKY) &&
	    IS_ADDR_UNSPEC(&cmd->sticky_mask)) {
		return (EINVAL);
	}

	/* Port is passed in network byte order. */
	min_port = ntohs(cmd->min_port);
	max_port = ntohs(cmd->max_port);
	if (min_port > max_port)
		return (EINVAL);

	/* min_port == 0 means "all ports". Make it so */
	if (min_port == 0) {
		min_port = 1;
		max_port = 65535;
	}

	/* Funny address checking. */
	if (cmd->ip_ver == IPPROTO_IP) {
		in_addr_t v4_addr1, v4_addr2;

		v4_addr1 = cmd->vip.s6_addr32[3];
		if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET ||
		    CLASSD(v4_addr1) || v4_addr1 == INADDR_BROADCAST ||
		    v4_addr1 == INADDR_ANY ||
		    !IN6_IS_ADDR_V4MAPPED(&cmd->vip)) {
			return (EINVAL);
		}

		if (cmd->topo == ILB_TOPO_IMPL_NAT) {
			v4_addr1 = ntohl(cmd->nat_src_start.s6_addr32[3]);
			v4_addr2 = ntohl(cmd->nat_src_end.s6_addr32[3]);
			if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET ||
			    (*(uchar_t *)&v4_addr2) == IN_LOOPBACKNET ||
			    v4_addr1 == INADDR_BROADCAST ||
			    v4_addr2 == INADDR_BROADCAST ||
			    v4_addr1 == INADDR_ANY || v4_addr2 == INADDR_ANY ||
			    CLASSD(v4_addr1) || CLASSD(v4_addr2) ||
			    !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) ||
			    !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) {
				return (EINVAL);
			}

			num_src = v4_addr2 - v4_addr1 + 1;
			if (v4_addr1 > v4_addr2 || num_src > ILB_MAX_NAT_SRC)
				return (EINVAL);
		}
	} else {
		if (IN6_IS_ADDR_LOOPBACK(&cmd->vip) ||
		    IN6_IS_ADDR_MULTICAST(&cmd->vip) ||
		    IN6_IS_ADDR_UNSPECIFIED(&cmd->vip) ||
		    IN6_IS_ADDR_V4MAPPED(&cmd->vip)) {
			return (EINVAL);
		}

		if (cmd->topo == ILB_TOPO_IMPL_NAT) {
			if (IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_start) ||
			    IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_end) ||
			    IN6_IS_ADDR_MULTICAST(&cmd->nat_src_start) ||
			    IN6_IS_ADDR_MULTICAST(&cmd->nat_src_end) ||
			    IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_start) ||
			    IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_end) ||
			    IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) ||
			    IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) {
				return (EINVAL);
			}

			if ((num_src = num_nat_src_v6(&cmd->nat_src_start,
			    &cmd->nat_src_end)) < 0 ||
			    num_src > ILB_MAX_NAT_SRC) {
				return (EINVAL);
			}
		}
	}

	mutex_enter(&ilbs->ilbs_g_lock);
	if (ilbs->ilbs_g_hash == NULL)
		ilb_rule_hash_init(ilbs);
	if (ilbs->ilbs_c2s_conn_hash == NULL) {
		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
		ilb_conn_hash_init(ilbs);
		ilb_nat_src_init(ilbs);
	}

	/* Make sure that the new rule does not duplicate an existing one. */
	if (ilb_match_rule(ilbs, zoneid, cmd->name, cmd->ip_ver, cmd->proto,
	    min_port, max_port, &cmd->vip)) {
		mutex_exit(&ilbs->ilbs_g_lock);
		return (EEXIST);
	}

	rule = kmem_zalloc(sizeof (ilb_rule_t), KM_NOSLEEP);
	if (rule == NULL) {
		mutex_exit(&ilbs->ilbs_g_lock);
		return (ENOMEM);
	}

	/* ir_name is all 0 to begin with */
	(void) memcpy(rule->ir_name, cmd->name, ILB_RULE_NAMESZ - 1);

	rule->ir_ks_instance = atomic_inc_uint_nv(&ilb_kstat_instance);
	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
	if ((rule->ir_ksp = ilb_rule_kstat_init(stackid, rule)) == NULL) {
		ret = ENOMEM;
		goto error;
	}

	if (cmd->topo == ILB_TOPO_IMPL_NAT) {
		rule->ir_nat_src_start = cmd->nat_src_start;
		rule->ir_nat_src_end = cmd->nat_src_end;
	}

	rule->ir_ipver = cmd->ip_ver;
	rule->ir_proto = cmd->proto;
	rule->ir_topo = cmd->topo;

	rule->ir_min_port = min_port;
	rule->ir_max_port = max_port;
	if (rule->ir_min_port != rule->ir_max_port)
		rule->ir_port_range = B_TRUE;
	else
		rule->ir_port_range = B_FALSE;

	rule->ir_zoneid = zoneid;

	rule->ir_target_v6 = cmd->vip;
	rule->ir_servers = NULL;

	/*
	 * The default connection drain timeout is indefinite (value 0),
	 * meaning we will wait for all connections to finish.  So we
	 * can assign cmd->conn_drain_timeout to it directly.
	 */
	rule->ir_conn_drain_timeout = cmd->conn_drain_timeout;
	if (cmd->nat_expiry != 0) {
		rule->ir_nat_expiry = cmd->nat_expiry;
	} else {
		switch (rule->ir_proto) {
		case IPPROTO_TCP:
			rule->ir_nat_expiry = ilb_conn_tcp_expiry;
			break;
		case IPPROTO_UDP:
			rule->ir_nat_expiry = ilb_conn_udp_expiry;
			break;
		default:
			cmn_err(CE_PANIC, "data corruption: wrong ir_proto: %p",
			    (void *)rule);
			break;
		}
	}
	if (cmd->sticky_expiry != 0)
		rule->ir_sticky_expiry = cmd->sticky_expiry;
	else
		rule->ir_sticky_expiry = ilb_sticky_expiry;

	if (cmd->flags & ILB_RULE_STICKY) {
		rule->ir_flags |= ILB_RULE_STICKY;
		rule->ir_sticky_mask = cmd->sticky_mask;
		if (ilbs->ilbs_sticky_hash == NULL)
			ilb_sticky_hash_init(ilbs);
	}
	if (cmd->flags & ILB_RULE_ENABLED)
		rule->ir_flags |= ILB_RULE_ENABLED;

	mutex_init(&rule->ir_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&rule->ir_cv, NULL, CV_DEFAULT, NULL);

	rule->ir_refcnt = 1;

	switch (cmd->algo) {
	case ILB_ALG_IMPL_ROUNDROBIN:
		if ((rule->ir_alg = ilb_alg_rr_init(rule, NULL)) == NULL) {
			ret = ENOMEM;
			goto error;
		}
		rule->ir_alg_type = ILB_ALG_IMPL_ROUNDROBIN;
		break;
	case ILB_ALG_IMPL_HASH_IP:
	case ILB_ALG_IMPL_HASH_IP_SPORT:
	case ILB_ALG_IMPL_HASH_IP_VIP:
		if ((rule->ir_alg = ilb_alg_hash_init(rule,
		    &cmd->algo)) == NULL) {
			ret = ENOMEM;
			goto error;
		}
		rule->ir_alg_type = cmd->algo;
		break;
	default:
		ret = EINVAL;
		goto error;
	}

	/* Add it to the global list and hash array at the end. */
	ilb_rule_g_add(ilbs, rule);
	ilb_rule_hash_add(ilbs, rule, &cmd->vip);

	mutex_exit(&ilbs->ilbs_g_lock);

	return (0);

error:
	mutex_exit(&ilbs->ilbs_g_lock);
	if (rule->ir_ksp != NULL) {
		/* stackid must be initialized if ir_ksp != NULL */
		kstat_delete_netstack(rule->ir_ksp, stackid);
	}
	kmem_free(rule, sizeof (ilb_rule_t));
	return (ret);
}
/*
 * The final part in deleting a rule.  Either called directly or by the
 * rule taskq.
 */
static void
ilb_rule_del_common(ilb_stack_t *ilbs, ilb_rule_t *tmp_rule)
{
	netstackid_t stackid;
	ilb_server_t *server;

	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;

	/*
	 * Let the algorithm know that the rule is going away.  The
	 * algorithm fini routine will free all its resources with this
	 * rule.
	 */
	tmp_rule->ir_alg->ilb_alg_fini(&tmp_rule->ir_alg);

	while ((server = tmp_rule->ir_servers) != NULL) {
		mutex_enter(&server->iser_lock);
		ilb_destroy_nat_src(&server->iser_nat_src);
		if (tmp_rule->ir_conn_drain_timeout != 0) {
			/*
			 * The garbage collection thread checks this value
			 * without grabbing a lock.  So we need to use
			 * atomic_swap_64() to make sure that the value seen
			 * by the gc thread is intact.
			 */
			(void) atomic_swap_64(
			    (uint64_t *)&server->iser_die_time,
			    ddi_get_lbolt64() +
			    SEC_TO_TICK(tmp_rule->ir_conn_drain_timeout));
		}
		while (server->iser_refcnt > 1)
			cv_wait(&server->iser_cv, &server->iser_lock);
		tmp_rule->ir_servers = server->iser_next;
		kstat_delete_netstack(server->iser_ksp, stackid);
		kmem_free(server, sizeof (ilb_server_t));
	}

	ASSERT(tmp_rule->ir_ksp != NULL);
	kstat_delete_netstack(tmp_rule->ir_ksp, stackid);

	kmem_free(tmp_rule, sizeof (ilb_rule_t));
}
/* The routine executed by the delayed rule taskq. */
static void
ilb_rule_del_tq(void *arg)
{
	ilb_stack_t *ilbs = ((ilb_rule_tq_t *)arg)->ilbs;
	ilb_rule_t *rule = ((ilb_rule_tq_t *)arg)->rule;

	mutex_enter(&rule->ir_lock);
	while (rule->ir_refcnt > 1)
		cv_wait(&rule->ir_cv, &rule->ir_lock);
	ilb_rule_del_common(ilbs, rule);
	kmem_free(arg, sizeof (ilb_rule_tq_t));
}
/* Routine to delete a rule. */
int
ilb_rule_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name)
{
	ilb_rule_t *tmp_rule;
	ilb_rule_tq_t *arg;
	int err;

	mutex_enter(&ilbs->ilbs_g_lock);
	if ((tmp_rule = ilb_find_rule_locked(ilbs, zoneid, name,
	    &err)) == NULL) {
		mutex_exit(&ilbs->ilbs_g_lock);
		return (err);
	}

	/*
	 * First remove the rule from the hash array and the global list so
	 * that no one can find this rule any more.
	 */
	ilb_rule_hash_del(tmp_rule);
	ilb_rule_g_del(ilbs, tmp_rule);
	mutex_exit(&ilbs->ilbs_g_lock);
	ILB_RULE_REFRELE(tmp_rule);

	/*
	 * Now no one can find this rule, we can remove it once all
	 * references to it are dropped and all references to the list
	 * of servers are dropped.  So dispatch a task to finish the deletion.
	 * We do this instead of letting the last one referencing the
	 * rule do it.  The reason is that the last one may be the
	 * interrupt thread.  We want to minimize the work it needs to
	 * do.  Rule deletion is not a critical task so it can be delayed.
	 */
	arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP);
	arg->ilbs = ilbs;
	arg->rule = tmp_rule;
	(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq, arg,
	    TQ_SLEEP);

	return (0);
}
857 * as the VIP. It can be used to check if we need to drop a fragment.
860 ilb_rule_match_vip_v6(ilb_stack_t
*ilbs
, in6_addr_t
*vip
, ilb_rule_t
**ret_rule
)
864 boolean_t ret
= B_FALSE
;
866 i
= ILB_RULE_HASH((uint8_t *)&vip
->s6_addr32
[3],
867 ilbs
->ilbs_rule_hash_size
);
868 mutex_enter(&ilbs
->ilbs_g_hash
[i
].ilb_hash_lock
);
869 for (rule
= ilbs
->ilbs_g_hash
[i
].ilb_hash_rule
; rule
!= NULL
;
870 rule
= rule
->ir_hash_next
) {
871 if (IN6_ARE_ADDR_EQUAL(vip
, &rule
->ir_target_v6
)) {
872 mutex_enter(&rule
->ir_lock
);
873 if (rule
->ir_flags
& ILB_RULE_BUSY
) {
874 mutex_exit(&rule
->ir_lock
);
877 if (ret_rule
!= NULL
) {
879 mutex_exit(&rule
->ir_lock
);
882 mutex_exit(&rule
->ir_lock
);
888 mutex_exit(&ilbs
->ilbs_g_hash
[i
].ilb_hash_lock
);
boolean_t
ilb_rule_match_vip_v4(ilb_stack_t *ilbs, ipaddr_t addr, ilb_rule_t **ret_rule)
{
	int i;
	ilb_rule_t *rule;
	boolean_t ret = B_FALSE;

	i = ILB_RULE_HASH((uint8_t *)&addr, ilbs->ilbs_rule_hash_size);
	mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
	for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL;
	    rule = rule->ir_hash_next) {
		if (rule->ir_target_v6.s6_addr32[3] == addr) {
			mutex_enter(&rule->ir_lock);
			if (rule->ir_flags & ILB_RULE_BUSY) {
				mutex_exit(&rule->ir_lock);
				break;
			}
			if (ret_rule != NULL) {
				rule->ir_refcnt++;
				mutex_exit(&rule->ir_lock);
				*ret_rule = rule;
			} else {
				mutex_exit(&rule->ir_lock);
			}
			ret = B_TRUE;
			break;
		}
	}
	mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
	return (ret);
}
static ilb_rule_t *
ilb_find_rule_locked(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
    int *err)
{
	ilb_rule_t *tmp_rule;

	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));

	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
	    tmp_rule = tmp_rule->ir_next) {
		if (tmp_rule->ir_zoneid != zoneid)
			continue;
		if (strcasecmp(tmp_rule->ir_name, name) == 0) {
			mutex_enter(&tmp_rule->ir_lock);
			if (tmp_rule->ir_flags & ILB_RULE_BUSY) {
				mutex_exit(&tmp_rule->ir_lock);
				*err = EINPROGRESS;
				return (NULL);
			}
			tmp_rule->ir_refcnt++;
			mutex_exit(&tmp_rule->ir_lock);
			*err = 0;
			return (tmp_rule);
		}
	}
	*err = ENOENT;
	return (NULL);
}
/* To find a rule with a given name and zone in the global rule list. */
ilb_rule_t *
ilb_find_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
    int *err)
{
	ilb_rule_t *tmp_rule;

	mutex_enter(&ilbs->ilbs_g_lock);
	tmp_rule = ilb_find_rule_locked(ilbs, zoneid, name, err);
	mutex_exit(&ilbs->ilbs_g_lock);
	return (tmp_rule);
}
/* Try to match the given packet info and zone ID with a rule. */
static boolean_t
ilb_match_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, int l3,
    int l4, in_port_t min_port, in_port_t max_port, const in6_addr_t *addr)
{
	ilb_rule_t *tmp_rule;

	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));

	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
	    tmp_rule = tmp_rule->ir_next) {
		if (tmp_rule->ir_zoneid != zoneid)
			continue;

		/*
		 * We don't allow the same name in different rules even if all
		 * the other rule components are different.
		 */
		if (strcasecmp(tmp_rule->ir_name, name) == 0)
			return (B_TRUE);

		if (tmp_rule->ir_ipver != l3 || tmp_rule->ir_proto != l4)
			continue;

		/*
		 * ir_min_port and ir_max_port are the same if ir_port_range
		 * is false.  In this case, if the ir_min|max_port (same) is
		 * outside of the given port range, it is OK.  In other cases,
		 * check if min and max port are outside a rule's range.
		 */
		if (tmp_rule->ir_max_port < min_port ||
		    tmp_rule->ir_min_port > max_port) {
			continue;
		}

		/*
		 * If l3 is IPv4, the addr passed in is assumed to be
		 * a mapped address.
		 */
		if (V6_OR_V4_INADDR_ANY(*addr) ||
		    V6_OR_V4_INADDR_ANY(tmp_rule->ir_target_v6) ||
		    IN6_ARE_ADDR_EQUAL(addr, &tmp_rule->ir_target_v6)) {
			return (B_TRUE);
		}
	}
	return (B_FALSE);
}
int
ilb_rule_enable(ilb_stack_t *ilbs, zoneid_t zoneid,
    const char *rule_name, ilb_rule_t *in_rule)
{
	ilb_rule_t *rule;
	int err;

	ASSERT((in_rule == NULL && rule_name != NULL) ||
	    (in_rule != NULL && rule_name == NULL));
	if ((rule = in_rule) == NULL) {
		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
		    &err)) == NULL) {
			return (err);
		}
	}
	mutex_enter(&rule->ir_lock);
	rule->ir_flags |= ILB_RULE_ENABLED;
	mutex_exit(&rule->ir_lock);

	/* Only refrele if we looked up the rule (rule_name was passed in). */
	if (in_rule == NULL)
		ILB_RULE_REFRELE(rule);
	return (0);
}
int
ilb_rule_disable(ilb_stack_t *ilbs, zoneid_t zoneid,
    const char *rule_name, ilb_rule_t *in_rule)
{
	ilb_rule_t *rule;
	int err;

	ASSERT((in_rule == NULL && rule_name != NULL) ||
	    (in_rule != NULL && rule_name == NULL));
	if ((rule = in_rule) == NULL) {
		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
		    &err)) == NULL) {
			return (err);
		}
	}
	mutex_enter(&rule->ir_lock);
	rule->ir_flags &= ~ILB_RULE_ENABLED;
	mutex_exit(&rule->ir_lock);

	/* Only refrele if we looked up the rule (rule_name was passed in). */
	if (in_rule == NULL)
		ILB_RULE_REFRELE(rule);
	return (0);
}
/*
 * XXX We should probably have a walker function to walk all rules.  For
 * now, just add a simple loop for enable/disable/del.
 */
void
ilb_rule_enable_all(ilb_stack_t *ilbs, zoneid_t zoneid)
{
	ilb_rule_t *rule;

	mutex_enter(&ilbs->ilbs_g_lock);
	for (rule = ilbs->ilbs_rule_head; rule != NULL; rule = rule->ir_next) {
		if (rule->ir_zoneid != zoneid)
			continue;
		/*
		 * No need to hold the rule as we are holding the global
		 * lock so it won't go away.  Ignore the return value here
		 * as the rule is provided so the call cannot fail.
		 */
		(void) ilb_rule_enable(ilbs, zoneid, NULL, rule);
	}
	mutex_exit(&ilbs->ilbs_g_lock);
}
void
ilb_rule_disable_all(ilb_stack_t *ilbs, zoneid_t zoneid)
{
	ilb_rule_t *rule;

	mutex_enter(&ilbs->ilbs_g_lock);
	for (rule = ilbs->ilbs_rule_head; rule != NULL;
	    rule = rule->ir_next) {
		if (rule->ir_zoneid != zoneid)
			continue;
		(void) ilb_rule_disable(ilbs, zoneid, NULL, rule);
	}
	mutex_exit(&ilbs->ilbs_g_lock);
}
void
ilb_rule_del_all(ilb_stack_t *ilbs, zoneid_t zoneid)
{
	ilb_rule_t *rule;
	ilb_rule_tq_t *arg;

	mutex_enter(&ilbs->ilbs_g_lock);
	while ((rule = ilbs->ilbs_rule_head) != NULL) {
		if (rule->ir_zoneid != zoneid)
			continue;
		ilb_rule_hash_del(rule);
		ilb_rule_g_del(ilbs, rule);
		mutex_exit(&ilbs->ilbs_g_lock);

		arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP);
		arg->ilbs = ilbs;
		arg->rule = rule;
		(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq,
		    arg, TQ_SLEEP);

		mutex_enter(&ilbs->ilbs_g_lock);
	}
	mutex_exit(&ilbs->ilbs_g_lock);
}
/*
 * This is just an optimization, so don't grab the global lock.  The
 * worst case is that we missed a couple packets.
 */
boolean_t
ilb_has_rules(ilb_stack_t *ilbs)
{
	return (ilbs->ilbs_rule_head != NULL);
}
static int
ilb_server_toggle(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name,
    ilb_rule_t *rule, in6_addr_t *addr, boolean_t enable)
{
	ilb_server_t *tmp_server;
	int ret;

	ASSERT((rule == NULL && rule_name != NULL) ||
	    (rule != NULL && rule_name == NULL));

	if (rule == NULL) {
		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
		    &ret)) == NULL) {
			return (ret);
		}
	}

	/* Once we get a hold on the rule, no server can be added/deleted. */
	for (tmp_server = rule->ir_servers; tmp_server != NULL;
	    tmp_server = tmp_server->iser_next) {
		if (IN6_ARE_ADDR_EQUAL(&tmp_server->iser_addr_v6, addr))
			break;
	}
	if (tmp_server == NULL) {
		ret = ENOENT;
		goto done;
	}

	if (enable) {
		ret = rule->ir_alg->ilb_alg_server_enable(tmp_server,
		    rule->ir_alg->ilb_alg_data);
		if (ret == 0) {
			tmp_server->iser_enabled = B_TRUE;
			tmp_server->iser_die_time = 0;
		}
	} else {
		ret = rule->ir_alg->ilb_alg_server_disable(tmp_server,
		    rule->ir_alg->ilb_alg_data);
		if (ret == 0) {
			tmp_server->iser_enabled = B_FALSE;
			if (rule->ir_conn_drain_timeout != 0) {
				(void) atomic_swap_64(
				    (uint64_t *)&tmp_server->iser_die_time,
				    ddi_get_lbolt64() + SEC_TO_TICK(
				    rule->ir_conn_drain_timeout));
			}
		}
	}

done:
	if (rule_name != NULL)
		ILB_RULE_REFRELE(rule);
	return (ret);
}
int
ilb_server_enable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
    ilb_rule_t *rule, in6_addr_t *addr)
{
	return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_TRUE));
}

int
ilb_server_disable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
    ilb_rule_t *rule, in6_addr_t *addr)
{
	return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_FALSE));
}
/*
 * Add a back end server to a rule.  If the address is IPv4, it is assumed
 * to be passed in as a mapped address.
 */
int
ilb_server_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_info_t *info)
{
	ilb_server_t *server;
	netstackid_t stackid;
	int ret = 0;
	in_port_t min_port, max_port;
	in_port_t range;

	/* Port is passed in network byte order. */
	min_port = ntohs(info->min_port);
	max_port = ntohs(info->max_port);
	if (min_port > max_port)
		return (EINVAL);

	/* min_port == 0 means "all ports". Make it so */
	if (min_port == 0) {
		min_port = 1;
		max_port = 65535;
	}
	range = max_port - min_port;

	mutex_enter(&rule->ir_lock);
	/* If someone is already doing server add/del, sleep and wait. */
	while (rule->ir_flags & ILB_RULE_BUSY) {
		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
			mutex_exit(&rule->ir_lock);
			return (EINTR);
		}
	}

	/*
	 * Set the rule to be busy to make sure that no new packet can
	 * use this rule.
	 */
	rule->ir_flags |= ILB_RULE_BUSY;

	/* Now wait for all other guys to finish their work. */
	while (rule->ir_refcnt > 2) {
		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
			mutex_exit(&rule->ir_lock);
			ret = EINTR;
			goto done;
		}
	}
	mutex_exit(&rule->ir_lock);

	/* Sanity checks... */
	if ((IN6_IS_ADDR_V4MAPPED(&info->addr) &&
	    rule->ir_ipver != IPPROTO_IP) ||
	    (!IN6_IS_ADDR_V4MAPPED(&info->addr) &&
	    rule->ir_ipver != IPPROTO_IPV6)) {
		ret = EINVAL;
		goto done;
	}

	/*
	 * Check for valid port range.
	 *
	 * For DSR, there can be no port shifting.  Hence the server
	 * specification must be the same as the rule's.
	 *
	 * For half-NAT/NAT, the range must either be 0 (port collapsing) or
	 * it must be equal to the same value as the rule port range.
	 */
	if (rule->ir_topo == ILB_TOPO_IMPL_DSR) {
		if (rule->ir_max_port != max_port ||
		    rule->ir_min_port != min_port) {
			ret = EINVAL;
			goto done;
		}
	} else {
		if ((range != rule->ir_max_port - rule->ir_min_port) &&
		    range != 0) {
			ret = EINVAL;
			goto done;
		}
	}
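
	/*
	 * Example (added for clarity): for a rule on ports 5000-5009
	 * (range 9), a DSR server must specify exactly 5000-5009; a
	 * NAT/half-NAT server may specify a single port (range 0, port
	 * collapsing) or any 10-port window such as 6000-6009.
	 */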
	/* Check for duplicate. */
	for (server = rule->ir_servers; server != NULL;
	    server = server->iser_next) {
		if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, &info->addr) ||
		    strcasecmp(server->iser_name, info->name) == 0) {
			break;
		}
	}
	if (server != NULL) {
		ret = EEXIST;
		goto done;
	}

	if ((server = kmem_zalloc(sizeof (ilb_server_t), KM_NOSLEEP)) == NULL) {
		ret = ENOMEM;
		goto done;
	}

	(void) memcpy(server->iser_name, info->name, ILB_SERVER_NAMESZ - 1);
	(void) inet_ntop(AF_INET6, &info->addr, server->iser_ip_addr,
	    sizeof (server->iser_ip_addr));
	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
	server->iser_ksp = ilb_server_kstat_init(stackid, rule, server);
	if (server->iser_ksp == NULL) {
		kmem_free(server, sizeof (ilb_server_t));
		ret = ENOMEM;
		goto done;
	}

	server->iser_stackid = stackid;
	server->iser_addr_v6 = info->addr;
	server->iser_min_port = min_port;
	server->iser_max_port = max_port;
	if (min_port != max_port)
		server->iser_port_range = B_TRUE;
	else
		server->iser_port_range = B_FALSE;

	/*
	 * If the rule uses NAT, find/create the NAT source entry to use
	 * for this server.
	 */
	if (rule->ir_topo == ILB_TOPO_IMPL_NAT) {
		in_port_t port;

		/*
		 * If the server uses a port range, our port allocation
		 * scheme needs to treat it as a wildcard.  Refer to the
		 * comments in ilb_nat.c about the scheme.
		 */
		if (server->iser_port_range)
			port = 0;
		else
			port = server->iser_min_port;

		if ((ret = ilb_create_nat_src(ilbs, &server->iser_nat_src,
		    &server->iser_addr_v6, port, &rule->ir_nat_src_start,
		    num_nat_src_v6(&rule->ir_nat_src_start,
		    &rule->ir_nat_src_end))) != 0) {
			kstat_delete_netstack(server->iser_ksp, stackid);
			kmem_free(server, sizeof (ilb_server_t));
			goto done;
		}
	}

	/*
	 * The iser_lock is only used to protect iser_refcnt.  All the other
	 * fields in ilb_server_t should not change, except for iser_enabled.
	 * The worst thing that can happen if iser_enabled is messed up is
	 * that one or two packets may not be load balanced to a server
	 * correctly.
	 */
	server->iser_refcnt = 1;
	server->iser_enabled = info->flags & ILB_SERVER_ENABLED ? B_TRUE :
	    B_FALSE;
	mutex_init(&server->iser_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&server->iser_cv, NULL, CV_DEFAULT, NULL);

	/* Let the load balancing algorithm know about the addition. */
	ASSERT(rule->ir_alg != NULL);
	if ((ret = rule->ir_alg->ilb_alg_server_add(server,
	    rule->ir_alg->ilb_alg_data)) != 0) {
		kstat_delete_netstack(server->iser_ksp, stackid);
		kmem_free(server, sizeof (ilb_server_t));
		goto done;
	}

	/*
	 * No need to hold ir_lock since no other thread should manipulate
	 * the following fields until ILB_RULE_BUSY is cleared.
	 */
	if (rule->ir_servers == NULL) {
		server->iser_next = NULL;
	} else {
		server->iser_next = rule->ir_servers;
	}
	rule->ir_servers = server;
	ILB_R_KSTAT(rule, num_servers);

done:
	mutex_enter(&rule->ir_lock);
	rule->ir_flags &= ~ILB_RULE_BUSY;
	cv_signal(&rule->ir_cv);
	mutex_exit(&rule->ir_lock);
	return (ret);
}
/* The routine executed by the delayed rule processing taskq. */
static void
ilb_server_del_tq(void *arg)
{
	ilb_server_t *server = (ilb_server_t *)arg;

	mutex_enter(&server->iser_lock);
	while (server->iser_refcnt > 1)
		cv_wait(&server->iser_cv, &server->iser_lock);
	kstat_delete_netstack(server->iser_ksp, server->iser_stackid);
	kmem_free(server, sizeof (ilb_server_t));
}
/*
 * Delete a back end server from a rule.  If the address is IPv4, it is assumed
 * to be passed in as a mapped address.
 */
int
ilb_server_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name,
    ilb_rule_t *rule, in6_addr_t *addr)
{
	ilb_server_t *server;
	ilb_server_t *prev_server;
	int ret = 0;

	ASSERT((rule == NULL && rule_name != NULL) ||
	    (rule != NULL && rule_name == NULL));
	if (rule == NULL) {
		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
		    &ret)) == NULL) {
			return (ret);
		}
	}

	mutex_enter(&rule->ir_lock);
	/* If someone is already doing server add/del, sleep and wait. */
	while (rule->ir_flags & ILB_RULE_BUSY) {
		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
			if (rule_name != NULL) {
				if (--rule->ir_refcnt <= 2)
					cv_signal(&rule->ir_cv);
			}
			mutex_exit(&rule->ir_lock);
			return (EINTR);
		}
	}
	/*
	 * Set the rule to be busy to make sure that no new packet can
	 * use this rule.
	 */
	rule->ir_flags |= ILB_RULE_BUSY;

	/* Now wait for all other guys to finish their work. */
	while (rule->ir_refcnt > 2) {
		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
			mutex_exit(&rule->ir_lock);
			ret = EINTR;
			goto done;
		}
	}
	mutex_exit(&rule->ir_lock);

	prev_server = NULL;
	for (server = rule->ir_servers; server != NULL;
	    prev_server = server, server = server->iser_next) {
		if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, addr))
			break;
	}
	if (server == NULL) {
		ret = ENOENT;
		goto done;
	}

	/*
	 * Let the load balancing algorithm know about the removal.
	 * The algorithm may disallow the removal...
	 */
	if ((ret = rule->ir_alg->ilb_alg_server_del(server,
	    rule->ir_alg->ilb_alg_data)) != 0) {
		goto done;
	}

	if (prev_server == NULL)
		rule->ir_servers = server->iser_next;
	else
		prev_server->iser_next = server->iser_next;

	ILB_R_KSTAT_UPDATE(rule, num_servers, -1);

	/*
	 * Mark the server as disabled so that if there is any sticky cache
	 * using this server around, it won't be used.
	 */
	server->iser_enabled = B_FALSE;

	mutex_enter(&server->iser_lock);

	/*
	 * De-allocate the NAT source array.  The individual
	 * ilb_nat_src_entry_t may not go away if there is still a conn
	 * using it.  The NAT source timer will do the garbage collection.
	 */
	ilb_destroy_nat_src(&server->iser_nat_src);

	/* If there is a hard limit on when a server should die, set it. */
	if (rule->ir_conn_drain_timeout != 0) {
		(void) atomic_swap_64((uint64_t *)&server->iser_die_time,
		    ddi_get_lbolt64() +
		    SEC_TO_TICK(rule->ir_conn_drain_timeout));
	}

	if (server->iser_refcnt > 1) {
		(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_server_del_tq,
		    server, TQ_SLEEP);
		mutex_exit(&server->iser_lock);
	} else {
		kstat_delete_netstack(server->iser_ksp, server->iser_stackid);
		kmem_free(server, sizeof (ilb_server_t));
	}

done:
	mutex_enter(&rule->ir_lock);
	rule->ir_flags &= ~ILB_RULE_BUSY;
	if (rule_name != NULL)
		rule->ir_refcnt--;
	cv_signal(&rule->ir_cv);
	mutex_exit(&rule->ir_lock);
	return (ret);
}
/*
 * First check if the destination of the ICMP message matches a VIP of
 * a rule.  If it does not, just return ILB_PASSED.
 *
 * If the destination matches a VIP:
 *
 * For ICMP_ECHO_REQUEST, generate a response on behalf of the back end
 * server.
 *
 * For ICMP_DEST_UNREACHABLE fragmentation needed, check inside the payload
 * and see which back end server we should send this message to.  And we
 * need to do NAT on both the payload message and the outside IP packet.
 *
 * For other ICMP messages, drop them.
 */
static int
ilb_icmp_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha,
    icmph_t *icmph, ipaddr_t *lb_dst)
{
	ipaddr_t vip;
	ilb_rule_t *rule;
	in6_addr_t addr6;

	if (!ilb_rule_match_vip_v4(ilbs, ipha->ipha_dst, &rule))
		return (ILB_PASSED);

	if ((uint8_t *)icmph + sizeof (icmph_t) > mp->b_wptr) {
		ILB_R_KSTAT(rule, icmp_dropped);
		ILB_RULE_REFRELE(rule);
		return (ILB_DROPPED);
	}

	switch (icmph->icmph_type) {
	case ICMP_ECHO_REQUEST:
		ILB_R_KSTAT(rule, icmp_echo_processed);
		ILB_RULE_REFRELE(rule);

		icmph->icmph_type = ICMP_ECHO_REPLY;
		icmph->icmph_checksum = 0;
		icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(ipha), 0);
		ipha->ipha_ttl =
		    ilbs->ilbs_netstack->netstack_ip->ips_ip_def_ttl;
		*lb_dst = ipha->ipha_src;
		vip = ipha->ipha_dst;
		ipha->ipha_dst = ipha->ipha_src;
		ipha->ipha_src = vip;
		return (ILB_BALANCED);
	case ICMP_DEST_UNREACHABLE: {
		int ret;

		if (icmph->icmph_code != ICMP_FRAGMENTATION_NEEDED) {
			ILB_R_KSTAT(rule, icmp_dropped);
			ILB_RULE_REFRELE(rule);
			return (ILB_DROPPED);
		}
		if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IP, ipha, icmph,
		    &addr6)) {
			ILB_R_KSTAT(rule, icmp_2big_processed);
			ret = ILB_BALANCED;
		} else {
			ILB_R_KSTAT(rule, icmp_2big_dropped);
			ret = ILB_DROPPED;
		}
		ILB_RULE_REFRELE(rule);
		IN6_V4MAPPED_TO_IPADDR(&addr6, *lb_dst);
		return (ret);
	}
	default:
		ILB_R_KSTAT(rule, icmp_dropped);
		ILB_RULE_REFRELE(rule);
		return (ILB_DROPPED);
	}
}
static int
ilb_icmp_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h,
    icmp6_t *icmp6, in6_addr_t *lb_dst)
{
	ilb_rule_t *rule;

	if (!ilb_rule_match_vip_v6(ilbs, &ip6h->ip6_dst, &rule))
		return (ILB_PASSED);

	if ((uint8_t *)icmp6 + sizeof (icmp6_t) > mp->b_wptr) {
		ILB_R_KSTAT(rule, icmp_dropped);
		ILB_RULE_REFRELE(rule);
		return (ILB_DROPPED);
	}

	switch (icmp6->icmp6_type) {
	case ICMP6_ECHO_REQUEST: {
		int hdr_len;

		ILB_R_KSTAT(rule, icmp_echo_processed);
		ILB_RULE_REFRELE(rule);

		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
		icmp6->icmp6_cksum = ip6h->ip6_plen;
		hdr_len = (char *)icmp6 - (char *)ip6h;
		icmp6->icmp6_cksum = IP_CSUM(mp, hdr_len,
		    ilb_pseudo_sum_v6(ip6h, IPPROTO_ICMPV6));
		ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
		ip6h->ip6_hops =
		    ilbs->ilbs_netstack->netstack_ip->ips_ipv6_def_hops;
		*lb_dst = ip6h->ip6_src;
		ip6h->ip6_src = ip6h->ip6_dst;
		ip6h->ip6_dst = *lb_dst;
		return (ILB_BALANCED);
	}
	case ICMP6_PACKET_TOO_BIG: {
		int ret;

		if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IPV6, ip6h, icmp6,
		    lb_dst)) {
			ILB_R_KSTAT(rule, icmp_2big_processed);
			ret = ILB_BALANCED;
		} else {
			ILB_R_KSTAT(rule, icmp_2big_dropped);
			ret = ILB_DROPPED;
		}
		ILB_RULE_REFRELE(rule);
		return (ret);
	}
	default:
		ILB_R_KSTAT(rule, icmp_dropped);
		ILB_RULE_REFRELE(rule);
		return (ILB_DROPPED);
	}
}
/*
 * Common routine to check an incoming packet and decide what to do with it.
 * Called by ilb_check_v4|v6().
 */
static int
ilb_check(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, in6_addr_t *src,
    in6_addr_t *dst, int l3, int l4, void *iph, uint8_t *tph, uint32_t pkt_len,
    in6_addr_t *lb_dst)
{
	in_port_t sport, dport;
	tcpha_t *tcph;
	udpha_t *udph;
	ilb_rule_t *rule;
	ilb_server_t *server;
	boolean_t balanced;
	struct ilb_sticky_s *s = NULL;
	boolean_t busy;
	uint32_t ip_sum, tp_sum;
	ilb_nat_info_t info;
	uint16_t nat_src_idx;
	int ret;

	/*
	 * We don't really need to switch here since both protocols'
	 * ports are at the same offset.  Just prepare for future protocol
	 * specific processing.
	 */
	switch (l4) {
	case IPPROTO_TCP:
		if (tph + TCP_MIN_HEADER_LENGTH > mp->b_wptr)
			return (ILB_DROPPED);
		tcph = (tcpha_t *)tph;
		sport = tcph->tha_lport;
		dport = tcph->tha_fport;
		break;
	case IPPROTO_UDP:
		if (tph + sizeof (udpha_t) > mp->b_wptr)
			return (ILB_DROPPED);
		udph = (udpha_t *)tph;
		sport = udph->uha_src_port;
		dport = udph->uha_dst_port;
		break;
	default:
		return (ILB_PASSED);
	}

	/* Fast path, there is an existing conn. */
	if (ilb_check_conn(ilbs, l3, iph, l4, tph, src, dst, sport, dport,
	    pkt_len, lb_dst)) {
		return (ILB_BALANCED);
	}

	/*
	 * If there is no existing connection for the incoming packet, check
	 * to see if the packet matches a rule.  If not, just let IP decide
	 * what to do with it.
	 *
	 * Note: a reply from back end server should not match a rule.  A
	 * reply should match one existing conn.
	 */
	rule = ilb_rule_hash(ilbs, l3, l4, dst, dport, ill->ill_zoneid,
	    pkt_len, &busy);
	if (rule == NULL) {
		/* If the rule is busy, just drop the packet. */
		if (busy)
			return (ILB_DROPPED);
		else
			return (ILB_PASSED);
	}

	/*
	 * The packet matches a rule, use the rule load balance algorithm
	 * to find a server.
	 */
	balanced = rule->ir_alg->ilb_alg_lb(src, sport, dst, dport,
	    rule->ir_alg->ilb_alg_data, &server);
	/*
	 * This can only happen if there is no server in a rule or all
	 * the servers are currently disabled.
	 */
	if (!balanced)
		goto no_server;

	/*
	 * If the rule is sticky enabled, we need to check the sticky table.
	 * If there is a sticky entry for the client, use the previous server
	 * instead of the one found above (note that both can be the same).
	 * If there is no entry for that client, add an entry to the sticky
	 * table.  Both the find and add are done in ilb_sticky_find_add()
	 * to avoid checking for duplicate when adding an entry.
	 */
	if (rule->ir_flags & ILB_RULE_STICKY) {
		in6_addr_t addr;

		V6_MASK_COPY(*src, rule->ir_sticky_mask, addr);
		if ((server = ilb_sticky_find_add(ilbs, rule, &addr, server,
		    &s, &nat_src_idx)) == NULL) {
			ILB_R_KSTAT(rule, nomem_pkt_dropped);
			ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len);
			goto no_server;
		}
	}
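
	/*
	 * Example (added for clarity): with a v4 rule and a sticky mask of
	 * ::ffff:255.255.255.0, clients 192.168.5.1 and 192.168.5.200 both
	 * mask to 192.168.5.0 and therefore share one sticky entry, i.e.
	 * the same back end server.
	 */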
	/*
	 * We are holding a reference on the rule, so the server
	 * cannot go away.
	 */
	*lb_dst = server->iser_addr_v6;
	ILB_S_KSTAT(server, pkt_processed);
	ILB_S_KSTAT_UPDATE(server, bytes_processed, pkt_len);

	switch (rule->ir_topo) {
	case ILB_TOPO_IMPL_NAT: {
		ilb_nat_src_entry_t *src_ent;
		uint16_t *src_idx;

		/*
		 * We create a cache even if it is not a SYN segment.
		 * The server should return a RST.  When we see the
		 * RST, we will destroy this cache.  But by having
		 * a cache, we know how to NAT the returned RST.
		 */
		info.vip = *dst;
		info.dport = dport;
		info.src = *src;
		info.sport = sport;

		/* If stickiness is enabled, use the same source address */
		if (s != NULL)
			src_idx = &nat_src_idx;
		else
			src_idx = NULL;

		if ((src_ent = ilb_alloc_nat_addr(server->iser_nat_src,
		    &info.nat_src, &info.nat_sport, src_idx)) == NULL) {
			if (s != NULL)
				ilb_sticky_refrele(s);
			ILB_R_KSTAT(rule, pkt_dropped);
			ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
			ILB_R_KSTAT(rule, noport_pkt_dropped);
			ILB_R_KSTAT_UPDATE(rule, noport_bytes_dropped, pkt_len);
			ret = ILB_DROPPED;
			break;
		}
		info.src_ent = src_ent;
		info.nat_dst = server->iser_addr_v6;
		if (rule->ir_port_range && server->iser_port_range) {
			info.nat_dport = htons(ntohs(dport) -
			    rule->ir_min_port + server->iser_min_port);
		} else {
			info.nat_dport = htons(server->iser_min_port);
		}
, src
, sport
, dst
,
1819 dport
, &info
, &ip_sum
, &tp_sum
, s
) != 0) {
1820 ILB_R_KSTAT(rule
, pkt_dropped
);
1821 ILB_R_KSTAT_UPDATE(rule
, bytes_dropped
, pkt_len
);
1822 ILB_R_KSTAT(rule
, nomem_pkt_dropped
);
1823 ILB_R_KSTAT_UPDATE(rule
, nomem_bytes_dropped
, pkt_len
);
1827 ilb_full_nat(l3
, iph
, l4
, tph
, &info
, ip_sum
, tp_sum
, B_TRUE
);
1831 case ILB_TOPO_IMPL_HALF_NAT
:
1833 info
.nat_dst
= server
->iser_addr_v6
;
1835 if (rule
->ir_port_range
&& server
->iser_port_range
) {
1836 info
.nat_dport
= htons(ntohs(dport
) -
1837 rule
->ir_min_port
+ server
->iser_min_port
);
1839 info
.nat_dport
= htons(server
->iser_min_port
);
1842 if (ilb_conn_add(ilbs
, rule
, server
, src
, sport
, dst
,
1843 dport
, &info
, &ip_sum
, &tp_sum
, s
) != 0) {
1844 ILB_R_KSTAT(rule
, pkt_dropped
);
1845 ILB_R_KSTAT_UPDATE(rule
, bytes_dropped
, pkt_len
);
1846 ILB_R_KSTAT(rule
, nomem_pkt_dropped
);
1847 ILB_R_KSTAT_UPDATE(rule
, nomem_bytes_dropped
, pkt_len
);
1851 ilb_half_nat(l3
, iph
, l4
, tph
, &info
, ip_sum
, tp_sum
, B_TRUE
);
1855 case ILB_TOPO_IMPL_DSR
:
1857 * By decrementing the sticky refcnt, the period of
1858 * stickiness (life time of ilb_sticky_t) will be
1859 * from now to (now + default expiry time).
1862 ilb_sticky_refrele(s
);
1866 cmn_err(CE_PANIC
, "data corruption unknown topology: %p",
1870 ILB_RULE_REFRELE(rule
);
1874 /* This can only happen if there is no server available. */
1875 ILB_R_KSTAT(rule
, pkt_dropped
);
1876 ILB_R_KSTAT_UPDATE(rule
, bytes_dropped
, pkt_len
);
1877 ILB_RULE_REFRELE(rule
);
1878 return (ILB_DROPPED
);
int
ilb_check_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha, int l4,
    uint8_t *tph, ipaddr_t *lb_dst)
{
	in6_addr_t v6_src, v6_dst, v6_lb_dst;
	int ret;

	ASSERT(DB_REF(mp) == 1);

	if (l4 == IPPROTO_ICMP) {
		return (ilb_icmp_v4(ilbs, ill, mp, ipha, (icmph_t *)tph,
		    lb_dst));
	}

	IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &v6_src);
	IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &v6_dst);
	ret = ilb_check(ilbs, ill, mp, &v6_src, &v6_dst, IPPROTO_IP, l4, ipha,
	    tph, ntohs(ipha->ipha_length), &v6_lb_dst);
	if (ret == ILB_BALANCED)
		IN6_V4MAPPED_TO_IPADDR(&v6_lb_dst, *lb_dst);
	return (ret);
}
int
ilb_check_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h, int l4,
    uint8_t *tph, in6_addr_t *lb_dst)
{
	uint32_t pkt_len;

	ASSERT(DB_REF(mp) == 1);

	if (l4 == IPPROTO_ICMPV6) {
		return (ilb_icmp_v6(ilbs, ill, mp, ip6h, (icmp6_t *)tph,
		    lb_dst));
	}

	pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
	return (ilb_check(ilbs, ill, mp, &ip6h->ip6_src, &ip6h->ip6_dst,
	    IPPROTO_IPV6, l4, ip6h, tph, pkt_len, lb_dst));
}
void
ilb_get_num_rules(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_rules)
{
	ilb_rule_t *tmp_rule;

	mutex_enter(&ilbs->ilbs_g_lock);
	*num_rules = 0;
	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
	    tmp_rule = tmp_rule->ir_next) {
		if (tmp_rule->ir_zoneid == zoneid)
			(*num_rules)++;
	}
	mutex_exit(&ilbs->ilbs_g_lock);
}
int
ilb_get_num_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
    uint32_t *num_servers)
{
	ilb_rule_t *rule;
	int err;

	if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL)
		return (err);
	*num_servers = rule->ir_kstat.num_servers.value.ui64;
	ILB_RULE_REFRELE(rule);
	return (0);
}
int
ilb_get_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
    ilb_server_info_t *servers, uint32_t *num_servers)
{
	ilb_rule_t *rule;
	ilb_server_t *server;
	size_t cnt;
	int err;

	if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL)
		return (err);
	for (server = rule->ir_servers, cnt = *num_servers;
	    server != NULL && cnt > 0;
	    server = server->iser_next, cnt--, servers++) {
		(void) memcpy(servers->name, server->iser_name,
		    ILB_SERVER_NAMESZ);
		servers->addr = server->iser_addr_v6;
		servers->min_port = htons(server->iser_min_port);
		servers->max_port = htons(server->iser_max_port);
		servers->flags = server->iser_enabled ? ILB_SERVER_ENABLED : 0;
	}
	ILB_RULE_REFRELE(rule);
	*num_servers -= cnt;

	return (0);
}
void
ilb_get_rulenames(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_names,
    char *buf)
{
	ilb_rule_t *tmp_rule;
	int cnt;

	if (*num_names == 0)
		return;

	mutex_enter(&ilbs->ilbs_g_lock);
	for (cnt = 0, tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
	    tmp_rule = tmp_rule->ir_next) {
		if (tmp_rule->ir_zoneid != zoneid)
			continue;

		(void) memcpy(buf, tmp_rule->ir_name, ILB_RULE_NAMESZ);
		buf += ILB_RULE_NAMESZ;
		if (++cnt == *num_names)
			break;
	}
	mutex_exit(&ilbs->ilbs_g_lock);
	*num_names = cnt;
}
int
ilb_rule_list(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_rule_cmd_t *cmd)
{
	ilb_rule_t *rule;
	int err;

	if ((rule = ilb_find_rule(ilbs, zoneid, cmd->name, &err)) == NULL) {
		return (err);
	}

	/*
	 * Except the enabled flags, none of the following will change
	 * in the life time of a rule.  So we don't hold the mutex when
	 * reading them.  The worst is to report a wrong enabled flags.
	 */
	cmd->ip_ver = rule->ir_ipver;
	cmd->proto = rule->ir_proto;
	cmd->min_port = htons(rule->ir_min_port);
	cmd->max_port = htons(rule->ir_max_port);

	cmd->vip = rule->ir_target_v6;
	cmd->algo = rule->ir_alg_type;
	cmd->topo = rule->ir_topo;

	cmd->nat_src_start = rule->ir_nat_src_start;
	cmd->nat_src_end = rule->ir_nat_src_end;

	cmd->conn_drain_timeout = rule->ir_conn_drain_timeout;
	cmd->nat_expiry = rule->ir_nat_expiry;
	cmd->sticky_expiry = rule->ir_sticky_expiry;

	cmd->flags = 0;
	if (rule->ir_flags & ILB_RULE_ENABLED)
		cmd->flags |= ILB_RULE_ENABLED;
	if (rule->ir_flags & ILB_RULE_STICKY) {
		cmd->flags |= ILB_RULE_STICKY;
		cmd->sticky_mask = rule->ir_sticky_mask;
	}

	ILB_RULE_REFRELE(rule);
	return (0);
}
static void *
ilb_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ilb_stack_t *ilbs;
	char tq_name[TASKQ_NAMELEN];

	ilbs = kmem_alloc(sizeof (ilb_stack_t), KM_SLEEP);
	ilbs->ilbs_netstack = ns;

	ilbs->ilbs_rule_head = NULL;
	ilbs->ilbs_g_hash = NULL;
	mutex_init(&ilbs->ilbs_g_lock, NULL, MUTEX_DEFAULT, NULL);

	ilbs->ilbs_kstat = kmem_alloc(sizeof (ilb_g_kstat_t), KM_SLEEP);
	if ((ilbs->ilbs_ksp = ilb_kstat_g_init(stackid, ilbs)) == NULL) {
		kmem_free(ilbs, sizeof (ilb_stack_t));
		return (NULL);
	}

	/*
	 * ilbs_conn/sticky_hash related info is initialized in
	 * ilb_conn/sticky_hash_init().
	 */
	ilbs->ilbs_conn_taskq = NULL;
	ilbs->ilbs_rule_hash_size = ilb_rule_hash_size;
	ilbs->ilbs_conn_hash_size = ilb_conn_hash_size;
	ilbs->ilbs_c2s_conn_hash = NULL;
	ilbs->ilbs_s2c_conn_hash = NULL;
	ilbs->ilbs_conn_timer_list = NULL;

	ilbs->ilbs_sticky_hash = NULL;
	ilbs->ilbs_sticky_hash_size = ilb_sticky_hash_size;
	ilbs->ilbs_sticky_timer_list = NULL;
	ilbs->ilbs_sticky_taskq = NULL;

	/* The allocation is done later when there is a rule using NAT mode. */
	ilbs->ilbs_nat_src = NULL;
	ilbs->ilbs_nat_src_hash_size = ilb_nat_src_hash_size;
	mutex_init(&ilbs->ilbs_nat_src_lock, NULL, MUTEX_DEFAULT, NULL);
	ilbs->ilbs_nat_src_tid = 0;

	/* For listing the conn hash table */
	mutex_init(&ilbs->ilbs_conn_list_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ilbs->ilbs_conn_list_cv, NULL, CV_DEFAULT, NULL);
	ilbs->ilbs_conn_list_busy = B_FALSE;
	ilbs->ilbs_conn_list_cur = 0;
	ilbs->ilbs_conn_list_connp = NULL;

	/* For listing the sticky hash table */
	mutex_init(&ilbs->ilbs_sticky_list_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ilbs->ilbs_sticky_list_cv, NULL, CV_DEFAULT, NULL);
	ilbs->ilbs_sticky_list_busy = B_FALSE;
	ilbs->ilbs_sticky_list_cur = 0;
	ilbs->ilbs_sticky_list_curp = NULL;

	(void) snprintf(tq_name, sizeof (tq_name), "ilb_rule_taskq_%p",
	    (void *)ns);
	ilbs->ilbs_rule_taskq = taskq_create(tq_name, ILB_RULE_TASKQ_NUM_THR,
	    minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);

	return (ilbs);
}
static void
ilb_stack_shutdown(netstackid_t stackid, void *arg)
{
	ilb_stack_t *ilbs = (ilb_stack_t *)arg;
	ilb_rule_t *tmp_rule;

	ilb_sticky_hash_fini(ilbs);
	ilb_conn_hash_fini(ilbs);
	mutex_enter(&ilbs->ilbs_g_lock);
	while ((tmp_rule = ilbs->ilbs_rule_head) != NULL) {
		ilb_rule_hash_del(tmp_rule);
		ilb_rule_g_del(ilbs, tmp_rule);
		mutex_exit(&ilbs->ilbs_g_lock);
		ilb_rule_del_common(ilbs, tmp_rule);
		mutex_enter(&ilbs->ilbs_g_lock);
	}
	mutex_exit(&ilbs->ilbs_g_lock);
	if (ilbs->ilbs_nat_src != NULL)
		ilb_nat_src_fini(ilbs);
}
static void
ilb_stack_fini(netstackid_t stackid, void *arg)
{
	ilb_stack_t *ilbs = (ilb_stack_t *)arg;

	ilb_rule_hash_fini(ilbs);
	taskq_destroy(ilbs->ilbs_rule_taskq);
	ilb_kstat_g_fini(stackid, ilbs);
	kmem_free(ilbs->ilbs_kstat, sizeof (ilb_g_kstat_t));
	kmem_free(ilbs, sizeof (ilb_stack_t));
}
void
ilb_ddi_g_init(void)
{
	netstack_register(NS_ILB, ilb_stack_init, ilb_stack_shutdown,
	    ilb_stack_fini);
}

void
ilb_ddi_g_destroy(void)
{
	netstack_unregister(NS_ILB);
	ilb_conn_cache_fini();
	ilb_sticky_cache_fini();
}