/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016, Joyent, Inc.  All rights reserved.
 */
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/kstat.h>
#include <sys/sdt.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>
/*
 * What we use so that the zones framework can tell us about new zones,
 * which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int	netstack_initialized = 0;
/*
 * Track the registered netstacks.
 * The global lock protects
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;
/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry	ns_reg[NS_MAX];
/*
 * Global list of existing stacks.  We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc.). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;
/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t		sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t			*sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list	*netstack_shared_zones;
static struct shared_kstat_list	*netstack_shared_kstats;
static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);
typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);

static void	apply_all_netstacks(int, applyfn_t *);
static void	apply_all_modules(netstack_t *, applyfn_t *);
static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);
void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}
/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present, thus functions
 * like zone_find_by_* will fail, hence the create function can not
 * use many zones kernel functions including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);
	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		/*
		 * We need to be careful here. We could actually have a
		 * netstack being created as we speak waiting for us to let
		 * go of this lock to proceed. It may have set
		 * NSS_CREATE_NEEDED, but not have gotten to the point of
		 * completing it yet. If NSS_CREATE_NEEDED, we can safely
		 * just remove it here and never create the module. However,
		 * if NSS_CREATE_INPROGRESS is set, we need to still flag this
		 * module for shutdown and deletion, just as though it had
		 * reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else
		 * is in progress in the system for this netstack and wait
		 * for it to complete. Secondly, because the zone is being
		 * created, we know that the following call to
		 * apply_all_netstacks will block on the zone finishing its
		 * initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;
		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);
	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
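
/*
 * Continuing the hypothetical "foo" sketch above: the module's _fini(9E)
 * routine would undo the registration, which runs the shutdown and
 * destroy callbacks for every existing stack before the moduleid slot
 * is cleared for reuse:
 *
 *	netstack_unregister(NS_FOO);
 */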
/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);
	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found; initialize the new netstack and link it onto the list */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;
	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);
	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);
	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);
	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one set in netstack_zone_create()
	 */
	netstack_rele(ns);
}
/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);
	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
/*
 * Apply a function to all netstacks for a particular moduleid.
 *
 * If there is any zone activity (due to a zone being created, shutdown,
 * or destroyed) we wait for that to complete before we proceed. This ensures
 * that the moduleids are processed in order when a zone is created or
 * destroyed.
 *
 * The applyfn has to drop netstack_g_lock if it does some work.
 * In that case we don't follow netstack_next after reacquiring the lock,
 * even if it is possible to do so without any hazards. This is
 * because we want the design to allow for the list of netstacks threaded
 * by netstack_next to change in any arbitrary way during the time the
 * lock was dropped.
 *
 * It is safe to restart the loop at netstack_head since the applyfn
 * changes netstack_m_state as it processes things, so a subsequent
 * pass through will have no effect in applyfn, hence the loop will terminate
 * in at worst O(N^2).
 */
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
	netstack_t *ns;
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	while (ns != NULL) {
		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else {
			ns = ns->netstack_next;
		}
	}
	mutex_exit(&netstack_g_lock);
}
/*
 * Apply a function to all moduleids for a particular netstack.
 *
 * Since the netstack linkage doesn't matter in this case we can
 * ignore whether the function drops the lock.
 */
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = 0; i < NS_MAX; i++) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}
/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = NS_MAX - 1; i >= 0; i--) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}
/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;
	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;
	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;
	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
/*
 * If somebody is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;
	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}
/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}
/*
 * Get the stack instance used in caller's zone.
 * Increases the reference count, caller must do a netstack_rele.
 * It can't be called after zone_destroy() has started.
 */
netstack_t *
netstack_get_current(void)
{
	netstack_t *ns;

	ns = curproc->p_zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		return (NULL);

	netstack_hold(ns);

	return (ns);
}
/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
	zoneid_t zoneid = crgetzoneid(cr);

	/* Handle the case when cr_zone is NULL */
	if (zoneid == (zoneid_t)-1)
		zoneid = GLOBAL_ZONEID;

	/* For performance ... */
	if (curproc->p_zone->zone_id == zoneid)
		return (netstack_get_current());
	else
		return (netstack_find_by_zoneid(zoneid));
}
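
/*
 * Sketch of the expected caller pattern (foo_stack_t and NS_FOO are
 * assumptions carried over from the hypothetical module above):
 *
 *	netstack_t *ns = netstack_find_by_cred(cr);
 *	foo_stack_t *fs;
 *
 *	if (ns != NULL) {
 *		fs = ns->netstack_modules[NS_FOO];
 *		... use fs ...
 *		netstack_rele(ns);
 *	}
 */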
/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	zone_rele(zone);
	return (ns);
}
982 * the create callback. See the comments in zone_find_by_id_nolock why
983 * that limitation exists.
985 * Increases the reference count if found; caller must do a
988 * If there is no exact match then assume the shared stack instance
991 * Skip the unitialized ones.
994 netstack_find_by_zoneid_nolock(zoneid_t zoneid
)
999 zone
= zone_find_by_id_nolock(zoneid
);
1004 ns
= zone
->zone_netstack
;
1007 if (ns
->netstack_flags
& (NSF_UNINIT
|NSF_CLOSING
))
1012 /* zone_find_by_id_nolock does not have a hold on the zone */
/*
 * Find a stack instance given the stackid (exact match).
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			mutex_exit(&ns->netstack_lock);
			netstack_hold(ns);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}
boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;
	boolean_t rval = B_FALSE;

	mutex_enter(&netstack_g_lock);

	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		if (ns->netstack_stackid == stackid) {
			rval = B_TRUE;
			break;
		}
	}

	mutex_exit(&netstack_g_lock);

	return (rval);
}
void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;
	int i;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;

	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		ASSERT(ns->netstack_flags & NSF_CLOSING);

		for (i = 0; i < NS_MAX; i++) {
			nm_state_t *nms = &ns->netstack_m_state[i];

			cv_destroy(&nms->nms_cv);
		}
		mutex_destroy(&ns->netstack_lock);
		cv_destroy(&ns->netstack_cv);
		kmem_free(ns, sizeof (*ns));
	}
}
void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
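
/*
 * Every netstack_hold() must be balanced by a netstack_rele(); once both
 * the refcnt and netstack_numzones reach zero, the destroy callbacks run
 * and the netstack_t is freed. Minimal sketch:
 *
 *	netstack_hold(ns);
 *	... hand ns to asynchronous work ...
 *	netstack_rele(ns);	(may be the call that frees ns)
 */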
/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}
void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}
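
/*
 * Illustrative sketch (module and kstat names are assumptions): a module
 * would typically create a per-stack kstat from its create callback and
 * remove it from its destroy callback:
 *
 *	ks = kstat_create_netstack("foo", 0, "foo_stats", "net",
 *	    KSTAT_TYPE_NAMED, nstats, 0, stackid);
 *	if (ks != NULL)
 *		kstat_install(ks);
 *	...
 *	kstat_delete_netstack(ks, stackid);
 */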
static void
netstack_shared_zone_add(zoneid_t zoneid)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
	sz->sz_zoneid = zoneid;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sz->sz_next = netstack_shared_zones;
	netstack_shared_zones = sz;

	/*
	 * Perform kstat_zone_add for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_add(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}
static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
	struct shared_zone_list **szp, *sz;
	struct shared_kstat_list *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sz = NULL;
	for (szp = &netstack_shared_zones; *szp != NULL;
	    szp = &((*szp)->sz_next)) {
		if ((*szp)->sz_zoneid == zoneid) {
			sz = *szp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sz != NULL);
	*szp = sz->sz_next;
	sz->sz_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_remove(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sz, sizeof (*sz));
}
static void
netstack_shared_kstat_add(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
	sk->sk_kstat = ks;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sk->sk_next = netstack_shared_kstats;
	netstack_shared_kstats = sk;

	/*
	 * Perform kstat_zone_add for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_add(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}
static void
netstack_shared_kstat_remove(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list **skp, *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sk = NULL;
	for (skp = &netstack_shared_kstats; *skp != NULL;
	    skp = &((*skp)->sk_next)) {
		if ((*skp)->sk_kstat == ks) {
			sk = *skp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sk != NULL);
	*skp = sk->sk_next;
	sk->sk_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_remove(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
	kmem_free(sk, sizeof (*sk));
}
/*
 * If a zoneid uses the shared (global) stack, return true.
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
	struct shared_zone_list *sz;

	mutex_enter(&netstack_shared_lock);
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		if (sz->sz_zoneid == zoneid) {
			mutex_exit(&netstack_shared_lock);
			return (B_TRUE);
		}
	}
	mutex_exit(&netstack_shared_lock);
	return (B_FALSE);
}
/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * We currently do not check that the stackid/zoneids are valid, since there
 * is no need for that. But this should only be done for ids that are
 * known to be valid.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
	if (netstack_find_shared_zoneid(zoneid))
		return (GLOBAL_ZONEID);
	else
		return (zoneid);
}

zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
	return (netstackid_to_zoneid(ns->netstack_stackid));
}
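
/*
 * For example, under the current 1:1 allocation: an exclusive-IP zone
 * with zoneid 5 uses netstackid 5, so zoneid_to_netstackid(5) == 5,
 * while a shared-stack zone with zoneid 7 maps to the global stack,
 * so zoneid_to_netstackid(7) == GLOBAL_NETSTACKID (== GLOBAL_ZONEID).
 */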
/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}
netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* Skip those that aren't really here */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}