 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, Joyent, Inc.
 * Copyright 2015 Garrett D'Amore <garrett@damore.org>
 *
 * The GLDv3 framework locking - The MAC layer
 * --------------------------------------------
 *
 * The MAC layer is central to the GLD framework and can provide the locking
 * framework needed for itself and for the use of MAC clients. MAC end points
 * are fairly disjoint and don't share a lot of state. So a coarse grained
 * multi-threading scheme is to single thread all create/modify/delete or set
 * type of control operations on a per mac end point while allowing data
 * threads to proceed concurrently.
 *
 * Control operations (set) that modify a mac end point are always serialized
 * on a per mac end point basis; we have at most one such thread per mac end
 * point at a time.
 *
 * All other operations that are not serialized are essentially multi-threaded.
 * For example a control operation (get) like getting statistics which may not
 * care about reading values atomically, or data threads sending or receiving
 * data. Mostly these types of operations don't modify the control state. Any
 * state these operations care about is protected using traditional locks.
 *
 * The perimeter only serializes serial operations. It does not imply there
 * aren't any other concurrent operations. However a serialized operation may
 * sometimes need to make sure it is the only thread. In this case it needs
 * to use reference counting mechanisms to cv_wait until any current data
 * threads are done.
 *
 * The mac layer itself does not hold any locks across a call to another layer.
 * The perimeter is however held across a down call to the driver to make the
 * whole control operation atomic with respect to other control operations.
 * Also the data path and get type control operations may proceed concurrently.
 * These operations synchronize with the single serial operation on a given mac
 * end point using regular locks. The perimeter ensures that conflicting
 * operations, like say a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point, don't interfere with each other and also ensures that the
 * changes in the mac layer and the call to the underlying driver to say add a
 * multicast address are done atomically without interference from a thread
 * trying to delete the same address.
 *
 * For example, consider
 *
 * mac_multicst_add()
 * {
 *	mac_perimeter_enter();	serialize all control operations
 *
 *	grab list lock		protect against access by data threads
 *	add to list
 *	drop list lock
 *
 *	call driver's mi_multicst
 *
 *	mac_perimeter_exit();
 * }
 *
 * To lessen the number of serialization locks and simplify the lock hierarchy,
 * we serialize all the control operations on a per mac end point by using a
 * single serialization lock called the perimeter. We allow recursive entry into
 * the perimeter to facilitate use of this mechanism by both the mac client and
 * the MAC layer itself.
 *
 * MAC client means an entity that does an operation on a mac handle
 * obtained from a mac_open/mac_client_open. Similarly MAC driver means
 * an entity that does an operation on a mac handle obtained from a
 * mac_register. An entity could be both client and driver but on different
 * handles (e.g. aggr) and should only make the corresponding mac interface
 * calls, i.e. mac driver interface or mac client interface, as appropriate
 * for that handle.
 *
 * General rules.
 * --------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model is the preferred solution.
 *
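 * An illustrative sketch of the R3 model follows (hypothetical client
 * pseudo-code; the exact mac calls bracketed by the perimeter will vary):
 *
 * client_control_op()
 * {
 *	mac_perim_enter_by_mh();	no client locks held
 *
 *	sequence of mac calls		atomic w.r.t. other control ops
 *
 *	mac_perim_exit();
 * }
 *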
 * R4. However if a MAC client has a lot of global state across all mac end
 * points the per mac end point serialization may not be sufficient. In this
 * case the client may choose to use global locks or use its own serialization.
 * To avoid deadlocks, these client layer locks held across the mac calls
 * in the control path must never be acquired by the data path for the reason
 * mentioned below.
 *
 * (Assume that a control operation that holds a client lock blocks in the
 * mac layer waiting for upcall reference counts to drop to zero. If an upcall
 * data thread that holds this reference count tries to acquire the same
 * client lock subsequently, it will deadlock.)
 *
 * A MAC client may follow either the R3 model or the R4 model, but can't
 * mix both. In the former, the hierarchy is Perim -> client locks, but in
 * the latter it is client locks -> Perim.
 *
 * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
 * context since they may block while trying to acquire the perimeter.
 * In addition some calls may block waiting for upcall refcnts to come down to
 * zero.
 *
 * R6. MAC clients must make sure that they are single threaded and all threads
 * from the top (in particular data threads) have finished before calling
 * mac_client_close. The MAC framework does not track the number of client
 * threads using the mac client handle. Also mac clients must make sure
 * they have undone all the control operations before calling mac_client_close.
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
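 * For instance, a well-behaved teardown under R6 might look like the
 * following sketch (illustrative pseudo-code, not taken from a real client):
 *
 * client_close()
 * {
 *	quiesce and wait for the client's own data threads
 *	mac_unicast_remove();		undo mac_unicast_add
 *	mac_multicast_remove();		undo mac_multicast_add
 *	mac_client_close();		safe, client is now single threaded
 * }
 *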
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts. In simple
 * callbacks it may be ok to hold a lock and call the callbacks, but this is
 * harder to maintain in the general case of arbitrary callbacks.
 *
 * R12. The MAC layer must protect upcall notification callbacks using reference
 * counts rather than holding locks across the callbacks.
 *
 * R13. Given the variety of drivers, it is preferable if the MAC layer can make
 * sure that any pointers (such as mac ring pointers) it passes to the driver
 * remain valid until mac unregister time. Currently the mac layer achieves
 * this by using generation numbers for rings and freeing the mac rings only
 * at unregister time. The MAC layer must provide a layer of indirection and
 * must not expose underlying driver rings or driver data structures/pointers
 * directly to MAC clients.
 *
 * R14. It would be preferable if MAC drivers don't hold any locks across any
 * mac call. However at a minimum they must not hold any locks across data
 * upcalls. They must also make sure that all references to mac data structures
 * are cleaned up and that it is single threaded at mac_unregister time.
 *
 * R15. MAC driver interfaces don't block and so the action may be done
 * asynchronously in a separate thread as for example handling notifications.
 * The driver must not assume that the action is complete when the call
 * returns.
 *
 * R16. Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(). They are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See comments in mac_rx_ring().
 *
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcall
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point.
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock [i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac
 * provided perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls. In the case of clients that use their own locks the hierarchy
 * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
 * calls mac_perim_enter/exit in this case.
 *
 * Subflow creation rules
 * ---------------------------
 * o In case of a user specified cpulist present on the underlying link and
 *   on flows, the flow's cpulist must be a subset of the underlying link's.
 * o In case of a user specified fanout mode present on link and flow, the
 *   subflow fanout count has to be less than or equal to that of the
 *   underlying link. The cpu-bindings for the subflows will be a subset of
 *   the underlying link.
 * o If no cpulist is specified on either the underlying link or the flow, the
 *   underlying link relies on a MAC tunable to provide out of box fanout.
 *   The subflow will have no cpulist (the subflow will be unbound).
 * o If no cpulist is specified on the underlying link, a subflow can
 *   carry either a user-specified cpulist or fanout count. The cpu-bindings
 *   for the subflow will not adhere to the restriction that they need to be a
 *   subset of the underlying link's.
 * o In case where the underlying link is carrying either a user specified
 *   cpulist or fanout mode and for an unspecified subflow, the subflow will be
 *   restricted to a subset of the underlying link's CPUs.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 *   figure out a right fanout count. In such cases the fanout count will
 *   override the unbound cpu-binding behavior.
 * o In addition to this, while cycling between flow and link properties, we
 *   impose a restriction that if a link property has a subflow with
 *   user-specified attributes, we will not allow changing the link property.
 *   The administrator needs to reset all the user specified properties for the
 *   subflows before attempting a link property change.
 * Some of the above rules can be overridden by specifying additional command
 * line options while creating or modifying link or subflow properties.
 *
 * Datapath
 * --------
 *
 * For information on the datapath, the world of soft rings, hardware rings, how
 * it is structured, and the path of an mblk_t between a driver and a mac
 * client, see mac_sched.c.
 */
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/id_space.h>
#include <sys/esunddi.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/list.h>
#include <sys/modhash.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_stat.h>
#include <sys/mac_impl.h>
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/mac_flow.h>
#include <sys/ddi_intr_impl.h>
#include <sys/disp.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/vlan.h>
#include <inet/ip6.h>
#include <sys/exacct.h>
#include <sys/exacct_impl.h>
#include <sys/ethernet.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/cpupart.h>
#include <inet/wifi_ioctl.h>
#define	IMPL_HASHSZ	67	/* prime */

kmem_cache_t		*i_mac_impl_cachep;
mod_hash_t		*i_mac_impl_hash;
krwlock_t		i_mac_impl_lock;
uint_t			i_mac_impl_count;
static kmem_cache_t	*mac_ring_cache;
static id_space_t	*minor_ids;
static uint32_t		minor_count;
static pool_event_cb_t	mac_pool_event_reg;
/*
 * Logging stuff. Perhaps mac_logging_interval could be broken into
 * mac_flow_log_interval and mac_link_log_interval if we want to be
 * able to schedule them differently.
 */
uint_t			mac_logging_interval;
boolean_t		mac_flow_log_enable;
boolean_t		mac_link_log_enable;
timeout_id_t		mac_logging_timer;
#define	MACTYPE_KMODDIR	"mac"
#define	MACTYPE_HASHSZ	67
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;
/*
 * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
 * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
 * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
 * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
 */
int mac_tx_percpu_cnt;
int mac_tx_percpu_cnt_max = 128;
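
/*
 * Illustrative sketch (assumption: this approximates the data path in
 * mac_tx, which lives in mac_client.c, not in this file). With
 * mac_tx_percpu_cnt computed as a 2**N - 1 mask in mac_init() below, a
 * data thread picks its per cpu slot roughly as follows:
 *
 *	i = CPU->cpu_seqid & mac_tx_percpu_cnt;
 *	mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *	mcip->mci_tx_pcpu[i].pcpu_tx_refcnt++;
 *	mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 */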
/*
 * Call back functions for the bridge module. These are guaranteed to be valid
 * when holding a reference on a link or when holding mip->mi_bridge_lock and
 * mi_bridge_link is non-NULL.
 */
mac_bridge_tx_t		mac_bridge_tx_cb;
mac_bridge_rx_t		mac_bridge_rx_cb;
mac_bridge_ref_t	mac_bridge_ref_cb;
mac_bridge_ls_t		mac_bridge_ls_cb;
static int i_mac_constructor(void *, void *, int);
static void i_mac_destructor(void *, void *);
static int i_mac_ring_ctor(void *, void *, int);
static void i_mac_ring_dtor(void *, void *);
static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *);
void mac_tx_client_flush(mac_client_impl_t *);
void mac_tx_client_block(mac_client_impl_t *);
static void mac_rx_ring_quiesce(mac_ring_t *, uint_t);
static int mac_start_group_and_rings(mac_group_t *);
static void mac_stop_group_and_rings(mac_group_t *);
static void mac_pool_event_cb(pool_event_t, int, void *);

typedef struct netinfo_s {
	list_node_t	ni_link;
	void		*ni_record;
	int		ni_size;
	int		ni_type;
} netinfo_t;
/*
 * Module initialization functions.
 */

void
mac_init(void)
{
	mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus :
	    boot_max_ncpus);

	/* Upper bound is mac_tx_percpu_cnt_max */
	if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max)
		mac_tx_percpu_cnt = mac_tx_percpu_cnt_max;

	if (mac_tx_percpu_cnt < 1) {
		/* Someone set max_tx_percpu_cnt_max to 0 or less */
		mac_tx_percpu_cnt = 1;
	}

	ASSERT(mac_tx_percpu_cnt >= 1);
	mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1));
	/*
	 * Make it of the form 2**N - 1 in the range
	 * [0 .. mac_tx_percpu_cnt_max - 1]
	 */
	mac_tx_percpu_cnt--;
	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
	    NULL, NULL, NULL, 0);
	ASSERT(i_mac_impl_cachep != NULL);

	mac_ring_cache = kmem_cache_create("mac_ring_cache",
	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
	    NULL, NULL, 0);
	ASSERT(mac_ring_cache != NULL);

	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);

	mac_soft_ring_init();

	i_mac_impl_count = 0;

	i_mactype_hash = mod_hash_create_extended("mactype_hash",
	    MACTYPE_HASHSZ,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	/*
	 * Allocate an id space to manage minor numbers. The range of the
	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This
	 * leaves half of the 32-bit minors available for driver private use.
	 */
	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
	    MAC_PRIVATE_MINOR-1);
	ASSERT(minor_ids != NULL);
	/* Let's default to 20 seconds */
	mac_logging_interval = 20;
	mac_flow_log_enable = B_FALSE;
	mac_link_log_enable = B_FALSE;
	mac_logging_timer = 0;

	/* Register to be notified of noteworthy pools events */
	mac_pool_event_reg.pec_func = mac_pool_event_cb;
	mac_pool_event_reg.pec_arg = NULL;
	pool_event_cb_register(&mac_pool_event_reg);
}

int
mac_fini(void)
{
	if (i_mac_impl_count > 0 || minor_count > 0)
		return (EBUSY);

	pool_event_cb_unregister(&mac_pool_event_reg);

	id_space_destroy(minor_ids);

	mod_hash_destroy_hash(i_mac_impl_hash);
	rw_destroy(&i_mac_impl_lock);

	kmem_cache_destroy(mac_ring_cache);

	mod_hash_destroy_hash(i_mactype_hash);
	mac_soft_ring_finish();

	return (0);
}
/*
 * Initialize a GLDv3 driver's device ops. A driver that manages its own ops
 * (e.g. softmac) may pass in a NULL ops argument.
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	major_t major = ddi_name_to_major((char *)name);

	/*
	 * By returning on error below, we are not letting the driver continue
	 * in an undefined context. The mac_register() function will fail if
	 * DN_GLDV3_DRIVER isn't set.
	 */
	if (major == DDI_MAJOR_T_NONE)
		return;

	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);

	if (ops != NULL)
		dld_init_ops(ops, name);
}
void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}
/*ARGSUSED*/
static int
i_mac_constructor(void *buf, void *arg, int kmflag)
{
	mac_impl_t	*mip = buf;

	bzero(buf, sizeof (mac_impl_t));

	mip->mi_linkstate = LINK_STATE_UNKNOWN;

	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);

	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}
/*ARGSUSED*/
static void
i_mac_destructor(void *buf, void *arg)
{
	mac_impl_t	*mip = buf;
	mac_cb_info_t	*mcbi;

	ASSERT(mip->mi_ref == 0);
	ASSERT(mip->mi_active == 0);
	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
	ASSERT(mip->mi_devpromisc == 0);
	ASSERT(mip->mi_ksp == NULL);
	ASSERT(mip->mi_kstat_count == 0);
	ASSERT(mip->mi_nclients == 0);
	ASSERT(mip->mi_nactiveclients == 0);
	ASSERT(mip->mi_single_active_client == NULL);
	ASSERT(mip->mi_state_flags == 0);
	ASSERT(mip->mi_factory_addr == NULL);
	ASSERT(mip->mi_factory_addr_num == 0);
	ASSERT(mip->mi_default_tx_ring == NULL);

	mcbi = &mip->mi_notify_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
	ASSERT(mip->mi_notify_bits == 0);
	ASSERT(mip->mi_notify_thread == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
	mcbi->mcbi_lockp = NULL;

	mcbi = &mip->mi_promisc_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
	ASSERT(mip->mi_promisc_list == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
	mcbi->mcbi_lockp = NULL;

	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);

	rw_destroy(&mip->mi_rw_lock);

	mutex_destroy(&mip->mi_promisc_lock);
	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_notify_lock);
	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_ring_lock);

	ASSERT(mip->mi_bridge_link == NULL);
}
/*ARGSUSED*/
static int
i_mac_ring_ctor(void *buf, void *arg, int kmflag)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	bzero(ring, sizeof (mac_ring_t));
	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
	ring->mr_state = MR_FREE;

	return (0);
}
/*ARGSUSED*/
static void
i_mac_ring_dtor(void *buf, void *arg)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	cv_destroy(&ring->mr_cv);
	mutex_destroy(&ring->mr_lock);
}
/*
 * Common functions to do mac callback addition and deletion. Currently this is
 * used by promisc callbacks and notify callbacks. List addition and deletion
 * need to take care of list walkers. List walkers in general, can't hold list
 * locks and make upcall callbacks due to potential lock order and recursive
 * reentry issues. Instead list walkers increment the list walker count to mark
 * the presence of a walker thread. Addition can be carefully done to ensure
 * that the list walker always sees either the old list or the new list.
 * However the deletion can't be done while the walker is active, instead the
 * deleting thread simply marks the entry as logically deleted. The last walker
 * physically deletes and frees up the logically deleted entries when the walk
 * is complete.
 */
void
mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	/* Verify it is not already in the list */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		ASSERT(p != mcb_elem);
	}

	/*
	 * Add it to the head of the callback list. The membar ensures that
	 * the following list pointer manipulations reach global visibility
	 * in exactly the program order below.
	 */
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));

	mcb_elem->mcb_nextp = *mcb_head;
	membar_producer();
	*mcb_head = mcb_elem;
}
/*
 * Mark the entry as logically deleted. If there aren't any walkers unlink
 * from the list. In either case return the corresponding status.
 */
boolean_t
mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	/*
	 * Search the callback list for the entry to be removed
	 */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	ASSERT(p == mcb_elem);

	/*
	 * If there are walkers just mark it as deleted and the last walker
	 * will remove from the list and free it.
	 */
	if (mcbi->mcbi_walker_cnt != 0) {
		p->mcb_flags |= MCB_CONDEMNED;
		mcbi->mcbi_del_cnt++;
		return (B_FALSE);
	}

	ASSERT(mcbi->mcbi_del_cnt == 0);
	*pp = p->mcb_nextp;
	p->mcb_nextp = NULL;
	return (B_TRUE);
}
/*
 * Wait for all pending callback removals to be completed
 */
void
mac_callback_remove_wait(mac_cb_info_t *mcbi)
{
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	while (mcbi->mcbi_del_cnt != 0) {
		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}
}
/*
 * The last mac callback walker does the cleanup. Walk the list and unlink
 * all the logically deleted entries and construct a temporary list of
 * removed entries. Return the list of removed entries to the caller.
 */
static mac_cb_t *
mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;
	mac_cb_t	*rmlist = NULL;		/* List of removed elements */
	int		cnt = 0;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);

	pp = mcb_head;
	while (*pp != NULL) {
		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
			p = *pp;
			*pp = p->mcb_nextp;
			p->mcb_nextp = rmlist;
			rmlist = p;
			cnt++;
			continue;
		}
		pp = &(*pp)->mcb_nextp;
	}

	ASSERT(mcbi->mcbi_del_cnt == cnt);
	mcbi->mcbi_del_cnt = 0;
	return (rmlist);
}
boolean_t
mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	mac_cb_t	*mcb;

	/* Verify it is not already in the list */
	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
		if (mcb == mcb_elem)
			return (B_TRUE);
	}

	return (B_FALSE);
}
boolean_t
mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	boolean_t	found;

	mutex_enter(mcbi->mcbi_lockp);
	found = mac_callback_lookup(mcb_headp, mcb_elem);
	mutex_exit(mcbi->mcbi_lockp);

	return (found);
}
/* Free the list of removed callbacks */
void
mac_callback_free(mac_cb_t *rmlist)
{
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
	}
}
/*
 * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
 * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
 * is only a single shared total walker count, and an entry can't be physically
 * unlinked if a walker is active on either list. The last walker does this
 * cleanup of logically deleted entries.
 */
void
i_mac_promisc_walker_cleanup(mac_impl_t *mip)
{
	mac_cb_t		*rmlist;
	mac_cb_t		*mcb;
	mac_cb_t		*mcb_next;
	mac_promisc_impl_t	*mpip;

	/*
	 * Construct a temporary list of deleted callbacks by walking the
	 * mi_promisc_list. Then for each entry in the temporary list,
	 * remove it from the mci_promisc_list and free the entry.
	 */
	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
	    &mip->mi_promisc_list);

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));

		mcb->mcb_nextp = NULL;
		kmem_cache_free(mac_promisc_impl_cache, mpip);
	}
}
void
i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
{
	mac_cb_info_t		*mcbi;

	/*
	 * Signal the notify thread even after mi_ref has become zero and
	 * mi_disabled is set. The synchronization with the notify thread
	 * happens in mac_unregister and that implies the driver must make
	 * sure it is single-threaded (with respect to mac calls) and that
	 * all pending mac calls have returned before it calls mac_unregister
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED)
		goto exit;

	/*
	 * Guard against incorrect notifications. (Running a newer
	 * mac client against an older implementation?)
	 */
	if (type >= MAC_NNOTE)
		goto exit;

	mcbi = &mip->mi_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);
	cv_broadcast(&mcbi->mcbi_cv);
	mutex_exit(mcbi->mcbi_lockp);

exit:
	rw_exit(&i_mac_impl_lock);
}
/*
 * Mac serialization primitives. Please see the block comment at the
 * start of this file.
 */
void
i_mac_perim_enter(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner == curthread) {
		mip->mi_perim_ocnt++;
		mutex_exit(&mip->mi_perim_lock);
		return;
	}

	while (mip->mi_perim_owner != NULL)
		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);

	mip->mi_perim_owner = curthread;
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_ocnt++;
#ifdef DEBUG
	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
	    MAC_PERIM_STACK_DEPTH);
#endif
	mutex_exit(&mip->mi_perim_lock);
}
int
i_mac_perim_enter_nowait(mac_impl_t *mip)
{
	/*
	 * The vnic is a special case, since the serialization is done based
	 * on the lower mac. If the lower mac is busy, it does not imply the
	 * vnic can't be unregistered. But in the case of other drivers,
	 * a busy perimeter or open mac handles implies that the mac is busy
	 * and can't be unregistered.
	 */
	if (mip->mi_state_flags & MIS_IS_VNIC) {
		i_mac_perim_enter(mip);
		return (0);
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner != NULL) {
		mutex_exit(&mip->mi_perim_lock);
		return (EBUSY);
	}

	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_owner = curthread;
	mip->mi_perim_ocnt++;
	mutex_exit(&mip->mi_perim_lock);

	return (0);
}
void
i_mac_perim_exit(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);

	mutex_enter(&mip->mi_perim_lock);
	if (--mip->mi_perim_ocnt == 0) {
		mip->mi_perim_owner = NULL;
		cv_signal(&mip->mi_perim_cv);
	}
	mutex_exit(&mip->mi_perim_lock);
}
/*
 * Returns whether the current thread holds the mac perimeter. Used in making
 * assertions.
 */
boolean_t
mac_perim_held(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	return (mip->mi_perim_owner == curthread);
}
/*
 * mac client interfaces to enter the mac perimeter of a mac end point, given
 * its mac handle, or macname or linkid.
 */
void
mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	/*
	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
	 * mac_open has been done internally while entering the perimeter.
	 * This information is used in mac_perim_exit
	 */
	MAC_ENCODE_MPH(*mphp, mip, 0);
}
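
/*
 * Example (illustrative, hypothetical caller): bracketing a sequence of
 * control operations so they are atomic with respect to other control
 * operations on the same mac end point (the R3 model described at the top
 * of this file).
 *
 *	mac_perim_handle_t	mph;
 *
 *	mac_perim_enter_by_mh(mh, &mph);
 *	... a sequence of mac control calls on 'mh' ...
 *	mac_perim_exit(mph);
 */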
int
mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
{
	int		err;
	mac_handle_t	mh;

	if ((err = mac_open(name, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}
int
mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
{
	int		err;
	mac_handle_t	mh;

	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}
void
mac_perim_exit(mac_perim_handle_t mph)
{
	mac_impl_t	*mip;
	boolean_t	need_close;

	MAC_DECODE_MPH(mph, mip, need_close);
	i_mac_perim_exit(mip);
	if (need_close)
		mac_close((mac_handle_t)mip);
}
int
mac_hold(const char *macname, mac_impl_t **pmip)
{
	mac_impl_t	*mip;
	int		err;

	/*
	 * Check the device name length to make sure it won't overflow our
	 * buffer.
	 */
	if (strlen(macname) >= MAXNAMELEN)
		return (EINVAL);

	/*
	 * Look up its entry in the global hash table.
	 */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
	    (mod_hash_val_t *)&mip);

	if (err != 0) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_DISABLED) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
		rw_exit(&i_mac_impl_lock);
		return (EBUSY);
	}

	mip->mi_ref++;
	*pmip = mip;

	rw_exit(&i_mac_impl_lock);
	return (0);
}
void
mac_rele(mac_impl_t *mip)
{
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	ASSERT(mip->mi_ref != 0);
	if (--mip->mi_ref == 0) {
		ASSERT(mip->mi_nactiveclients == 0 &&
		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
	}
	rw_exit(&i_mac_impl_lock);
}
/*
 * Private GLDv3 function to start a MAC instance.
 */
int
mac_start(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err = 0;
	mac_group_t	*defgrp;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_start != NULL);

	/*
	 * Check whether the device is already started.
	 */
	if (mip->mi_active++ == 0) {
		mac_ring_t *ring = NULL;

		/*
		 * Start the device.
		 */
		err = mip->mi_start(mip->mi_driver);
		if (err != 0) {
			mip->mi_active--;
			return (err);
		}

		/*
		 * Start the default tx ring.
		 */
		if (mip->mi_default_tx_ring != NULL) {
			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state != MR_INUSE) {
				err = mac_start_ring(ring);
				if (err != 0) {
					mip->mi_active--;
					return (err);
				}
			}
		}

		if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * Start the default ring, since it will be needed
			 * to receive broadcast and multicast traffic for
			 * both primary and non-primary MAC clients.
			 */
			ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			err = mac_start_group_and_rings(defgrp);
			if (err != 0) {
				mip->mi_active--;
				if ((ring != NULL) &&
				    (ring->mr_state == MR_INUSE))
					mac_stop_ring(ring);
				return (err);
			}
			mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED);
		}
	}

	return (err);
}
/*
 * Private GLDv3 function to stop a MAC instance.
 */
void
mac_stop(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_group_t	*grp;

	ASSERT(mip->mi_stop != NULL);
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Check whether the device is still needed.
	 */
	ASSERT(mip->mi_active != 0);
	if (--mip->mi_active == 0) {
		if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * There should be no more active clients since the
			 * MAC is being stopped. Stop the default RX group
			 * and transition it back to registered state.
			 *
			 * When clients are torn down, the groups
			 * are released via mac_release_rx_group which
			 * knows that the default group is always in
			 * started mode since broadcast uses it. So
			 * we can assert that there are no clients
			 * (since mac_bcast_add doesn't register itself
			 * as a client) and the group is in SHARED state.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
			ASSERT(MAC_GROUP_NO_CLIENT(grp) &&
			    mip->mi_nactiveclients == 0);
			mac_stop_group_and_rings(grp);
			mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED);
		}

		if (mip->mi_default_tx_ring != NULL) {
			mac_ring_t *ring;

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state == MR_INUSE) {
				mac_stop_ring(ring);
			}
		}

		/*
		 * Stop the device.
		 */
		mip->mi_stop(mip->mi_driver);
	}
}
int
i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
{
	int		err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_setpromisc != NULL);

	if (on) {
		/*
		 * Enable promiscuous mode on the device if not yet enabled.
		 */
		if (mip->mi_devpromisc++ == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
			if (err != 0) {
				mip->mi_devpromisc--;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	} else {
		if (mip->mi_devpromisc == 0)
			return (EPROTO);

		/*
		 * Disable promiscuous mode on the device if this is the last
		 * enabling.
		 */
		if (--mip->mi_devpromisc == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
			if (err != 0) {
				mip->mi_devpromisc++;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	}

	return (0);
}
/*
 * The promiscuity state can change any time. If the caller needs to take
 * actions that are atomic with the promiscuity state, then the caller needs
 * to bracket the entire sequence with mac_perim_enter/exit
 */
boolean_t
mac_promisc_get(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Return the current promiscuity.
	 */
	return (mip->mi_devpromisc != 0);
}
/*
 * Invoked at MAC instance attach time to initialize the list
 * of factory MAC addresses supported by a MAC instance. This function
 * builds a local cache in the mac_impl_t for the MAC addresses
 * supported by the underlying hardware. The MAC clients themselves
 * use the mac_addr_factory*() functions to query and reserve
 * factory MAC addresses.
 */
void
mac_addr_factory_init(mac_impl_t *mip)
{
	mac_capab_multifactaddr_t capab;
	uint8_t *addr;
	int i;

	/*
	 * First round to see how many factory MAC addresses are available.
	 */
	bzero(&capab, sizeof (capab));
	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
	    &capab) || (capab.mcm_naddr == 0)) {
		/*
		 * The MAC instance doesn't support multiple factory
		 * MAC addresses, we're done here.
		 */
		return;
	}

	/*
	 * Allocate the space and get all the factory addresses.
	 */
	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);

	mip->mi_factory_addr_num = capab.mcm_naddr;
	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t), KM_SLEEP);

	for (i = 0; i < capab.mcm_naddr; i++) {
		bcopy(addr + i * MAXMACADDRLEN,
		    mip->mi_factory_addr[i].mfa_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
	}

	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
}
void
mac_addr_factory_fini(mac_impl_t *mip)
{
	if (mip->mi_factory_addr == NULL) {
		ASSERT(mip->mi_factory_addr_num == 0);
		return;
	}

	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t));

	mip->mi_factory_addr = NULL;
	mip->mi_factory_addr_num = 0;
}
/*
 * Reserve a factory MAC address. If *slot is set to -1, the function
 * attempts to reserve any of the available factory MAC addresses and
 * returns the reserved slot id. If no slots are available, the function
 * returns ENOSPC. If *slot is not set to -1, the function reserves
 * the specified slot if it is available, or returns EBUSY if the slot
 * is already used. Returns ENOTSUP if the underlying MAC does not
 * support multiple factory addresses. If the slot number is not -1 but
 * is invalid, returns EINVAL.
 */
int
mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;
	int i, ret = 0;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	if (mip->mi_factory_addr_num == 0) {
		ret = ENOTSUP;
		goto bail;
	}

	if (*slot != -1) {
		/* check the specified slot */
		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
			ret = EINVAL;
			goto bail;
		}
		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
			ret = EBUSY;
			goto bail;
		}
	} else {
		/* pick the next available slot */
		for (i = 0; i < mip->mi_factory_addr_num; i++) {
			if (!mip->mi_factory_addr[i].mfa_in_use)
				break;
		}

		if (i == mip->mi_factory_addr_num) {
			ret = ENOSPC;
			goto bail;
		}
		*slot = i+1;
	}

	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
	mip->mi_factory_addr[*slot-1].mfa_client = mcip;

bail:
	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
	return (ret);
}
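
/*
 * Example (illustrative, hypothetical client code): reserving any available
 * factory address by passing in *slot == -1, then releasing it. On success
 * 'slot' holds the reserved 1-based slot id.
 *
 *	int slot = -1;
 *
 *	if (mac_addr_factory_reserve(mch, &slot) == 0)
 *		mac_addr_factory_release(mch, slot);
 */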
/*
 * Release the specified factory MAC address slot.
 */
void
mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);

	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
}
/*
 * Stores in mac_addr the value of the specified MAC address. Returns
 * 0 on success, or EINVAL if the slot number is not valid for the MAC.
 * The caller must provide a string of at least MAXNAMELEN bytes.
 */
void
mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
    uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	boolean_t in_use;

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);

	/*
	 * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter
	 * and mi_rw_lock
	 */
	rw_enter(&mip->mi_rw_lock, RW_READER);
	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
	*addr_len = mip->mi_type->mt_addr_length;
	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
	if (in_use && client_name != NULL) {
		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
		    client_name, MAXNAMELEN);
	}
	if (in_use_arg != NULL)
		*in_use_arg = in_use;
	rw_exit(&mip->mi_rw_lock);
}
/*
 * Returns the number of factory MAC addresses (in addition to the
 * primary MAC address), 0 if the underlying MAC doesn't support
 * multiple factory addresses.
 */
int
mac_addr_factory_num(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	return (mip->mi_factory_addr_num);
}
void
mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
{
	mac_ring_t	*ring;

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
		ring->mr_flag &= ~flag;
}
/*
 * The following mac_hwrings_xxx() functions are private mac client functions
 * used by the aggr driver to access and control the underlying HW Rx group
 * and rings. In this case, the aggr driver has exclusive control of the
 * underlying HW Rx group/rings, it calls the following functions to
 * start/stop the HW Rx rings, disable/enable polling, add/remove mac
 * addresses, or set up the Rx callback.
 */
/* ARGSUSED */
static void
mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
    mblk_t *mp_chain, boolean_t loopback)
{
	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
	mac_direct_rx_t		proc;
	void			*arg1;
	mac_resource_handle_t	arg2;

	proc = srs_rx->sr_func;
	arg1 = srs_rx->sr_arg1;
	arg2 = mac_srs->srs_mrh;

	proc(arg1, arg2, mp_chain, NULL);
}
/*
 * This function is called to get the list of HW rings that are reserved by
 * an exclusive mac client.
 *
 * Return value: the number of HW rings.
 */
int
mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_group_t		*grp;
	mac_ring_t		*ring;
	int			cnt = 0;

	if (rtype == MAC_RING_TYPE_RX) {
		grp = flent->fe_rx_ring_group;
	} else if (rtype == MAC_RING_TYPE_TX) {
		grp = flent->fe_tx_ring_group;
	}

	/*
	 * The mac client did not reserve any RX group, return directly.
	 * This is probably because the underlying MAC does not support
	 * any groups.
	 */
	if (hwgh != NULL)
		*hwgh = NULL;
	if (grp == NULL)
		return (0);

	/*
	 * This group must be reserved by this mac client.
	 */
	ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
	    (mcip == MAC_GROUP_ONLY_CLIENT(grp)));

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) {
		ASSERT(cnt < MAX_RINGS_PER_GROUP);
		hwrh[cnt] = (mac_ring_handle_t)ring;
	}
	if (hwgh != NULL)
		*hwgh = (mac_group_handle_t)grp;

	return (cnt);
}
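
/*
 * Example (illustrative, modeled on the aggr use case): an exclusive client
 * retrieving and starting the HW Rx rings of its reserved group.
 *
 *	mac_group_handle_t	gh;
 *	mac_ring_handle_t	rings[MAX_RINGS_PER_GROUP];
 *	int			i, nrings;
 *
 *	nrings = mac_hwrings_get(mch, &gh, rings, MAC_RING_TYPE_RX);
 *	for (i = 0; i < nrings; i++)
 *		(void) mac_hwring_start(rings[i]);
 */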
/*
 * This function is called to get info about Tx/Rx rings.
 *
 * Return value: returns uint_t which will have various bits set
 * that indicates different properties of the ring.
 */
uint_t
mac_hwring_getinfo(mac_ring_handle_t rh)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_flags);
}
/*
 * Export ddi interrupt handles from the HW ring to the pseudo ring and
 * setup the RX callback of the mac client which exclusively controls
 * HW ring.
 */
void
mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh,
    mac_ring_handle_t pseudo_rh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_ring_t		*pseudo_ring;
	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;

	if (pseudo_rh != NULL) {
		pseudo_ring = (mac_ring_t *)pseudo_rh;
		/* Export the ddi handles to pseudo ring */
		pseudo_ring->mr_info.mri_intr.mi_ddi_handle =
		    hw_ring->mr_info.mri_intr.mi_ddi_handle;
		pseudo_ring->mr_info.mri_intr.mi_ddi_shared =
		    hw_ring->mr_info.mri_intr.mi_ddi_shared;
		/*
		 * Save a pointer to pseudo ring in the hw ring. If
		 * interrupt handle changes, the hw ring will be
		 * notified of the change (see mac_ring_intr_set())
		 * and the appropriate change has to be made to
		 * the pseudo ring that has exported the ddi handle.
		 */
		hw_ring->mr_prh = pseudo_rh;
	}

	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_mrh = prh;
		mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
	}
}
void
mac_hwring_teardown(mac_ring_handle_t hwrh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_soft_ring_set_t	*mac_srs;

	if (hw_ring == NULL)
		return;

	hw_ring->mr_prh = NULL;
	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		mac_srs = hw_ring->mr_srs;
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
		mac_srs->srs_mrh = NULL;
	}
}
int
mac_hwring_disable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_disable(intr->mi_handle));
}

int
mac_hwring_enable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_enable(intr->mi_handle));
}
int
mac_hwring_start(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
	return (0);
}

void
mac_hwring_stop(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
}
mblk_t *
mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &rr_ring->mr_info;

	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
}
/*
 * Send packets through a selected tx ring.
 */
mblk_t *
mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
	    ring->mr_state >= MR_INUSE);
	return (info->mri_tx(info->mri_driver, mp));
}
/*
 * Query stats for a particular rx/tx ring
 */
int
mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val)
{
	mac_ring_t	*ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_stat(info->mri_driver, stat, val));
}
/*
 * Private function that is only used by aggr to send packets through
 * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports
 * that do not expose Tx rings, the aggr_ring_tx() entry point needs
 * access to mac_impl_t to send packets through the m_tx() entry point.
 * It accomplishes this by calling the mac_hwring_send_priv() function.
 */
mblk_t *
mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	MAC_TX(mip, rh, mp, mcip);
	return (mp);
}
/*
 * Private function that is only used by aggr to update the default
 * transmission ring. Because aggr exposes a pseudo Tx ring even for ports
 * that may temporarily be down, it may need to update the default ring that
 * is used by MAC such that it refers to a link that can actively be used to
 * send traffic. Note that this is different from the case where the port has
 * been removed from the group. In those cases, all of the rings will be torn
 * down because the ring will no longer exist. It's important to give aggr a
 * case where the rings can still exist such that it may be able to continue
 * to send LACP PDUs to potentially restore the link.
 *
 * Finally, we explicitly don't do anything if the ring hasn't been enabled
 * yet. This is to help out aggr which doesn't really know the internal state
 * that MAC does about the rings and can't know that it's not quite ready for
 * use yet.
 */
void
mac_hwring_set_default(mac_handle_t mh, mac_ring_handle_t rh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	mac_ring_t *ring = (mac_ring_t *)rh;

	ASSERT(MAC_PERIM_HELD(mh));
	VERIFY(mip->mi_state_flags & MIS_IS_AGGR);

	if (ring->mr_state != MR_INUSE)
		return;

	mip->mi_default_tx_ring = rh;
}
int
mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_addmac(group, addr));
}

int
mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_remmac(group, addr));
}
/*
 * Set the RX group to be shared/reserved. Note that the group must be
 * started/stopped outside of this function.
 */
void
mac_set_group_state(mac_group_t *grp, mac_group_state_t state)
{
	/*
	 * If there is no change in the group state, just return.
	 */
	if (grp->mrg_state == state)
		return;

	switch (state) {
	case MAC_GROUP_STATE_RESERVED:
		/*
		 * Successfully reserved the group.
		 *
		 * Given that there is an exclusive client controlling this
		 * group, we enable the group level polling when available,
		 * so that SRSs get to turn on/off individual rings they're
		 * assigned to.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_DISABLE_FUNC(grp) != NULL) {
			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		break;

	case MAC_GROUP_STATE_SHARED:
		/*
		 * Set all rings of this group to software classified.
		 * If the group has an overriding interrupt, then re-enable it.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_ENABLE_FUNC(grp) != NULL) {
			GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		/* The ring is not available for reservations any more */
		break;

	case MAC_GROUP_STATE_REGISTERED:
		/* Also callable from mac_register, perim is not held */
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}

	grp->mrg_state = state;
}
/*
 * Quiesce future hardware classified packets for the specified Rx ring
 */
static void
mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag)
{
	ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER);
	ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE);

	mutex_enter(&rx_ring->mr_lock);
	rx_ring->mr_flag |= ring_flag;
	while (rx_ring->mr_refcnt != 0)
		cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock);
	mutex_exit(&rx_ring->mr_lock);
}
/*
 * Please see mac_tx for details about the per cpu locking scheme
 */
static void
mac_tx_lock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i >= 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_allbutzero(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i > 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static int
mac_tx_sum_refcnt(mac_client_impl_t *mcip)
{
	int	i;
	int	refcnt = 0;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt;

	return (refcnt);
}
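
/*
 * Illustrative sketch (assumption: this approximates the mac_tx data path in
 * mac_client.c) of how a sender cooperates with mac_tx_client_block() below:
 *
 *	mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *	if (mcip->mci_tx_flag & MCI_TX_QUIESCE) {
 *		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *		return;				the send is refused
 *	}
 *	mcip->mci_tx_pcpu[i].pcpu_tx_refcnt++;
 *	mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *	... transmit, then decrement pcpu_tx_refcnt and cv_signal
 *	mci_tx_cv if a blocked thread is waiting ...
 */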
/*
 * Stop future Tx packets coming down from the client in preparation for
 * quiescing the Tx side. This is needed for dynamic reclaim and reassignment
 * of rings between clients
 */
void
mac_tx_client_block(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag |= MCI_TX_QUIESCE;
	while (mac_tx_sum_refcnt(mcip) != 0) {
		mac_tx_unlock_allbutzero(mcip);
		cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mac_tx_lock_all(mcip);
	}
	mac_tx_unlock_all(mcip);
}
void
mac_tx_client_unblock(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag &= ~MCI_TX_QUIESCE;
	mac_tx_unlock_all(mcip);
	/*
	 * We may fail to disable flow control for the last MAC_NOTE_TX
	 * notification because the MAC client is quiesced. Send the
	 * notification again.
	 */
	i_mac_notify(mcip->mci_mip, MAC_NOTE_TX);
}
/*
 * Wait for an SRS to quiesce. The SRS worker will signal us when the
 * quiesce is done.
 */
static void
mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag)
{
	mutex_enter(&srs->srs_lock);
	while (!(srs->srs_state & srs_flag))
		cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock);
	mutex_exit(&srs->srs_lock);
}
/*
 * Quiescing an Rx SRS is achieved by the following sequence. The protocol
 * works bottom up by cutting off packet flow from the bottommost point in the
 * mac, then the SRS, and then the soft rings. There are 2 use cases of this
 * mechanism. One is a temporary quiesce of the SRS, such as say while changing
 * the Rx callbacks. Another use case is Rx SRS teardown. In the former case
 * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used
 * for the SRS and MR flags. In the former case the threads pause waiting for
 * a restart, while in the latter case the threads exit. The Tx SRS teardown
 * is also mostly similar to the above.
 *
 * 1. Stop future hardware classified packets at the lowest level in the mac.
 *    Remove any hardware classification rule (CONDEMNED case) and mark the
 *    rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt
 *    from increasing. Upcalls from the driver that come through hardware
 *    classification will be dropped in mac_rx from now on. Then we wait for
 *    the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are
 *    sure there aren't any upcall threads from the driver through hardware
 *    classification. In the case of SRS teardown we also remove the
 *    classification rule in the driver.
 *
 * 2. Stop future software classified packets by marking the flow entry with
 *    FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from
 *    increasing. We also remove the flow entry from the table in the latter
 *    case. Then wait for the fe_refcnt to reach an appropriate quiescent value
 *    that indicates there aren't any active threads using that flow entry.
 *
 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread,
 *    SRS worker thread, and the soft ring threads are quiesced in sequence
 *    with the SRS worker thread serving as a master controller. This
 *    mechanism is explained in mac_srs_worker_quiesce().
 *
 * The restart mechanism to reactivate the SRS and softrings is explained
 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the
 * restart sequence.
 */
void
mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
{
	flow_entry_t	*flent = srs->srs_flent;
	uint_t	mr_flag, srs_done_flag;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
	ASSERT(!(srs->srs_type & SRST_TX));

	if (srs_quiesce_flag == SRS_CONDEMNED) {
		mr_flag = MR_CONDEMNED;
		srs_done_flag = SRS_CONDEMNED_DONE;
		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
			mac_srs_client_poll_disable(srs->srs_mcip, srs);
	} else {
		ASSERT(srs_quiesce_flag == SRS_QUIESCE);
		mr_flag = MR_QUIESCE;
		srs_done_flag = SRS_QUIESCE_DONE;
		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
			mac_srs_client_poll_quiesce(srs->srs_mcip, srs);
	}

	if (srs->srs_ring != NULL) {
		mac_rx_ring_quiesce(srs->srs_ring, mr_flag);
	} else {
		/*
		 * SRS is driven by software classification. In case
		 * of CONDEMNED, the top level teardown functions will
		 * deal with flow removal.
		 */
		if (srs_quiesce_flag != SRS_CONDEMNED) {
			FLOW_MARK(flent, FE_QUIESCE);
			mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
		}
	}

	/*
	 * Signal the SRS to quiesce itself, and then cv_wait for the
	 * SRS quiesce to complete. The SRS worker thread will wake us
	 * up when the quiesce is complete
	 */
	mac_srs_signal(srs, srs_quiesce_flag);
	mac_srs_quiesce_wait(srs, srs_done_flag);
}
/*
 * Remove an SRS.
 */
void
mac_rx_srs_remove(mac_soft_ring_set_t *srs)
{
	flow_entry_t	*flent = srs->srs_flent;
	int		i;

	mac_rx_srs_quiesce(srs, SRS_CONDEMNED);

	/*
	 * Locate and remove our entry in the fe_rx_srs[] array, and
	 * adjust the fe_rx_srs array entries and array count by
	 * moving the last entry into the vacated spot.
	 */
	mutex_enter(&flent->fe_lock);
	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
		if (flent->fe_rx_srs[i] == srs)
			break;
	}

	ASSERT(i != 0 && i < flent->fe_rx_srs_cnt);
	if (i != flent->fe_rx_srs_cnt - 1) {
		flent->fe_rx_srs[i] =
		    flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1];
		i = flent->fe_rx_srs_cnt - 1;
	}

	flent->fe_rx_srs[i] = NULL;
	flent->fe_rx_srs_cnt--;
	mutex_exit(&flent->fe_lock);
}
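/*
 * A minimal sketch of the swap-with-last removal idiom used above,
 * assuming a hypothetical array arr[] with cnt live entries and victim
 * index i (names are illustrative only):
 *
 *	if (i != cnt - 1) {
 *		arr[i] = arr[cnt - 1];	move the last entry into the hole
 *		i = cnt - 1;
 *	}
 *	arr[i] = NULL;
 *	cnt--;
 *
 * Order is not preserved, but no entries need to be shifted and the
 * array stays dense.
 */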
static void
mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag)
{
	mutex_enter(&srs->srs_lock);
	srs->srs_state &= ~flag;
	mutex_exit(&srs->srs_lock);
}
void
mac_rx_srs_restart(mac_soft_ring_set_t *srs)
{
	flow_entry_t	*flent = srs->srs_flent;
	mac_ring_t	*mr;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
	ASSERT((srs->srs_type & SRST_TX) == 0);

	/*
	 * This handles a change in the number of SRSs between the quiesce
	 * and restart operation of a flow.
	 */
	if (!SRS_QUIESCED(srs))
		return;

	/*
	 * Signal the SRS to restart itself, and wait for the restart to
	 * complete. Note that we only restart the SRS if it is not marked
	 * as permanently quiesced.
	 */
	if (!SRS_QUIESCED_PERMANENT(srs)) {
		mac_srs_signal(srs, SRS_RESTART);
		mac_srs_quiesce_wait(srs, SRS_RESTART_DONE);
		mac_srs_clear_flag(srs, SRS_RESTART_DONE);

		mac_srs_client_poll_restart(srs->srs_mcip, srs);
	}

	/* Finally clear the flags to let the packets in */
	mr = srs->srs_ring;
	if (mr != NULL) {
		MAC_RING_UNMARK(mr, MR_QUIESCE);
		/* In case the ring was stopped, safely restart it */
		if (mr->mr_state != MR_INUSE)
			(void) mac_start_ring(mr);
	} else {
		FLOW_UNMARK(flent, FE_QUIESCE);
	}
}
/*
 * Temporary quiesce of a flow and associated Rx SRS.
 * Please see block comment above mac_rx_classify_flow_rem.
 */
/* ARGSUSED */
int
mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg)
{
	int	i;

	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
		mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i],
		    SRS_QUIESCE);
	}
	return (0);
}

/*
 * Restart a flow and associated Rx SRS that has been quiesced temporarily.
 * Please see block comment above mac_rx_classify_flow_rem.
 */
/* ARGSUSED */
int
mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg)
{
	int	i;

	for (i = 0; i < flent->fe_rx_srs_cnt; i++)
		mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]);

	return (0);
}
void
mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_impl_t		*mip = mcip->mci_mip;
	mac_soft_ring_set_t	*mac_srs;
	int			i;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	if (flent == NULL)
		return;

	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
		mac_srs = flent->fe_rx_srs[i];
		mutex_enter(&mac_srs->srs_lock);
		if (on)
			mac_srs->srs_state |= SRS_QUIESCE_PERM;
		else
			mac_srs->srs_state &= ~SRS_QUIESCE_PERM;
		mutex_exit(&mac_srs->srs_lock);
	}
}
void
mac_rx_client_quiesce(mac_client_handle_t mch)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_impl_t		*mip = mcip->mci_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	if (MCIP_DATAPATH_SETUP(mcip)) {
		(void) mac_rx_classify_flow_quiesce(mcip->mci_flent,
		    NULL);
		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
		    mac_rx_classify_flow_quiesce, NULL);
	}
}

void
mac_rx_client_restart(mac_client_handle_t mch)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_impl_t		*mip = mcip->mci_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	if (MCIP_DATAPATH_SETUP(mcip)) {
		(void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL);
		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
		    mac_rx_classify_flow_restart, NULL);
	}
}
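/*
 * A sketch of the typical usage pattern: a control operation brackets a
 * datapath reconfiguration with quiesce/restart while holding the
 * perimeter (which the ASSERTs above require):
 *
 *	i_mac_perim_enter(mip);
 *	mac_rx_client_quiesce(mch);	stop Rx processing for the client
 *	... reconfigure SRSs / rings ...
 *	mac_rx_client_restart(mch);	resume Rx processing
 *	i_mac_perim_exit(mip);
 */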
/*
 * This function only quiesces the Tx SRS and softring worker threads. Callers
 * need to make sure that there aren't any mac client threads doing current or
 * future transmits in the mac before calling this function.
 */
void
mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
{
	mac_client_impl_t	*mcip = srs->srs_mcip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));

	ASSERT(srs->srs_type & SRST_TX);
	ASSERT(srs_quiesce_flag == SRS_CONDEMNED ||
	    srs_quiesce_flag == SRS_QUIESCE);

	/*
	 * Signal the SRS to quiesce itself, and then cv_wait for the
	 * SRS quiesce to complete. The SRS worker thread will wake us
	 * up when the quiesce is complete.
	 */
	mac_srs_signal(srs, srs_quiesce_flag);
	mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ?
	    SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE);
}
void
mac_tx_srs_restart(mac_soft_ring_set_t *srs)
{
	/*
	 * Resizing the fanout could result in creation of new SRSs.
	 * They may not necessarily be in the quiesced state, in which
	 * case they need not be restarted.
	 */
	if (!SRS_QUIESCED(srs))
		return;

	mac_srs_signal(srs, SRS_RESTART);
	mac_srs_quiesce_wait(srs, SRS_RESTART_DONE);
	mac_srs_clear_flag(srs, SRS_RESTART_DONE);
}
/*
 * Temporary quiesce of a flow and associated Tx SRS.
 * Please see block comment above mac_rx_srs_quiesce.
 */
/* ARGSUSED */
int
mac_tx_flow_quiesce(flow_entry_t *flent, void *arg)
{
	/*
	 * The fe_tx_srs is null for a subflow on an interface that is
	 * not plumbed.
	 */
	if (flent->fe_tx_srs != NULL)
		mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE);
	return (0);
}

/* ARGSUSED */
int
mac_tx_flow_restart(flow_entry_t *flent, void *arg)
{
	/*
	 * The fe_tx_srs is null for a subflow on an interface that is
	 * not plumbed.
	 */
	if (flent->fe_tx_srs != NULL)
		mac_tx_srs_restart(flent->fe_tx_srs);
	return (0);
}
static void
i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));

	mac_tx_client_block(mcip);
	if (MCIP_TX_SRS(mcip) != NULL) {
		mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag);
		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
		    mac_tx_flow_quiesce, NULL);
	}
}

void
mac_tx_client_quiesce(mac_client_handle_t mch)
{
	i_mac_tx_client_quiesce(mch, SRS_QUIESCE);
}

void
mac_tx_client_condemn(mac_client_handle_t mch)
{
	i_mac_tx_client_quiesce(mch, SRS_CONDEMNED);
}
void
mac_tx_client_restart(mac_client_handle_t mch)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));

	mac_tx_client_unblock(mcip);
	if (MCIP_TX_SRS(mcip) != NULL) {
		mac_tx_srs_restart(MCIP_TX_SRS(mcip));
		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
		    mac_tx_flow_restart, NULL);
	}
}
void
mac_tx_client_flush(mac_client_impl_t *mcip)
{
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));

	mac_tx_client_quiesce((mac_client_handle_t)mcip);
	mac_tx_client_restart((mac_client_handle_t)mcip);
}

void
mac_client_quiesce(mac_client_impl_t *mcip)
{
	mac_rx_client_quiesce((mac_client_handle_t)mcip);
	mac_tx_client_quiesce((mac_client_handle_t)mcip);
}

void
mac_client_restart(mac_client_impl_t *mcip)
{
	mac_rx_client_restart((mac_client_handle_t)mcip);
	mac_tx_client_restart((mac_client_handle_t)mcip);
}
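/*
 * Note that mac_tx_client_flush() above is simply a quiesce immediately
 * followed by a restart; a sketch of its effect:
 *
 *	mac_tx_client_quiesce(mch);	block new senders, drain the Tx
 *					SRS and softring workers
 *	mac_tx_client_restart(mch);	immediately resume
 *
 * i.e. it acts as a barrier that waits out every transmit that was in
 * flight when it was called, without leaving the client quiesced.
 */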
/*
 * Allocate a minor number.
 */
minor_t
mac_minor_hold(boolean_t sleep)
{
	minor_t	minor;

	/*
	 * Grab a value from the arena.
	 */
	atomic_inc_32(&minor_count);

	if (sleep)
		minor = (uint_t)id_alloc(minor_ids);
	else
		minor = (uint_t)id_alloc_nosleep(minor_ids);

	if (minor == 0) {
		atomic_dec_32(&minor_count);
		return (0);
	}

	return (minor);
}

/*
 * Release a previously allocated minor number.
 */
void
mac_minor_rele(minor_t minor)
{
	/*
	 * Return the value to the arena.
	 */
	id_free(minor_ids, minor);
	atomic_dec_32(&minor_count);
}
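/*
 * Callers pair the two around the lifetime of a minor node; a sketch
 * (error handling elided):
 *
 *	minor_t minor = mac_minor_hold(B_TRUE);	may sleep in id_alloc()
 *	... create and use the minor node ...
 *	mac_minor_rele(minor);			return it to the arena
 *
 * With sleep == B_FALSE the allocation can fail and return 0, which
 * callers must check.
 */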
uint32_t
mac_no_notification(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	return (((mip->mi_state_flags & MIS_LEGACY) != 0) ?
	    mip->mi_capab_legacy.ml_unsup_note : 0);
}
/*
 * Prevent any new opens of this mac in preparation for unregister.
 */
static int
i_mac_disable(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	if (mip->mi_state_flags & MIS_DISABLED) {
		/* Already disabled, return success */
		rw_exit(&i_mac_impl_lock);
		return (0);
	}

	/*
	 * See if there are any other references to this mac_t (e.g., VLAN's).
	 * If so return failure. If all the other checks below pass, then
	 * set mi_disabled atomically under the i_mac_impl_lock to prevent
	 * any new VLAN's from being created or new mac client opens of this
	 * mac end point.
	 */
	if (mip->mi_ref > 0) {
		rw_exit(&i_mac_impl_lock);
		return (EBUSY);
	}

	/*
	 * mac clients must delete all multicast groups they join before
	 * closing. bcast groups are reference counted, the last client
	 * to delete the group will wait till the group is physically
	 * deleted. Since all clients have closed this mac end point
	 * mi_bcast_ngrps must be zero at this point.
	 */
	ASSERT(mip->mi_bcast_ngrps == 0);

	/*
	 * Don't let go of this if it has some flows.
	 * All other code guarantees no flows are added to a disabled
	 * mac, therefore it is sufficient to check for the flow table
	 * only here.
	 */
	mcip = mac_primary_client_handle(mip);
	if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) {
		rw_exit(&i_mac_impl_lock);
		return (ENOTEMPTY);
	}

	mip->mi_state_flags |= MIS_DISABLED;
	rw_exit(&i_mac_impl_lock);
	return (0);
}
int
mac_disable_nowait(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err;

	if ((err = i_mac_perim_enter_nowait(mip)) != 0)
		return (err);
	err = i_mac_disable(mip);
	i_mac_perim_exit(mip);
	return (err);
}

int
mac_disable(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err;

	i_mac_perim_enter(mip);
	err = i_mac_disable(mip);
	i_mac_perim_exit(mip);

	/*
	 * Clean up notification thread and wait for it to exit.
	 */
	if (err == 0)
		i_mac_notify_exit(mip);

	return (err);
}
/*
 * Called when the MAC instance has a non-empty flow table, to de-multiplex
 * incoming packets to the right flow.
 * The MAC's rw lock is assumed held as a READER.
 */
/* ARGSUSED */
static mblk_t *
mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp)
{
	flow_entry_t	*flent = NULL;
	uint_t		flags = FLOW_INBOUND;
	int		err;

	/*
	 * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN
	 * to mac_flow_lookup() so that the VLAN packets can be successfully
	 * passed to the non-VLAN aggregation flows.
	 *
	 * Note that there is possibly a race between this and
	 * mac_unicast_remove/add() and VLAN packets could be incorrectly
	 * classified to non-VLAN flows of non-aggregation mac clients. These
	 * VLAN packets will then be filtered out by the mac module.
	 */
	if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0)
		flags |= FLOW_IGNORE_VLAN;

	err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent);
	if (err != 0) {
		/* no registered receive function */
		return (mp);
	} else {
		mac_client_impl_t	*mcip;

		/*
		 * This flent might just be an additional one on the MAC client,
		 * i.e. for classification purposes (different fdesc), however
		 * the resources, SRS et. al., are in the mci_flent, so if
		 * this isn't the mci_flent, we need to get it.
		 */
		if ((mcip = flent->fe_mcip) != NULL &&
		    mcip->mci_flent != flent) {
			FLOW_REFRELE(flent);
			flent = mcip->mci_flent;
			FLOW_TRY_REFHOLD(flent, err);
			if (err != 0)
				return (mp);
		}
		(flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp,
		    B_FALSE);
		FLOW_REFRELE(flent);
	}
	return (NULL);
}
mblk_t *
mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mblk_t		*bp, *bp1, **bpp, *list = NULL;

	/*
	 * We walk the chain and attempt to classify each packet.
	 * The packets that couldn't be classified will be returned
	 * back to the caller.
	 */
	bp = mp_chain;
	bpp = &list;
	while (bp != NULL) {
		bp1 = bp;
		bp = bp->b_next;
		bp1->b_next = NULL;

		if (mac_rx_classify(mip, mrh, bp1) != NULL) {
			*bpp = bp1;
			bpp = &bp1->b_next;
		}
	}
	return (list);
}
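/*
 * The loop above is the standard mblk b_next chain-walk: detach each
 * packet, classify it, and re-link the ones that must go back to the
 * caller. A minimal sketch of the same pattern, assuming a hypothetical
 * per-packet predicate keep():
 *
 *	mblk_t *bp, *next, **tailp = &list;
 *	for (bp = chain; bp != NULL; bp = next) {
 *		next = bp->b_next;
 *		bp->b_next = NULL;		detach from the chain
 *		if (keep(bp)) {
 *			*tailp = bp;		append to the result list
 *			tailp = &bp->b_next;
 *		}
 *	}
 */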
static int
mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg)
{
	mac_ring_handle_t ring = arg;

	if (flent->fe_tx_srs)
		mac_tx_srs_wakeup(flent->fe_tx_srs, ring);
	return (0);
}
void
i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring)
{
	mac_client_impl_t	*cclient;
	mac_soft_ring_set_t	*mac_srs;

	/*
	 * After grabbing the mi_rw_lock, the list of clients can't change.
	 * If there are any clients mi_disabled must be B_FALSE and can't
	 * get set since there are clients. If there aren't any clients we
	 * don't do anything. In any case the mip has to be valid. The driver
	 * must make sure that it goes single threaded (with respect to mac
	 * calls) and wait for all pending mac calls to finish before calling
	 * mac_unregister.
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED) {
		rw_exit(&i_mac_impl_lock);
		return;
	}

	/*
	 * Get MAC tx srs from walking mac_client_handle list.
	 */
	rw_enter(&mip->mi_rw_lock, RW_READER);
	for (cclient = mip->mi_clients_list; cclient != NULL;
	    cclient = cclient->mci_client_next) {
		if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) {
			mac_tx_srs_wakeup(mac_srs, ring);
		} else {
			/*
			 * Aggr opens underlying ports in exclusive mode
			 * and registers flow control callbacks using
			 * mac_tx_client_notify(). When opened in
			 * exclusive mode, Tx SRS won't be created
			 * during mac_unicast_add().
			 */
			if (cclient->mci_state_flags & MCIS_EXCLUSIVE) {
				mac_tx_invoke_callbacks(cclient,
				    (mac_tx_cookie_t)ring);
			}
		}
		(void) mac_flow_walk(cclient->mci_subflow_tab,
		    mac_tx_flow_srs_wakeup, ring);
	}
	rw_exit(&mip->mi_rw_lock);
	rw_exit(&i_mac_impl_lock);
}
void
mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
    boolean_t add)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter((mac_impl_t *)mh);

	/*
	 * If no specific refresh function was given then default to the
	 * driver's m_multicst entry point.
	 */
	if (refresh == NULL) {
		refresh = mip->mi_multicst;
		arg = mip->mi_driver;
	}

	mac_bcast_refresh(mip, refresh, arg, add);
	i_mac_perim_exit((mac_impl_t *)mh);
}
void
mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * If no specific refresh function was given then default to the
	 * driver's m_promisc entry point.
	 */
	if (refresh == NULL) {
		refresh = mip->mi_setpromisc;
		arg = mip->mi_driver;
	}
	ASSERT(refresh != NULL);

	/*
	 * Call the refresh function with the current promiscuity.
	 */
	refresh(arg, (mip->mi_devpromisc != 0));
}
/*
 * The mac client requests that the mac not change its margin size to
 * be less than the specified value. If "current" is B_TRUE, then the client
 * requests the mac not to change its margin size to be smaller than the
 * current size. Further, return the current margin size value in this case.
 *
 * We keep every requested size in an ordered list from largest to smallest.
 */
int
mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_margin_req_t	**pp, *p;
	int			err = 0;

	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
	if (current)
		*marginp = mip->mi_margin;

	/*
	 * If the current margin value cannot satisfy the margin requested,
	 * return ENOTSUP directly.
	 */
	if (*marginp > mip->mi_margin) {
		err = ENOTSUP;
		goto done;
	}

	/*
	 * Check whether the given margin is already in the list. If so,
	 * bump the reference count.
	 */
	for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) {
		if (p->mmr_margin == *marginp) {
			/*
			 * The margin requested is already in the list,
			 * so just bump the reference count.
			 */
			p->mmr_ref++;
			goto done;
		}
		if (p->mmr_margin < *marginp)
			break;
	}

	p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP);
	p->mmr_margin = *marginp;
	p->mmr_ref++;
	p->mmr_nextp = *pp;
	*pp = p;

done:
	rw_exit(&(mip->mi_rw_lock));
	return (err);
}
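/*
 * Because the list is kept ordered from largest to smallest, mi_mmrp
 * always points at the most constraining request, which is what
 * mac_margin_update() checks below. A sketch of the client-side pairing:
 *
 *	uint32_t margin = 0;
 *	if (mac_margin_add(mh, &margin, B_TRUE) == 0) {
 *		... margin is reserved; the mac won't shrink below it ...
 *		(void) mac_margin_remove(mh, margin);
 *	}
 */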
/*
 * The mac client requests to cancel its previous mac_margin_add() request.
 * We remove the requested margin size from the list.
 */
int
mac_margin_remove(mac_handle_t mh, uint32_t margin)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_margin_req_t	**pp, *p;
	int			err = 0;

	rw_enter(&(mip->mi_rw_lock), RW_WRITER);

	/*
	 * Find the entry in the list for the given margin.
	 */
	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
		if (p->mmr_margin == margin) {
			if (--p->mmr_ref == 0)
				break;

			/*
			 * There is still a reference to this margin so
			 * there's nothing more to do.
			 */
			goto done;
		}
	}

	/*
	 * We did not find an entry for the given margin.
	 */
	if (p == NULL) {
		err = ENOENT;
		goto done;
	}

	ASSERT(p->mmr_ref == 0);

	/*
	 * Remove it from the list.
	 */
	*pp = p->mmr_nextp;
	kmem_free(p, sizeof (mac_margin_req_t));

done:
	rw_exit(&(mip->mi_rw_lock));
	return (err);
}
boolean_t
mac_margin_update(mac_handle_t mh, uint32_t margin)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	uint32_t	margin_needed = 0;

	rw_enter(&(mip->mi_rw_lock), RW_WRITER);

	if (mip->mi_mmrp != NULL)
		margin_needed = mip->mi_mmrp->mmr_margin;

	if (margin_needed <= margin)
		mip->mi_margin = margin;

	rw_exit(&(mip->mi_rw_lock));

	if (margin_needed <= margin)
		i_mac_notify(mip, MAC_NOTE_MARGIN);

	return (margin_needed <= margin);
}
/*
 * MAC clients use this interface to request that a MAC device not change its
 * MTU below the specified amount. At this time, that amount must be within the
 * range of the device's current minimum and the device's current maximum, e.g.
 * a client cannot request a 3000 byte MTU when the device's MTU is currently
 * 2000.
 *
 * If "current" is set to B_TRUE, then the request is simply to reserve the
 * current underlying mac's maximum for this mac client and return it in mtup.
 */
int
mac_mtu_add(mac_handle_t mh, uint32_t *mtup, boolean_t current)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_mtu_req_t		*prev, *cur;
	mac_propval_range_t	mpr;
	int			err;

	i_mac_perim_enter(mip);
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	if (current == B_TRUE)
		*mtup = mip->mi_sdu_max;
	mpr.mpr_count = 1;
	err = mac_prop_info(mh, MAC_PROP_MTU, "mtu", NULL, 0, &mpr, NULL);
	if (err != 0) {
		rw_exit(&mip->mi_rw_lock);
		i_mac_perim_exit(mip);
		return (err);
	}

	if (*mtup > mip->mi_sdu_max ||
	    *mtup < mpr.mpr_range_uint32[0].mpur_min) {
		rw_exit(&mip->mi_rw_lock);
		i_mac_perim_exit(mip);
		return (ENOTSUP);
	}

	prev = NULL;
	for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) {
		if (*mtup == cur->mtr_mtu) {
			cur->mtr_ref++;
			rw_exit(&mip->mi_rw_lock);
			i_mac_perim_exit(mip);
			return (0);
		}

		if (*mtup > cur->mtr_mtu)
			break;

		prev = cur;
	}

	cur = kmem_alloc(sizeof (mac_mtu_req_t), KM_SLEEP);
	cur->mtr_mtu = *mtup;
	cur->mtr_ref = 1;
	if (prev != NULL) {
		cur->mtr_nextp = prev->mtr_nextp;
		prev->mtr_nextp = cur;
	} else {
		cur->mtr_nextp = mip->mi_mtrp;
		mip->mi_mtrp = cur;
	}

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
	return (0);
}
void
mac_mtu_remove(mac_handle_t mh, uint32_t mtu)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_mtu_req_t	*cur, *prev;

	i_mac_perim_enter(mip);
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	prev = NULL;
	for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) {
		if (cur->mtr_mtu == mtu) {
			ASSERT(cur->mtr_ref > 0);
			cur->mtr_ref--;
			if (cur->mtr_ref == 0) {
				if (prev == NULL)
					mip->mi_mtrp = cur->mtr_nextp;
				else
					prev->mtr_nextp = cur->mtr_nextp;
				kmem_free(cur, sizeof (mac_mtu_req_t));
			}
			rw_exit(&mip->mi_rw_lock);
			i_mac_perim_exit(mip);
			return;
		}

		prev = cur;
	}

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
}
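/*
 * As with margins, clients bracket their dependency on the device MTU;
 * a sketch (error handling elided):
 *
 *	uint32_t mtu = 0;
 *	if (mac_mtu_add(mh, &mtu, B_TRUE) == 0) {	reserve current max
 *		... the device MTU won't drop below mtu ...
 *		mac_mtu_remove(mh, mtu);
 *	}
 */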
/*
 * MAC Type Plugin functions.
 */

mactype_t *
mactype_getplugin(const char *pname)
{
	mactype_t	*mtype = NULL;
	boolean_t	tried_modload = B_FALSE;

	mutex_enter(&i_mactype_lock);

find_registered_mactype:
	if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname,
	    (mod_hash_val_t *)&mtype) != 0) {
		if (!tried_modload) {
			/*
			 * If the plugin has not yet been loaded, then
			 * attempt to load it now. If modload() succeeds,
			 * the plugin should have registered using
			 * mactype_register(), in which case we can go back
			 * and attempt to find it again.
			 */
			if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) {
				tried_modload = B_TRUE;
				goto find_registered_mactype;
			}
		}
	} else {
		/*
		 * Note that there's no danger that the plugin we've loaded
		 * could be unloaded between the modload() step and the
		 * reference count bump here, as we're holding
		 * i_mactype_lock, which mactype_unregister() also holds.
		 */
		atomic_inc_32(&mtype->mt_ref);
	}

	mutex_exit(&i_mactype_lock);
	return (mtype);
}
mactype_register_t *
mactype_alloc(uint_t mactype_version)
{
	mactype_register_t *mtrp;

	/*
	 * Make sure there isn't a version mismatch between the plugin and
	 * the framework. In the future, if multiple versions are
	 * supported, this check could become more sophisticated.
	 */
	if (mactype_version != MACTYPE_VERSION)
		return (NULL);

	mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP);
	mtrp->mtr_version = mactype_version;
	return (mtrp);
}

void
mactype_free(mactype_register_t *mtrp)
{
	kmem_free(mtrp, sizeof (mactype_register_t));
}
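/*
 * A sketch of how a plugin typically uses the alloc/register/free
 * sequence from its module _init(); "myplugin" and myplugin_ops are
 * illustrative names only:
 *
 *	mactype_register_t *mtrp = mactype_alloc(MACTYPE_VERSION);
 *	mtrp->mtr_ident = "myplugin";
 *	mtrp->mtr_ops = &myplugin_ops;	all mandatory ops must be set
 *	err = mactype_register(mtrp);
 *	mactype_free(mtrp);		mactype_register() copied the
 *					fields it needs into a mactype_t
 */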
int
mactype_register(mactype_register_t *mtrp)
{
	mactype_t	*mtp;
	mactype_ops_t	*ops = mtrp->mtr_ops;

	/* Do some sanity checking before we register this MAC type. */
	if (mtrp->mtr_ident == NULL || ops == NULL)
		return (EINVAL);

	/*
	 * Verify that all mandatory callbacks are set in the ops
	 * vector.
	 */
	if (ops->mtops_unicst_verify == NULL ||
	    ops->mtops_multicst_verify == NULL ||
	    ops->mtops_sap_verify == NULL ||
	    ops->mtops_header == NULL ||
	    ops->mtops_header_info == NULL) {
		return (EINVAL);
	}

	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
	mtp->mt_ident = mtrp->mtr_ident;
	mtp->mt_ops = *ops;
	mtp->mt_type = mtrp->mtr_mactype;
	mtp->mt_nativetype = mtrp->mtr_nativetype;
	mtp->mt_addr_length = mtrp->mtr_addrlen;
	if (mtrp->mtr_brdcst_addr != NULL) {
		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
		    mtrp->mtr_addrlen);
	}

	mtp->mt_stats = mtrp->mtr_stats;
	mtp->mt_statcount = mtrp->mtr_statcount;

	mtp->mt_mapping = mtrp->mtr_mapping;
	mtp->mt_mappingcount = mtrp->mtr_mappingcount;

	if (mod_hash_insert(i_mactype_hash,
	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
		kmem_free(mtp, sizeof (*mtp));
		return (EEXIST);
	}
	return (0);
}
int
mactype_unregister(const char *ident)
{
	mactype_t	*mtp;
	mod_hash_val_t	val;
	int		err;

	/*
	 * Let's not allow MAC drivers to use this plugin while we're
	 * trying to unregister it. Holding i_mactype_lock also prevents a
	 * plugin from unregistering while a MAC driver is attempting to
	 * hold a reference to it in i_mactype_getplugin().
	 */
	mutex_enter(&i_mactype_lock);

	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
	    (mod_hash_val_t *)&mtp)) != 0) {
		/* A plugin is trying to unregister, but it never registered. */
		err = ENXIO;
		goto done;
	}

	if (mtp->mt_ref != 0) {
		err = EBUSY;
		goto done;
	}

	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
	ASSERT(err == 0);
	if (err != 0) {
		/* This should never happen, thus the ASSERT() above. */
		err = EINVAL;
		goto done;
	}
	ASSERT(mtp == (mactype_t *)val);

	if (mtp->mt_brdcst_addr != NULL)
		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
	kmem_free(mtp, sizeof (mactype_t));

done:
	mutex_exit(&i_mactype_lock);
	return (err);
}
/*
 * Checks the value size specified for a property as part of a property
 * operation. Returns B_TRUE if the size is correct, B_FALSE otherwise.
 */
boolean_t
mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range)
{
	uint_t minsize = 0;

	if (is_range)
		return (valsize >= sizeof (mac_propval_range_t));

	switch (id) {
	case MAC_PROP_ZONE:
		minsize = sizeof (dld_ioc_zid_t);
		break;
	case MAC_PROP_AUTOPUSH:
		if (valsize != 0)
			minsize = sizeof (struct dlautopush);
		break;
	case MAC_PROP_TAGMODE:
		minsize = sizeof (link_tagmode_t);
		break;
	case MAC_PROP_RESOURCE:
	case MAC_PROP_RESOURCE_EFF:
		minsize = sizeof (mac_resource_props_t);
		break;
	case MAC_PROP_DUPLEX:
		minsize = sizeof (link_duplex_t);
		break;
	case MAC_PROP_SPEED:
		minsize = sizeof (uint64_t);
		break;
	case MAC_PROP_STATUS:
		minsize = sizeof (link_state_t);
		break;
	case MAC_PROP_AUTONEG:
	case MAC_PROP_EN_AUTONEG:
		minsize = sizeof (uint8_t);
		break;
	case MAC_PROP_MTU:
	case MAC_PROP_LLIMIT:
	case MAC_PROP_LDECAY:
		minsize = sizeof (uint32_t);
		break;
	case MAC_PROP_FLOWCTRL:
		minsize = sizeof (link_flowctrl_t);
		break;
	case MAC_PROP_ADV_5000FDX_CAP:
	case MAC_PROP_EN_5000FDX_CAP:
	case MAC_PROP_ADV_2500FDX_CAP:
	case MAC_PROP_EN_2500FDX_CAP:
	case MAC_PROP_ADV_100GFDX_CAP:
	case MAC_PROP_EN_100GFDX_CAP:
	case MAC_PROP_ADV_50GFDX_CAP:
	case MAC_PROP_EN_50GFDX_CAP:
	case MAC_PROP_ADV_40GFDX_CAP:
	case MAC_PROP_EN_40GFDX_CAP:
	case MAC_PROP_ADV_25GFDX_CAP:
	case MAC_PROP_EN_25GFDX_CAP:
	case MAC_PROP_ADV_10GFDX_CAP:
	case MAC_PROP_EN_10GFDX_CAP:
	case MAC_PROP_ADV_1000HDX_CAP:
	case MAC_PROP_EN_1000HDX_CAP:
	case MAC_PROP_ADV_100FDX_CAP:
	case MAC_PROP_EN_100FDX_CAP:
	case MAC_PROP_ADV_100HDX_CAP:
	case MAC_PROP_EN_100HDX_CAP:
	case MAC_PROP_ADV_10FDX_CAP:
	case MAC_PROP_EN_10FDX_CAP:
	case MAC_PROP_ADV_10HDX_CAP:
	case MAC_PROP_EN_10HDX_CAP:
	case MAC_PROP_ADV_100T4_CAP:
	case MAC_PROP_EN_100T4_CAP:
		minsize = sizeof (uint8_t);
		break;
	case MAC_PROP_PVID:
		minsize = sizeof (uint16_t);
		break;
	case MAC_PROP_IPTUN_HOPLIMIT:
		minsize = sizeof (uint32_t);
		break;
	case MAC_PROP_IPTUN_ENCAPLIMIT:
		minsize = sizeof (uint32_t);
		break;
	case MAC_PROP_MAX_TX_RINGS_AVAIL:
	case MAC_PROP_MAX_RX_RINGS_AVAIL:
	case MAC_PROP_MAX_RXHWCLNT_AVAIL:
	case MAC_PROP_MAX_TXHWCLNT_AVAIL:
		minsize = sizeof (uint_t);
		break;
	case MAC_PROP_WL_ESSID:
		minsize = sizeof (wl_linkstatus_t);
		break;
	case MAC_PROP_WL_BSSID:
		minsize = sizeof (wl_bssid_t);
		break;
	case MAC_PROP_WL_BSSTYPE:
		minsize = sizeof (wl_bss_type_t);
		break;
	case MAC_PROP_WL_LINKSTATUS:
		minsize = sizeof (wl_linkstatus_t);
		break;
	case MAC_PROP_WL_DESIRED_RATES:
		minsize = sizeof (wl_rates_t);
		break;
	case MAC_PROP_WL_SUPPORTED_RATES:
		minsize = sizeof (wl_rates_t);
		break;
	case MAC_PROP_WL_AUTH_MODE:
		minsize = sizeof (wl_authmode_t);
		break;
	case MAC_PROP_WL_ENCRYPTION:
		minsize = sizeof (wl_encryption_t);
		break;
	case MAC_PROP_WL_RSSI:
		minsize = sizeof (wl_rssi_t);
		break;
	case MAC_PROP_WL_PHY_CONFIG:
		minsize = sizeof (wl_phy_conf_t);
		break;
	case MAC_PROP_WL_CAPABILITY:
		minsize = sizeof (wl_capability_t);
		break;
	case MAC_PROP_WL_WPA:
		minsize = sizeof (wl_wpa_t);
		break;
	case MAC_PROP_WL_SCANRESULTS:
		minsize = sizeof (wl_wpa_ess_t);
		break;
	case MAC_PROP_WL_POWER_MODE:
		minsize = sizeof (wl_ps_mode_t);
		break;
	case MAC_PROP_WL_RADIO:
		minsize = sizeof (wl_radio_t);
		break;
	case MAC_PROP_WL_ESS_LIST:
		minsize = sizeof (wl_ess_list_t);
		break;
	case MAC_PROP_WL_KEY_TAB:
		minsize = sizeof (wl_wep_key_tab_t);
		break;
	case MAC_PROP_WL_CREATE_IBSS:
		minsize = sizeof (wl_create_ibss_t);
		break;
	case MAC_PROP_WL_SETOPTIE:
		minsize = sizeof (wl_wpa_ie_t);
		break;
	case MAC_PROP_WL_DELKEY:
		minsize = sizeof (wl_del_key_t);
		break;
	case MAC_PROP_WL_KEY:
		minsize = sizeof (wl_key_t);
		break;
	case MAC_PROP_WL_MLME:
		minsize = sizeof (wl_mlme_t);
		break;
	case MAC_PROP_VN_PROMISC_FILTERED:
		minsize = sizeof (boolean_t);
		break;
	}

	return (valsize >= minsize);
}
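/*
 * A sketch of the intended call pattern: callers validate an ioctl
 * buffer before interpreting it, so a too-small value is rejected with
 * EINVAL rather than read out of bounds:
 *
 *	if (!mac_prop_check_size(MAC_PROP_MTU, valsize, B_FALSE))
 *		return (EINVAL);
 *	bcopy(val, &mtu, sizeof (uint32_t));	known to be large enough
 */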
/*
 * mac_set_prop() sets MAC or hardware driver properties:
 *
 * - MAC-managed properties such as resource properties include maxbw,
 *   priority, and cpu binding list, as well as the default port VID
 *   used by bridging. These properties are consumed by the MAC layer
 *   itself and not passed down to the driver. For resource control
 *   properties, this function invokes mac_set_resources() which will
 *   cache the property value in mac_impl_t and may call
 *   mac_client_set_resource() to update the property value of the primary
 *   mac client, if it exists.
 *
 * - Properties which act on the hardware and must be passed to the
 *   driver, such as MTU, through the driver's mc_setprop() entry point.
 */
int
mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val,
    uint_t valsize)
{
	int		err = ENOTSUP;
	mac_impl_t	*mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	switch (id) {
	case MAC_PROP_RESOURCE: {
		mac_resource_props_t *mrp;

		/* call mac_set_resources() for MAC properties */
		ASSERT(valsize >= sizeof (mac_resource_props_t));
		mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
		bcopy(val, mrp, sizeof (*mrp));
		err = mac_set_resources(mh, mrp);
		kmem_free(mrp, sizeof (*mrp));
		break;
	}

	case MAC_PROP_PVID:
		ASSERT(valsize >= sizeof (uint16_t));
		if (mip->mi_state_flags & MIS_IS_VNIC)
			return (ENOTSUP);
		err = mac_set_pvid(mh, *(uint16_t *)val);
		break;

	case MAC_PROP_MTU: {
		uint32_t mtu;

		ASSERT(valsize >= sizeof (uint32_t));
		bcopy(val, &mtu, sizeof (mtu));
		err = mac_set_mtu(mh, mtu, NULL);
		break;
	}

	case MAC_PROP_LLIMIT:
	case MAC_PROP_LDECAY: {
		uint32_t learnval;

		if (valsize < sizeof (learnval) ||
		    (mip->mi_state_flags & MIS_IS_VNIC))
			return (ENOTSUP);
		bcopy(val, &learnval, sizeof (learnval));
		if (learnval == 0 && id == MAC_PROP_LDECAY)
			return (EINVAL);
		if (id == MAC_PROP_LLIMIT)
			mip->mi_llimit = learnval;
		else
			mip->mi_ldecay = learnval;
		err = 0;
		break;
	}

	default:
		/* For other driver properties, call driver's callback */
		if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
			err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
			    name, id, valsize, val);
		}
	}
	return (err);
}
/*
 * mac_get_prop() gets MAC or device driver properties.
 *
 * If the property is a driver property, mac_get_prop() calls the driver's
 * mc_getprop() entry point to get it.
 * If the property is a MAC property, mac_get_prop() invokes
 * mac_get_resources(), which returns the cached value in mac_impl_t.
 */
int
mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val,
    uint_t valsize)
{
	int		err = ENOTSUP;
	mac_impl_t	*mip = (mac_impl_t *)mh;
	uint_t		rings;
	uint_t		vlinks;

	bzero(val, valsize);

	switch (id) {
	case MAC_PROP_RESOURCE: {
		mac_resource_props_t *mrp;

		/* If mac property, read from cache */
		ASSERT(valsize >= sizeof (mac_resource_props_t));
		mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
		mac_get_resources(mh, mrp);
		bcopy(mrp, val, sizeof (*mrp));
		kmem_free(mrp, sizeof (*mrp));
		return (0);
	}
	case MAC_PROP_RESOURCE_EFF: {
		mac_resource_props_t *mrp;

		/* If mac effective property, read from client */
		ASSERT(valsize >= sizeof (mac_resource_props_t));
		mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
		mac_get_effective_resources(mh, mrp);
		bcopy(mrp, val, sizeof (*mrp));
		kmem_free(mrp, sizeof (*mrp));
		return (0);
	}

	case MAC_PROP_PVID:
		ASSERT(valsize >= sizeof (uint16_t));
		if (mip->mi_state_flags & MIS_IS_VNIC)
			return (ENOTSUP);
		*(uint16_t *)val = mac_get_pvid(mh);
		return (0);

	case MAC_PROP_LLIMIT:
	case MAC_PROP_LDECAY:
		ASSERT(valsize >= sizeof (uint32_t));
		if (mip->mi_state_flags & MIS_IS_VNIC)
			return (ENOTSUP);
		if (id == MAC_PROP_LLIMIT)
			bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit));
		else
			bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay));
		return (0);

	case MAC_PROP_MTU: {
		uint32_t sdu;

		ASSERT(valsize >= sizeof (uint32_t));
		mac_sdu_get2(mh, NULL, &sdu, NULL);
		bcopy(&sdu, val, sizeof (sdu));
		return (0);
	}
	case MAC_PROP_STATUS: {
		link_state_t link_state;

		if (valsize < sizeof (link_state))
			return (EINVAL);
		link_state = mac_link_get(mh);
		bcopy(&link_state, val, sizeof (link_state));
		return (0);
	}

	case MAC_PROP_MAX_RX_RINGS_AVAIL:
	case MAC_PROP_MAX_TX_RINGS_AVAIL:
		ASSERT(valsize >= sizeof (uint_t));
		rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ?
		    mac_rxavail_get(mh) : mac_txavail_get(mh);
		bcopy(&rings, val, sizeof (uint_t));
		return (0);

	case MAC_PROP_MAX_RXHWCLNT_AVAIL:
	case MAC_PROP_MAX_TXHWCLNT_AVAIL:
		ASSERT(valsize >= sizeof (uint_t));
		vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ?
		    mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh);
		bcopy(&vlinks, val, sizeof (uint_t));
		return (0);

	case MAC_PROP_RXRINGSRANGE:
	case MAC_PROP_TXRINGSRANGE:
		/*
		 * The values for these properties are returned through
		 * the MAC_PROP_RESOURCE property.
		 */
		return (0);

	default:
		break;
	}

	/* If driver property, request from driver */
	if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) {
		err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id,
		    valsize, val);
	}

	return (err);
}
/*
 * Helper function to initialize the range structure for use in
 * mac_get_prop. If the type can be other than uint32, we can
 * pass that as an arg.
 */
static void
_mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max)
{
	range->mpr_count = 1;
	range->mpr_type = MAC_PROPVAL_UINT32;
	range->mpr_range_uint32[0].mpur_min = min;
	range->mpr_range_uint32[0].mpur_max = max;
}
/*
 * Returns information about the specified property, such as default
 * values or permissions.
 */
int
mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name,
    void *default_val, uint_t default_size, mac_propval_range_t *range,
    uint_t *perm)
{
	mac_prop_info_state_t state;
	mac_impl_t *mip = (mac_impl_t *)mh;
	uint_t	max;

	/*
	 * A property is read/write by default unless the driver says
	 * otherwise.
	 */
	if (perm != NULL)
		*perm = MAC_PROP_PERM_RW;

	if (default_val != NULL)
		bzero(default_val, default_size);

	/*
	 * First, handle framework properties for which we don't need to
	 * involve the driver.
	 */
	switch (id) {
	case MAC_PROP_RESOURCE:
	case MAC_PROP_PVID:
	case MAC_PROP_LLIMIT:
	case MAC_PROP_LDECAY:
		return (0);

	case MAC_PROP_MAX_RX_RINGS_AVAIL:
	case MAC_PROP_MAX_TX_RINGS_AVAIL:
	case MAC_PROP_MAX_RXHWCLNT_AVAIL:
	case MAC_PROP_MAX_TXHWCLNT_AVAIL:
		if (perm != NULL)
			*perm = MAC_PROP_PERM_READ;
		return (0);

	case MAC_PROP_RXRINGSRANGE:
	case MAC_PROP_TXRINGSRANGE:
		/*
		 * Currently, we support range for RX and TX rings properties.
		 * When we extend this support to maxbw, cpus and priority,
		 * we should move this to mac_get_resources.
		 * There is no default value for RX or TX rings.
		 */
		if ((mip->mi_state_flags & MIS_IS_VNIC) &&
		    mac_is_vnic_primary(mh)) {
			/*
			 * We don't support setting rings for a VLAN
			 * data link because it shares its ring with the
			 * primary MAC client.
			 */
			if (perm != NULL)
				*perm = MAC_PROP_PERM_READ;
			if (range != NULL)
				range->mpr_count = 0;
		} else if (range != NULL) {
			if (mip->mi_state_flags & MIS_IS_VNIC)
				mh = mac_get_lower_mac_handle(mh);
			mip = (mac_impl_t *)mh;
			if ((id == MAC_PROP_RXRINGSRANGE &&
			    mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) ||
			    (id == MAC_PROP_TXRINGSRANGE &&
			    mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) {
				if (id == MAC_PROP_RXRINGSRANGE) {
					if ((mac_rxhwlnksavail_get(mh) +
					    mac_rxhwlnksrsvd_get(mh)) <= 1) {
						/*
						 * doesn't support groups or
						 * rings
						 */
						range->mpr_count = 0;
					} else {
						/*
						 * supports specifying groups,
						 * but not rings
						 */
						_mac_set_range(range, 0, 0);
					}
				} else {
					if ((mac_txhwlnksavail_get(mh) +
					    mac_txhwlnksrsvd_get(mh)) <= 1) {
						/*
						 * doesn't support groups or
						 * rings
						 */
						range->mpr_count = 0;
					} else {
						/*
						 * supports specifying groups,
						 * but not rings
						 */
						_mac_set_range(range, 0, 0);
					}
				}
			} else {
				max = id == MAC_PROP_RXRINGSRANGE ?
				    mac_rxavail_get(mh) + mac_rxrsvd_get(mh) :
				    mac_txavail_get(mh) + mac_txrsvd_get(mh);
				if (max <= 1) {
					/*
					 * doesn't support groups or
					 * rings
					 */
					range->mpr_count = 0;
				} else {
					/*
					 * -1 because we have to leave out the
					 * default ring.
					 */
					_mac_set_range(range, 1, max - 1);
				}
			}
		}
		return (0);

	case MAC_PROP_STATUS:
		if (perm != NULL)
			*perm = MAC_PROP_PERM_READ;
		return (0);
	}

	/*
	 * Get the property info from the driver if it implements the
	 * property info entry point.
	 */
	bzero(&state, sizeof (state));

	if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) {
		state.pr_default = default_val;
		state.pr_default_size = default_size;

		/*
		 * The caller specifies the maximum number of ranges
		 * it can accomodate using mpr_count. We don't touch
		 * this value until the driver returns from its
		 * mc_propinfo() callback, and ensure we don't exceed
		 * this number of range as the driver defines
		 * supported range from its mc_propinfo().
		 *
		 * pr_range_cur_count keeps track of how many ranges
		 * were defined by the driver from its mc_propinfo()
		 * entry point.
		 *
		 * On exit, the user-specified range mpr_count returns
		 * the number of ranges specified by the driver on
		 * success, or the number of ranges it wanted to
		 * define if that number of ranges could not be
		 * accomodated by the specified range structure. In
		 * the latter case, the caller will be able to
		 * allocate a larger range structure, and query the
		 * property again.
		 */
		state.pr_range_cur_count = 0;
		state.pr_range = range;

		mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id,
		    (mac_prop_info_handle_t)&state);

		if (state.pr_flags & MAC_PROP_INFO_RANGE)
			range->mpr_count = state.pr_range_cur_count;

		/*
		 * The operation could fail if the buffer supplied by
		 * the user was too small for the range or default
		 * value of the property.
		 */
		if (state.pr_errno != 0)
			return (state.pr_errno);

		if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM)
			*perm = state.pr_perm;
	}

	/*
	 * The MAC layer may want to provide default values or allowed
	 * ranges for properties if the driver does not provide a
	 * property info entry point, or that entry point exists, but
	 * it did not provide a default value or allowed ranges for
	 * that property.
	 */
	switch (id) {
	case MAC_PROP_MTU: {
		uint32_t sdu;

		mac_sdu_get2(mh, NULL, &sdu, NULL);

		if (range != NULL && !(state.pr_flags &
		    MAC_PROP_INFO_RANGE)) {
			/* MTU range */
			_mac_set_range(range, sdu, sdu);
		}

		if (default_val != NULL && !(state.pr_flags &
		    MAC_PROP_INFO_DEFAULT)) {
			if (mip->mi_info.mi_media == DL_ETHER)
				sdu = ETHERMTU;
			/* default MTU value */
			bcopy(&sdu, default_val, sizeof (sdu));
		}
	}
	}

	return (0);
}
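/*
 * On the driver side, the mc_propinfo() entry point invoked above fills
 * in defaults, permissions and ranges through the mac_prop_info_set_*()
 * helpers. A sketch (mydrv_m_propinfo is an illustrative name):
 *
 *	static void
 *	mydrv_m_propinfo(void *arg, const char *name, mac_prop_id_t id,
 *	    mac_prop_info_handle_t prh)
 *	{
 *		switch (id) {
 *		case MAC_PROP_MTU:
 *			mac_prop_info_set_range_uint32(prh, 1500, 9000);
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */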
int
mac_fastpath_disable(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	if ((mip->mi_state_flags & MIS_LEGACY) == 0)
		return (0);

	return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver));
}

void
mac_fastpath_enable(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	if ((mip->mi_state_flags & MIS_LEGACY) == 0)
		return;

	mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver);
}
void
mac_register_priv_prop(mac_impl_t *mip, char **priv_props)
{
	uint_t nprops, i;

	if (priv_props == NULL)
		return;

	nprops = 0;
	while (priv_props[nprops] != NULL)
		nprops++;
	if (nprops == 0)
		return;

	mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP);

	for (i = 0; i < nprops; i++) {
		mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP);
		(void) strlcpy(mip->mi_priv_prop[i], priv_props[i],
		    MAXLINKPROPNAME);
	}

	mip->mi_priv_prop_count = nprops;
}

void
mac_unregister_priv_prop(mac_impl_t *mip)
{
	uint_t i;

	if (mip->mi_priv_prop_count == 0) {
		ASSERT(mip->mi_priv_prop == NULL);
		return;
	}

	for (i = 0; i < mip->mi_priv_prop_count; i++)
		kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME);
	kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count *
	    sizeof (char *));

	mip->mi_priv_prop = NULL;
	mip->mi_priv_prop_count = 0;
}
/*
 * mac_ring_t 'mr' macros. Some rogue drivers may access the ring structure
 * (by invoking mac_rx()) even after processing mac_stop_ring(). In such
 * cases, if MAC frees the ring structure after mac_stop_ring(), any
 * illegal access to the ring structure coming from the driver will panic
 * the system. In order to protect the system from such inadvertent access,
 * we maintain a cache of rings in the mac_impl_t after they get freed.
 * When packets are received on freed rings, MAC (through the generation
 * count mechanism) will drop such packets.
 */
static mac_ring_t *
mac_ring_alloc(mac_impl_t *mip)
{
	mac_ring_t *ring;

	mutex_enter(&mip->mi_ring_lock);
	if (mip->mi_ring_freelist != NULL) {
		ring = mip->mi_ring_freelist;
		mip->mi_ring_freelist = ring->mr_next;
		bzero(ring, sizeof (mac_ring_t));
		mutex_exit(&mip->mi_ring_lock);
	} else {
		mutex_exit(&mip->mi_ring_lock);
		ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP);
	}
	ASSERT((ring != NULL) && (ring->mr_state == MR_FREE));
	return (ring);
}
static void
mac_ring_free(mac_impl_t *mip, mac_ring_t *ring)
{
	ASSERT(ring->mr_state == MR_FREE);

	mutex_enter(&mip->mi_ring_lock);
	ring->mr_state = MR_FREE;
	ring->mr_flag = 0;
	ring->mr_next = mip->mi_ring_freelist;
	ring->mr_mip = NULL;
	mip->mi_ring_freelist = ring;
	mac_ring_stat_delete(ring);
	mutex_exit(&mip->mi_ring_lock);
}
static void
mac_ring_freeall(mac_impl_t *mip)
{
	mac_ring_t *ring_next;

	mutex_enter(&mip->mi_ring_lock);
	mac_ring_t *ring = mip->mi_ring_freelist;
	while (ring != NULL) {
		ring_next = ring->mr_next;
		kmem_cache_free(mac_ring_cache, ring);
		ring = ring_next;
	}
	mip->mi_ring_freelist = NULL;
	mutex_exit(&mip->mi_ring_lock);
}
int
mac_start_ring(mac_ring_t *ring)
{
	int rv = 0;

	ASSERT(ring->mr_state == MR_FREE);

	if (ring->mr_start != NULL) {
		rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num);
		if (rv != 0)
			return (rv);
	}

	ring->mr_state = MR_INUSE;
	return (rv);
}

void
mac_stop_ring(mac_ring_t *ring)
{
	ASSERT(ring->mr_state == MR_INUSE);

	if (ring->mr_stop != NULL)
		ring->mr_stop(ring->mr_driver);

	ring->mr_state = MR_FREE;

	/*
	 * Increment the ring generation number for this ring.
	 */
	ring->mr_gen_num++;
}
int
mac_start_group(mac_group_t *group)
{
	int rv = 0;

	if (group->mrg_start != NULL)
		rv = group->mrg_start(group->mrg_driver);

	return (rv);
}

void
mac_stop_group(mac_group_t *group)
{
	if (group->mrg_stop != NULL)
		group->mrg_stop(group->mrg_driver);
}
/*
 * Called from mac_start() on the default Rx group. Broadcast and multicast
 * packets are received only on the default group. Hence the default group
 * needs to be up even if the primary client is not up, for the other groups
 * to be functional. We do this by calling this function at mac_start time
 * itself. However the broadcast packets that are received can't make their
 * way beyond mac_rx until a mac client creates a broadcast flow.
 */
static int
mac_start_group_and_rings(mac_group_t *group)
{
	mac_ring_t	*ring;
	int		rv = 0;

	ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED);
	if ((rv = mac_start_group(group)) != 0)
		return (rv);

	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
		ASSERT(ring->mr_state == MR_FREE);
		if ((rv = mac_start_ring(ring)) != 0)
			goto error;
		ring->mr_classify_type = MAC_SW_CLASSIFIER;
	}
	return (0);

error:
	mac_stop_group_and_rings(group);
	return (rv);
}
/* Called from mac_stop on the default Rx group */
static void
mac_stop_group_and_rings(mac_group_t *group)
{
	mac_ring_t	*ring;

	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
		if (ring->mr_state != MR_FREE) {
			mac_stop_ring(ring);
			ring->mr_flag = 0;
			ring->mr_classify_type = MAC_NO_CLASSIFIER;
		}
	}
	mac_stop_group(group);
}
static mac_ring_t *
mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index,
    mac_capab_rings_t *cap_rings)
{
	mac_ring_t		*ring, *rnext;
	mac_ring_info_t		ring_info;
	ddi_intr_handle_t	ddi_handle;

	ring = mac_ring_alloc(mip);

	/* Prepare basic information of ring */

	/*
	 * Ring index is numbered to be unique across a particular device.
	 * Ring index computation makes the following assumptions:
	 *	- For drivers with static grouping (e.g. ixgbe, bge),
	 *	the ring index exchanged with the driver (e.g. during
	 *	mr_rget) is unique only across the group the ring
	 *	belongs to.
	 *	- Drivers with dynamic grouping (e.g. nxge) start
	 *	with a single group (mrg_index = 0).
	 */
	ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index;
	ring->mr_type = group->mrg_type;
	ring->mr_gh = (mac_group_handle_t)group;

	/* Insert the new ring to the list. */
	ring->mr_next = group->mrg_rings;
	group->mrg_rings = ring;

	/* Zero to reuse the info data structure */
	bzero(&ring_info, sizeof (ring_info));

	/* Query ring information from driver */
	cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index,
	    index, &ring_info, (mac_ring_handle_t)ring);

	ring->mr_info = ring_info;

	/*
	 * The interrupt handle could be shared among multiple rings.
	 * Thus if there is a bunch of rings that are sharing an
	 * interrupt, then only one ring among the bunch will be made
	 * available for interrupt re-targeting; the rest will have
	 * the ddi_shared flag set to TRUE and would not be available
	 * for interrupt re-targeting.
	 */
	if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) {
		rnext = ring->mr_next;
		while (rnext != NULL) {
			if (rnext->mr_info.mri_intr.mi_ddi_handle ==
			    ddi_handle) {
				/*
				 * If the default ring (mr_index == 0) is part
				 * of a group of rings sharing an
				 * interrupt, then set the ddi_shared flag for
				 * the default ring and give another ring
				 * the chance to be re-targeted.
				 */
				if (rnext->mr_index == 0 &&
				    !rnext->mr_info.mri_intr.mi_ddi_shared) {
					rnext->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				} else {
					ring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				}
				break;
			}
			rnext = rnext->mr_next;
		}
		/*
		 * If rnext is NULL, then no matching ddi_handle was found.
		 * Rx rings get registered first. So if this is a Tx ring,
		 * then go through all the Rx rings and see if there is a
		 * matching ddi handle.
		 */
		if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) {
			mac_compare_ddi_handle(mip->mi_rx_groups,
			    mip->mi_rx_group_count, ring);
		}
	}

	/* Update ring's status */
	ring->mr_state = MR_FREE;
	ring->mr_flag = 0;

	/* Update the ring count of the group */
	group->mrg_cur_count++;

	/* Create per ring kstats */
	if (ring->mr_stat != NULL) {
		ring->mr_mip = mip;
		mac_ring_stat_create(ring);
	}

	return (ring);
}
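/*
 * For example, under the static-grouping assumption above, a device with
 * mgi_count == 4 rings per group numbers ring 2 of group 1 as
 * mr_index = 1 * 4 + 2 = 6, keeping ring indices unique across the
 * whole device.
 */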
/*
 * Rings are chained together for easy regrouping.
 */
static void
mac_init_group(mac_impl_t *mip, mac_group_t *group, int size,
    mac_capab_rings_t *cap_rings)
{
	int index;

	/*
	 * Initialize all ring members of this group. Size of zero will not
	 * enter the loop, so it's safe for initializing an empty group.
	 */
	for (index = size - 1; index >= 0; index--)
		(void) mac_init_ring(mip, group, index, cap_rings);
}
int
mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype)
{
	mac_capab_rings_t	*cap_rings;
	mac_group_t		*group;
	mac_group_t		*groups;
	mac_group_info_t	group_info;
	uint_t			group_free = 0;
	uint_t			ring_left;
	mac_ring_t		*ring;
	int			g;
	int			err = 0;
	uint_t			grpcnt;
	boolean_t		pseudo_txgrp = B_FALSE;

	switch (rtype) {
	case MAC_RING_TYPE_RX:
		ASSERT(mip->mi_rx_groups == NULL);

		cap_rings = &mip->mi_rx_rings_cap;
		cap_rings->mr_type = MAC_RING_TYPE_RX;
		break;
	case MAC_RING_TYPE_TX:
		ASSERT(mip->mi_tx_groups == NULL);

		cap_rings = &mip->mi_tx_rings_cap;
		cap_rings->mr_type = MAC_RING_TYPE_TX;
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings))
		return (0);
	grpcnt = cap_rings->mr_gnum;

	/*
	 * If we have multiple TX rings, but only one TX group, we can
	 * create pseudo TX groups (one per TX ring) in the MAC layer,
	 * except for an aggr. For an aggr currently we maintain only
	 * one group with all the rings (for all its ports), going
	 * forwards we might change this.
	 */
	if (rtype == MAC_RING_TYPE_TX &&
	    cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 &&
	    (mip->mi_state_flags & MIS_IS_AGGR) == 0) {
		/*
		 * The -1 here is because we create a default TX group
		 * with all the rings in it.
		 */
		grpcnt = cap_rings->mr_rnum - 1;
		pseudo_txgrp = B_TRUE;
	}

	/*
	 * Allocate a contiguous buffer for all groups.
	 */
	groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt + 1), KM_SLEEP);

	ring_left = cap_rings->mr_rnum;

	/*
	 * Get all ring groups if any, and get their ring members
	 * if any.
	 */
	for (g = 0; g < grpcnt; g++) {
		group = groups + g;

		/* Prepare basic information of the group */
		group->mrg_index = g;
		group->mrg_type = rtype;
		group->mrg_state = MAC_GROUP_STATE_UNINIT;
		group->mrg_mh = (mac_handle_t)mip;
		group->mrg_next = group + 1;

		/* Zero to reuse the info data structure */
		bzero(&group_info, sizeof (group_info));

		if (pseudo_txgrp) {
			/*
			 * This is a pseudo group that we created, apart
			 * from setting the state there is nothing to be
			 * done.
			 */
			group->mrg_state = MAC_GROUP_STATE_REGISTERED;
			group_free++;
			continue;
		}
		/* Query group information from driver */
		cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info,
		    (mac_group_handle_t)group);

		switch (cap_rings->mr_group_type) {
		case MAC_GROUP_TYPE_DYNAMIC:
			if (cap_rings->mr_gaddring == NULL ||
			    cap_rings->mr_gremring == NULL) {
				DTRACE_PROBE3(
				    mac__init__rings_no_addremring,
				    char *, mip->mi_name,
				    mac_group_add_ring_t,
				    cap_rings->mr_gaddring,
				    mac_group_add_ring_t,
				    cap_rings->mr_gremring);
				err = EINVAL;
				goto bail;
			}

			switch (rtype) {
			case MAC_RING_TYPE_RX:
				/*
				 * The first RX group must have non-zero
				 * rings, and the following groups must
				 * have zero rings.
				 */
				if (g == 0 && group_info.mgi_count == 0) {
					DTRACE_PROBE1(
					    mac__init__rings__rx__def__zero,
					    char *, mip->mi_name);
					err = EINVAL;
					goto bail;
				}
				if (g > 0 && group_info.mgi_count != 0) {
					DTRACE_PROBE3(
					    mac__init__rings__rx__nonzero,
					    char *, mip->mi_name,
					    int, g, int, group_info.mgi_count);
					err = EINVAL;
					goto bail;
				}
				break;
			case MAC_RING_TYPE_TX:
				/*
				 * All TX ring groups must have zero rings.
				 */
				if (group_info.mgi_count != 0) {
					DTRACE_PROBE3(
					    mac__init__rings__tx__nonzero,
					    char *, mip->mi_name,
					    int, g, int, group_info.mgi_count);
					err = EINVAL;
					goto bail;
				}
				break;
			}
			break;
		case MAC_GROUP_TYPE_STATIC:
			/*
			 * Note that an empty group is allowed, e.g., an aggr
			 * would start with an empty group.
			 */
			break;
		default:
			/* unknown group type */
			DTRACE_PROBE2(mac__init__rings__unknown__type,
			    char *, mip->mi_name,
			    int, cap_rings->mr_group_type);
			err = EINVAL;
			goto bail;
		}

		/*
		 * The driver must register group->mgi_addmac/remmac() for rx
		 * groups to support multiple MAC addresses.
		 */
		if (rtype == MAC_RING_TYPE_RX &&
		    ((group_info.mgi_addmac == NULL) ||
		    (group_info.mgi_remmac == NULL))) {
			err = EINVAL;
			goto bail;
		}

		/* Cache driver-supplied information */
		group->mrg_info = group_info;

		/* Update the group's status and group count. */
		mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
		group_free++;

		group->mrg_rings = NULL;
		group->mrg_cur_count = 0;
		mac_init_group(mip, group, group_info.mgi_count, cap_rings);
		ring_left -= group_info.mgi_count;

		/* The current group size should be equal to default value */
		ASSERT(group->mrg_cur_count == group_info.mgi_count);
	}

	/* Build up a dummy group for free resources as a pool */
	group = groups + grpcnt;

	/* Prepare basic information of the group */
	group->mrg_index = -1;
	group->mrg_type = rtype;
	group->mrg_state = MAC_GROUP_STATE_UNINIT;
	group->mrg_mh = (mac_handle_t)mip;
	group->mrg_next = NULL;

	/*
	 * If there are ungrouped rings, allocate a continuous buffer for
	 * remaining resources.
	 */
	if (ring_left != 0) {
		group->mrg_rings = NULL;
		group->mrg_cur_count = 0;
		mac_init_group(mip, group, ring_left, cap_rings);

		/* The current group size should be equal to ring_left */
		ASSERT(group->mrg_cur_count == ring_left);

		ring_left = 0;

		/* Update this group's status */
		mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
	} else {
		group->mrg_rings = NULL;
	}

	ASSERT(ring_left == 0);

bail:

	/* Cache other important information to finalize the initialization */
	switch (rtype) {
	case MAC_RING_TYPE_RX:
		mip->mi_rx_group_type = cap_rings->mr_group_type;
		mip->mi_rx_group_count = cap_rings->mr_gnum;
		mip->mi_rx_groups = groups;
		mip->mi_rx_donor_grp = groups;
		if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
			/*
			 * The default ring is reserved since it is
			 * used for sending the broadcast etc. packets.
			 */
			mip->mi_rxrings_avail =
			    mip->mi_rx_groups->mrg_cur_count - 1;
			mip->mi_rxrings_rsvd = 1;
		}
		/*
		 * The default group cannot be reserved. It is used by
		 * all the clients that do not have an exclusive group.
		 */
		mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1;
		mip->mi_rxhwclnt_used = 1;
		break;
	case MAC_RING_TYPE_TX:
		mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC :
		    cap_rings->mr_group_type;
		mip->mi_tx_group_count = grpcnt;
		mip->mi_tx_group_free = group_free;
		mip->mi_tx_groups = groups;

		group = groups + grpcnt;
		ring = group->mrg_rings;
		/*
		 * The ring can be NULL in the case of aggr. Aggr will
		 * have an empty Tx group which will get populated
		 * later when pseudo Tx rings are added after
		 * mac_register() is done.
		 */
		if (ring == NULL) {
			ASSERT(mip->mi_state_flags & MIS_IS_AGGR);
			/*
			 * pass the group to aggr so it can add Tx
			 * rings to the group later.
			 */
			cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL,
			    (mac_group_handle_t)group);
			/*
			 * Even though there are no rings at this time
			 * (rings will come later), set the group
			 * state to registered.
			 */
			group->mrg_state = MAC_GROUP_STATE_REGISTERED;
		} else {
			/*
			 * Ring 0 is used as the default one and it could be
			 * assigned to a client as well.
			 */
			while ((ring->mr_index != 0) && (ring->mr_next != NULL))
				ring = ring->mr_next;
			ASSERT(ring->mr_index == 0);
			mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
		}
		if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
			mip->mi_txrings_avail = group->mrg_cur_count - 1;
			/*
			 * The default ring cannot be reserved.
			 */
			mip->mi_txrings_rsvd = 1;
		}
		/*
		 * The default group cannot be reserved. It will be shared
		 * by clients that do not have an exclusive group.
		 */
		mip->mi_txhwclnt_avail = mip->mi_tx_group_count;
		mip->mi_txhwclnt_used = 1;
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (err != 0)
		mac_free_rings(mip, rtype);

	return (err);
}
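/*
 * A worked example of the pseudo Tx group path above: a non-aggr driver
 * advertising mr_rnum == 8 Tx rings and mr_gnum == 0 Tx groups gets
 * grpcnt = 8 - 1 = 7 pseudo groups plus the default group that keeps all
 * the rings, and mi_tx_group_type is forced to MAC_GROUP_TYPE_DYNAMIC so
 * rings can later be assigned to clients individually.
 */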
/*
 * The ddi interrupt handle could be shared among rings. If so, compare
 * the new ring's ddi handle with the existing ones and set the ddi_shared
 * flag.
 */
static void
mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring)
{
	mac_group_t		*group;
	mac_ring_t		*ring;
	ddi_intr_handle_t	ddi_handle;
	int			g;

	ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle;
	for (g = 0; g < grpcnt; g++) {
		group = groups + g;
		for (ring = group->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			if (ring == cring)
				continue;
			if (ring->mr_info.mri_intr.mi_ddi_handle ==
			    ddi_handle) {
				if (cring->mr_type == MAC_RING_TYPE_RX &&
				    ring->mr_index == 0 &&
				    !ring->mr_info.mri_intr.mi_ddi_shared) {
					ring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				} else {
					cring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				}
				return;
			}
		}
	}
}
/*
 * Called to free all groups of particular type (RX or TX). It's assumed that
 * no clients are using these groups.
 */
void
mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
{
	mac_group_t	*group, *groups;
	uint_t		group_count;

	switch (rtype) {
	case MAC_RING_TYPE_RX:
		if (mip->mi_rx_groups == NULL)
			return;

		groups = mip->mi_rx_groups;
		group_count = mip->mi_rx_group_count;

		mip->mi_rx_groups = NULL;
		mip->mi_rx_donor_grp = NULL;
		mip->mi_rx_group_count = 0;
		break;
	case MAC_RING_TYPE_TX:
		ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free);

		if (mip->mi_tx_groups == NULL)
			return;

		groups = mip->mi_tx_groups;
		group_count = mip->mi_tx_group_count;

		mip->mi_tx_groups = NULL;
		mip->mi_tx_group_count = 0;
		mip->mi_tx_group_free = 0;
		mip->mi_default_tx_ring = NULL;
		break;
	default:
		ASSERT(B_FALSE);
	}

	for (group = groups; group != NULL; group = group->mrg_next) {
		mac_ring_t *ring;

		if (group->mrg_cur_count == 0)
			continue;

		ASSERT(group->mrg_rings != NULL);

		while ((ring = group->mrg_rings) != NULL) {
			group->mrg_rings = ring->mr_next;
			mac_ring_free(mip, ring);
		}
	}

	/* Free all the cached rings */
	mac_ring_freeall(mip);
	/* Free the block of group data structures */
	kmem_free(groups, sizeof (mac_group_t) * (group_count + 1));
}
/*
 * Associate a MAC address with a receive group.
 *
 * The return value of this function should always be checked properly, because
 * any type of failure could cause unexpected results. A MAC address can be
 * added to or removed from a group only after the group has been reserved.
 * Ideally, a successful reservation always leads to calling mac_group_addmac()
 * to steer desired traffic. A failure to add a unicast MAC address doesn't
 * always imply that the group is functioning abnormally.
 *
 * Currently this function is called everywhere, and it reflects assumptions
 * about MAC addresses in the implementation. CR 6735196.
 */
int
mac_group_addmac(mac_group_t *group, const uint8_t *addr)
{
	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
	ASSERT(group->mrg_info.mgi_addmac != NULL);

	return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr));
}

/*
 * Remove the association between MAC address and receive group.
 */
int
mac_group_remmac(mac_group_t *group, const uint8_t *addr)
{
	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
	ASSERT(group->mrg_info.mgi_remmac != NULL);

	return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr));
}
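/*
 * Per the block comment above, the return value must always be checked;
 * a sketch of defensive use:
 *
 *	if ((err = mac_group_addmac(group, mac_addr)) != 0) {
 *		back out the group reservation and fall back to
 *		software classification rather than assuming the
 *		address was programmed into the hardware
 *	}
 */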
/*
 * This is the entry point for packets transmitted through the bridging code.
 * If no bridge is in place, MAC_RING_TX transmits using the tx ring. The 'rh'
 * pointer may be NULL to select the default ring.
 */
mblk_t *
mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_handle_t mh;

	/*
	 * Once we take a reference on the bridge link, the bridge
	 * module itself can't unload, so the callback pointers are
	 * stable.
	 */
	mutex_enter(&mip->mi_bridge_lock);
	if ((mh = mip->mi_bridge_link) != NULL)
		mac_bridge_ref_cb(mh, B_TRUE);
	mutex_exit(&mip->mi_bridge_lock);
	if (mh == NULL) {
		MAC_RING_TX(mip, rh, mp, mp);
	} else {
		mp = mac_bridge_tx_cb(mh, rh, mp);
		mac_bridge_ref_cb(mh, B_FALSE);
	}

	return (mp);
}
/*
 * Find a ring from its index.
 */
mac_ring_handle_t
mac_find_ring(mac_group_handle_t gh, int index)
{
	mac_group_t	*group = (mac_group_t *)gh;
	mac_ring_t	*ring = group->mrg_rings;

	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next)
		if (ring->mr_index == index)
			break;

	return ((mac_ring_handle_t)ring);
}
/*
 * Add a ring to an existing group.
 *
 * The ring must be either passed directly (for example if the ring
 * movement is initiated by the framework), or specified through a driver
 * index (for example when the ring is added by the driver).
 *
 * The caller needs to call mac_perim_enter() before calling this function.
 */
int
i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
{
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
	mac_capab_rings_t *cap_rings;
	boolean_t driver_call = (ring == NULL);
	mac_group_type_t group_type;
	int ret = 0;
	flow_entry_t *flent;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	switch (group->mrg_type) {
	case MAC_RING_TYPE_RX:
		cap_rings = &mip->mi_rx_rings_cap;
		group_type = mip->mi_rx_group_type;
		break;
	case MAC_RING_TYPE_TX:
		cap_rings = &mip->mi_tx_rings_cap;
		group_type = mip->mi_tx_group_type;
		break;
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * There should be no ring with the same ring index in the target
	 * group.
	 */
	ASSERT(mac_find_ring((mac_group_handle_t)group,
	    driver_call ? index : ring->mr_index) == NULL);

	if (driver_call) {
		/*
		 * The function is called as a result of a request from
		 * a driver to add a ring to an existing group, for example
		 * from the aggregation driver. Allocate a new mac_ring_t
		 * for that ring.
		 */
		ring = mac_init_ring(mip, group, index, cap_rings);
		ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT);
	} else {
		/*
		 * The function is called as a result of a MAC layer request
		 * to add a ring to an existing group. In this case the
		 * ring is being moved between groups, which requires
		 * the underlying driver to support dynamic grouping,
		 * and the mac_ring_t already exists.
		 */
		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
		ASSERT(group->mrg_driver == NULL ||
		    cap_rings->mr_gaddring != NULL);
		ASSERT(ring->mr_gh == NULL);
	}

	/*
	 * At this point the ring should not be in use, and it should be
	 * of the right type for the target group.
	 */
	ASSERT(ring->mr_state < MR_INUSE);
	ASSERT(ring->mr_srs == NULL);
	ASSERT(ring->mr_type == group->mrg_type);

	if (!driver_call) {
		/*
		 * Add the driver level hardware ring if the process was not
		 * initiated by the driver, and the target group is not the
		 * NULL group.
		 */
		if (group->mrg_driver != NULL) {
			cap_rings->mr_gaddring(group->mrg_driver,
			    ring->mr_driver, ring->mr_type);
		}

		/*
		 * Insert the ring ahead of the existing rings.
		 */
		ring->mr_next = group->mrg_rings;
		group->mrg_rings = ring;
		ring->mr_gh = (mac_group_handle_t)group;
		group->mrg_cur_count++;
	}

	/*
	 * If the group has not been actively used, we're done.
	 */
	if (group->mrg_index != -1 &&
	    group->mrg_state < MAC_GROUP_STATE_RESERVED)
		return (0);

	/*
	 * Start the ring if needed. Failure causes the grouping action to
	 * be undone.
	 */
	if (ring->mr_state != MR_INUSE) {
		if ((ret = mac_start_ring(ring)) != 0) {
			if (!driver_call) {
				cap_rings->mr_gremring(group->mrg_driver,
				    ring->mr_driver, ring->mr_type);
			}
			group->mrg_cur_count--;
			group->mrg_rings = ring->mr_next;

			ring->mr_gh = NULL;

			if (driver_call)
				mac_ring_free(mip, ring);

			return (ret);
		}
	}

	/*
	 * Set up SRS/SR according to the ring type.
	 */
	switch (ring->mr_type) {
	case MAC_RING_TYPE_RX:
		/*
		 * Setup an SRS on top of the new ring if the group is
		 * reserved for someone's exclusive use.
		 */
		if (group->mrg_state == MAC_GROUP_STATE_RESERVED) {
			mac_client_impl_t *mcip;

			mcip = MAC_GROUP_ONLY_CLIENT(group);
			/*
			 * Even though this group is reserved we might still
			 * have multiple clients, i.e. a VLAN shares the
			 * group with the primary mac client.
			 */
			if (mcip != NULL) {
				flent = mcip->mci_flent;
				ASSERT(flent->fe_rx_srs_cnt > 0);
				mac_rx_srs_group_setup(mcip, flent, SRST_LINK);
				mac_fanout_setup(mcip, flent,
				    MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver,
				    mcip, NULL, NULL);
			} else {
				ring->mr_classify_type = MAC_SW_CLASSIFIER;
			}
		}
		break;
	case MAC_RING_TYPE_TX:
	{
		mac_grp_client_t	*mgcp = group->mrg_clients;
		mac_client_impl_t	*mcip;
		mac_soft_ring_set_t	*mac_srs;
		mac_srs_tx_t		*tx;
		boolean_t		is_aggr;

		if (MAC_GROUP_NO_CLIENT(group)) {
			if (ring->mr_state == MR_INUSE)
				mac_stop_ring(ring);
			ring->mr_flag = 0;
			break;
		}
		/*
		 * If the rings are being moved to a group that has
		 * clients using it, then add the new rings to the
		 * clients SRS.
		 */
		while (mgcp != NULL) {
			mcip = mgcp->mgc_client;
			flent = mcip->mci_flent;
			is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR);
			mac_srs = MCIP_TX_SRS(mcip);
			tx = &mac_srs->srs_tx;
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			/*
			 * If we are growing from 1 to multiple rings.
			 */
			if (tx->st_mode == SRS_TX_BW ||
			    tx->st_mode == SRS_TX_SERIALIZE ||
			    tx->st_mode == SRS_TX_DEFAULT) {
				mac_ring_t	*tx_ring = tx->st_arg2;

				tx->st_arg2 = NULL;
				mac_tx_srs_stat_recreate(mac_srs, B_TRUE);
				mac_tx_srs_add_ring(mac_srs, tx_ring);
				if (mac_srs->srs_type & SRST_BW_CONTROL) {
					tx->st_mode = is_aggr ? SRS_TX_BW_AGGR :
					    SRS_TX_BW_FANOUT;
				} else {
					tx->st_mode = is_aggr ? SRS_TX_AGGR :
					    SRS_TX_FANOUT;
				}
				tx->st_func = mac_tx_get_func(tx->st_mode);
			}
			mac_tx_srs_add_ring(mac_srs, ring);
			mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
			    mac_rx_deliver, mcip, NULL, NULL);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			mgcp = mgcp->mgc_next;
		}
		break;
	}
	default:
		ASSERT(B_FALSE);
	}
	/*
	 * For aggr, the default ring will be NULL to begin with. If it
	 * is NULL, then pick the first ring that gets added as the
	 * default ring. Any ring in an aggregation can be removed at
	 * any time (by the user action of removing a link) and if the
	 * current default ring gets removed, then a new one gets
	 * picked (see i_mac_group_rem_ring()).
	 */
	if (mip->mi_state_flags & MIS_IS_AGGR &&
	    mip->mi_default_tx_ring == NULL &&
	    ring->mr_type == MAC_RING_TYPE_TX) {
		mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
	}

	MAC_RING_UNMARK(ring, MR_INCIPIENT);
	return (0);
}
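/*
 * The two call shapes, schematically (illustrative): a driver such as
 * aggr adds a brand-new ring by driver index, while the framework moves
 * an existing mac_ring_t between dynamic groups:
 *
 *	ret = i_mac_group_add_ring(group, NULL, index);	   driver-initiated
 *	ret = i_mac_group_add_ring(group, ring, 0);	   framework move
 *
 * In both cases the caller must already hold the MAC perimeter.
 */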
/*
 * Remove a ring from its current group. MAC internal function for dynamic
 * grouping.
 *
 * The caller needs to call mac_perim_enter() before calling this function.
 */
void
i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring,
    boolean_t driver_call)
{
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
	mac_capab_rings_t *cap_rings = NULL;
	mac_group_type_t group_type;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	ASSERT(mac_find_ring((mac_group_handle_t)group,
	    ring->mr_index) == (mac_ring_handle_t)ring);
	ASSERT((mac_group_t *)ring->mr_gh == group);
	ASSERT(ring->mr_type == group->mrg_type);

	if (ring->mr_state == MR_INUSE)
		mac_stop_ring(ring);
	switch (ring->mr_type) {
	case MAC_RING_TYPE_RX:
		group_type = mip->mi_rx_group_type;
		cap_rings = &mip->mi_rx_rings_cap;

		/*
		 * Only hardware classified packets hold a reference to the
		 * ring all the way up the Rx path. mac_rx_srs_remove()
		 * will take care of quiescing the Rx path and removing the
		 * SRS. The software classified path neither holds a reference
		 * nor any association with the ring in mac_rx.
		 */
		if (ring->mr_srs != NULL) {
			mac_rx_srs_remove(ring->mr_srs);
			ring->mr_srs = NULL;
		}
		break;
	case MAC_RING_TYPE_TX:
	{
		mac_grp_client_t	*mgcp;
		mac_client_impl_t	*mcip;
		mac_soft_ring_set_t	*mac_srs;
		mac_srs_tx_t		*tx;
		mac_ring_t		*rem_ring;
		mac_group_t		*defgrp;
		uint_t			ring_info = 0;

		/*
		 * For TX this function is invoked in three
		 * cases:
		 *
		 * 1) In the case of a failure during the
		 * initial creation of a group when a share is
		 * associated with a MAC client. So the SRS is not
		 * yet setup, and will be setup later after the
		 * group has been reserved and populated.
		 *
		 * 2) From mac_release_tx_group() when freeing
		 * a TX SRS.
		 *
		 * 3) In the case of aggr, when a port gets removed,
		 * the pseudo Tx rings that it exposed gets removed.
		 *
		 * In the first two cases the SRS and its soft
		 * rings are already quiesced.
		 */
		if (driver_call) {
			mac_client_impl_t *mcip;
			mac_soft_ring_set_t *mac_srs;
			mac_soft_ring_t *sringp;
			mac_srs_tx_t *srs_tx;

			if (mip->mi_state_flags & MIS_IS_AGGR &&
			    mip->mi_default_tx_ring ==
			    (mac_ring_handle_t)ring) {
				/* pick a new default Tx ring */
				mip->mi_default_tx_ring =
				    (group->mrg_rings != ring) ?
				    (mac_ring_handle_t)group->mrg_rings :
				    (mac_ring_handle_t)(ring->mr_next);
			}
			/* Presently only aggr case comes here */
			if (group->mrg_state != MAC_GROUP_STATE_RESERVED)
				break;

			mcip = MAC_GROUP_ONLY_CLIENT(group);
			ASSERT(mcip != NULL);
			ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR);
			mac_srs = MCIP_TX_SRS(mcip);
			ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR ||
			    mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR);
			srs_tx = &mac_srs->srs_tx;
			/*
			 * Wakeup any callers blocked on this
			 * Tx ring due to flow control.
			 */
			sringp = srs_tx->st_soft_rings[ring->mr_index];
			ASSERT(sringp != NULL);
			mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp);
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			mac_tx_srs_del_ring(mac_srs, ring);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			break;
		}
		ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring);
		group_type = mip->mi_tx_group_type;
		cap_rings = &mip->mi_tx_rings_cap;
		/*
		 * See if we need to take it out of the MAC clients using
		 * this group.
		 */
		if (MAC_GROUP_NO_CLIENT(group))
			break;
		mgcp = group->mrg_clients;
		defgrp = MAC_DEFAULT_TX_GROUP(mip);
		while (mgcp != NULL) {
			mcip = mgcp->mgc_client;
			mac_srs = MCIP_TX_SRS(mcip);
			tx = &mac_srs->srs_tx;
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			/*
			 * If we are here when removing rings from the
			 * defgroup, mac_reserve_tx_ring would have
			 * already deleted the ring from the MAC
			 * clients in the group.
			 */
			if (group != defgrp) {
				mac_tx_invoke_callbacks(mcip,
				    (mac_tx_cookie_t)
				    mac_tx_srs_get_soft_ring(mac_srs, ring));
				mac_tx_srs_del_ring(mac_srs, ring);
			}
			/*
			 * Additionally, if we are left with only
			 * one ring in the group after this, we need
			 * to modify the mode etc. too. (We haven't
			 * yet taken the ring out, so we check with 2).
			 */
			if (group->mrg_cur_count == 2) {
				if (ring->mr_next == NULL)
					rem_ring = group->mrg_rings;
				else
					rem_ring = ring->mr_next;
				mac_tx_invoke_callbacks(mcip,
				    (mac_tx_cookie_t)
				    mac_tx_srs_get_soft_ring(mac_srs,
				    rem_ring));
				mac_tx_srs_del_ring(mac_srs, rem_ring);
				if (rem_ring->mr_state != MR_INUSE) {
					(void) mac_start_ring(rem_ring);
				}
				tx->st_arg2 = (void *)rem_ring;
				mac_tx_srs_stat_recreate(mac_srs, B_FALSE);
				ring_info = mac_hwring_getinfo(
				    (mac_ring_handle_t)rem_ring);
				/*
				 * We are shrinking from multiple
				 * rings to 1 ring.
				 */
				if (mac_srs->srs_type & SRST_BW_CONTROL) {
					tx->st_mode = SRS_TX_BW;
				} else if (mac_tx_serialize ||
				    (ring_info & MAC_RING_TX_SERIALIZE)) {
					tx->st_mode = SRS_TX_SERIALIZE;
				} else {
					tx->st_mode = SRS_TX_DEFAULT;
				}
				tx->st_func = mac_tx_get_func(tx->st_mode);
			}
			mac_tx_client_restart((mac_client_handle_t)mcip);
			mgcp = mgcp->mgc_next;
		}
		break;
	}
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * Remove the ring from the group.
	 */
	if (ring == group->mrg_rings) {
		group->mrg_rings = ring->mr_next;
	} else {
		mac_ring_t *pre;

		pre = group->mrg_rings;
		while (pre->mr_next != ring)
			pre = pre->mr_next;
		pre->mr_next = ring->mr_next;
	}
	group->mrg_cur_count--;

	if (!driver_call) {
		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
		ASSERT(group->mrg_driver == NULL ||
		    cap_rings->mr_gremring != NULL);

		/*
		 * Remove the driver level hardware ring.
		 */
		if (group->mrg_driver != NULL) {
			cap_rings->mr_gremring(group->mrg_driver,
			    ring->mr_driver, ring->mr_type);
		}
	}

	ring->mr_gh = NULL;
	if (driver_call)
		mac_ring_free(mip, ring);
	else
		ring->mr_flag = 0;
}
/*
 * Move a ring to the target group. If needed, remove the ring from the group
 * that it currently belongs to.
 *
 * The caller needs to enter MAC's perimeter by calling mac_perim_enter().
 */
static int
mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring)
{
	mac_group_t *s_group = (mac_group_t *)ring->mr_gh;
	int rv;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(d_group != NULL);
	ASSERT(s_group->mrg_mh == d_group->mrg_mh);

	if (s_group == d_group)
		return (0);

	/*
	 * Remove it from the current group first.
	 */
	if (s_group != NULL)
		i_mac_group_rem_ring(s_group, ring, B_FALSE);

	/*
	 * Add it to the new group.
	 */
	rv = i_mac_group_add_ring(d_group, ring, 0);
	if (rv != 0) {
		/*
		 * Failed to add the ring to the destination group;
		 * try to put it back in the source group. If that
		 * also fails, the ring is stuck in limbo, so log a
		 * message.
		 */
		if (i_mac_group_add_ring(s_group, ring, 0)) {
			cmn_err(CE_WARN, "%s: failed to move ring %p\n",
			    mip->mi_name, (void *)ring);
		}
	}

	return (rv);
}
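/*
 * Note on the recovery path above (illustrative): a failed move is
 * retried back into the source group, so a ring can only be orphaned by
 * a double failure, which is logged:
 *
 *	i_mac_group_rem_ring(s_group, ring, B_FALSE);
 *	if ((rv = i_mac_group_add_ring(d_group, ring, 0)) != 0) {
 *		if (i_mac_group_add_ring(s_group, ring, 0) != 0)
 *			log a warning; the ring is now in limbo
 *	}
 */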
/*
 * Find a MAC address according to its value.
 */
mac_address_t *
mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr)
{
	mac_address_t *map;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	for (map = mip->mi_addresses; map != NULL; map = map->ma_next) {
		if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0)
			break;
	}

	return (map);
}
/*
 * Check whether the MAC address is shared by multiple clients.
 */
boolean_t
mac_check_macaddr_shared(mac_address_t *map)
{
	ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip));

	return (map->ma_nusers > 1);
}
/*
 * Remove the specified MAC address from the MAC address list and free it.
 */
static void
mac_free_macaddr(mac_address_t *map)
{
	mac_impl_t *mip = map->ma_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_addresses != NULL);

	map = mac_find_macaddr(mip, map->ma_addr);

	ASSERT(map != NULL);
	ASSERT(map->ma_nusers == 0);

	if (map == mip->mi_addresses) {
		mip->mi_addresses = map->ma_next;
	} else {
		mac_address_t *pre;

		pre = mip->mi_addresses;
		while (pre->ma_next != map)
			pre = pre->ma_next;
		pre->ma_next = map->ma_next;
	}

	kmem_free(map, sizeof (mac_address_t));
}
/*
 * Add a MAC address reference for a client. If the desired MAC address
 * exists, add a reference to it. Otherwise, add the new address by adding
 * it to a reserved group or setting promiscuous mode. Won't try a different
 * group if the group is non-NULL, so the caller must explicitly share the
 * default group when needed.
 *
 * Note, the primary MAC address is initialized at registration time, so
 * adding it to the default group only requires activating it if its
 * reference count is still zero. Also, some drivers may not have advertised
 * the RINGS capability.
 */
int
mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
    boolean_t use_hw)
{
	mac_address_t	*map;
	int		err = 0;
	boolean_t	allocated_map = B_FALSE;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	map = mac_find_macaddr(mip, mac_addr);

	/*
	 * If the new MAC address has not been added, allocate a new one
	 * and set it up.
	 */
	if (map == NULL) {
		map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
		map->ma_len = mip->mi_type->mt_addr_length;
		bcopy(mac_addr, map->ma_addr, map->ma_len);
		map->ma_nusers = 0;
		map->ma_group = group;
		map->ma_mip = mip;

		/* add the new MAC address to the head of the address list */
		map->ma_next = mip->mi_addresses;
		mip->mi_addresses = map;

		allocated_map = B_TRUE;
	}

	ASSERT(map->ma_group == NULL || map->ma_group == group);
	if (map->ma_group == NULL)
		map->ma_group = group;

	/*
	 * If the MAC address is already in use, simply account for the
	 * new client.
	 */
	if (map->ma_nusers++ > 0)
		return (0);

	/*
	 * Activate this MAC address by adding it to the reserved group.
	 */
	if (group != NULL) {
		err = mac_group_addmac(group, (const uint8_t *)mac_addr);
		if (err == 0) {
			map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
			return (0);
		}
	}

	/*
	 * The MAC address addition failed. If the client requires a
	 * hardware classified MAC address, fail the operation.
	 */
	if (use_hw) {
		err = ENOSPC;
		goto bail;
	}

	/*
	 * Try promiscuous mode.
	 *
	 * For drivers that don't advertise the RINGS capability, do
	 * nothing for the primary address.
	 */
	if ((group == NULL) &&
	    (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) {
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
		return (0);
	}

	/*
	 * Enable promiscuous mode in order to receive traffic
	 * to the new MAC address.
	 */
	if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) {
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC;
		return (0);
	}

	/*
	 * Free the MAC address that could not be added. Don't free
	 * a pre-existing address, it could have been the entry
	 * for the primary MAC address which was pre-allocated by
	 * mac_init_macaddr(), and which must remain on the list.
	 */
bail:
	map->ma_nusers--;
	if (allocated_map)
		mac_free_macaddr(map);
	return (err);
}
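/*
 * Reference-counting sketch (illustrative): add and remove pair up per
 * client, and only the first add or the last remove touches the hardware
 * or promiscuous state:
 *
 *	err = mac_add_macaddr(mip, grp, addr, use_hw);	ma_nusers 0 -> 1
 *	err = mac_add_macaddr(mip, grp, addr, use_hw);	ma_nusers 1 -> 2
 *	(void) mac_remove_macaddr(map);			ma_nusers 2 -> 1
 *	(void) mac_remove_macaddr(map);			1 -> 0, map freed
 */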
/*
 * Remove a reference to a MAC address. This may cause the MAC address to be
 * removed from an associated group or promiscuous mode to be turned off.
 * The caller needs to handle the failure properly.
 */
int
mac_remove_macaddr(mac_address_t *map)
{
	mac_impl_t *mip = map->ma_mip;
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	ASSERT(map == mac_find_macaddr(mip, map->ma_addr));

	/*
	 * If it's not the last client using this MAC address, only update
	 * the MAC clients count.
	 */
	if (--map->ma_nusers > 0)
		return (0);

	/*
	 * The MAC address is no longer used by any MAC client, so remove
	 * it from its associated group, or turn off promiscuous mode
	 * if it was enabled for the MAC address.
	 */
	switch (map->ma_type) {
	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
		/*
		 * Don't free the preset primary address for drivers that
		 * don't advertise the RINGS capability.
		 */
		if (map->ma_group == NULL)
			return (0);

		err = mac_group_remmac(map->ma_group, map->ma_addr);
		if (err == 0)
			map->ma_group = NULL;
		break;
	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
		err = i_mac_promisc_set(mip, B_FALSE);
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (err != 0)
		return (err);

	/*
	 * We created the MAC address for the primary one at registration, so
	 * we won't free it here. mac_fini_macaddr() will take care of it.
	 */
	if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0)
		mac_free_macaddr(map);

	return (0);
}
/*
 * Update an existing MAC address. The caller needs to make sure that the new
 * value has not been used.
 */
int
mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr)
{
	mac_impl_t *mip = map->ma_mip;
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);

	switch (map->ma_type) {
	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
		/*
		 * Update the primary address for drivers that are not
		 * RINGS capable.
		 */
		if (mip->mi_rx_groups == NULL) {
			err = mip->mi_unicst(mip->mi_driver, (const uint8_t *)
			    mac_addr);
			break;
		}

		/*
		 * If this MAC address is not currently in use,
		 * simply break out and update the value.
		 */
		if (map->ma_nusers == 0)
			break;

		/*
		 * Need to replace the MAC address associated with a group.
		 */
		err = mac_group_remmac(map->ma_group, map->ma_addr);
		if (err != 0)
			break;

		err = mac_group_addmac(map->ma_group, mac_addr);

		/*
		 * Failure hints at a hardware error. The MAC layer needs to
		 * have an error notification facility to handle this.
		 * For now, simply try to restore the value.
		 */
		if (err != 0)
			(void) mac_group_addmac(map->ma_group, map->ma_addr);

		break;
	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
		/*
		 * Need to do nothing more if in promiscuous mode.
		 */
		break;
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * Successfully replaced the MAC address.
	 */
	if (err == 0)
		bcopy(mac_addr, map->ma_addr, map->ma_len);

	return (err);
}
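/*
 * The replace sequence above is remove-then-add with a best-effort
 * restore (illustrative):
 *
 *	err = mac_group_remmac(map->ma_group, map->ma_addr);
 *	...
 *	err = mac_group_addmac(map->ma_group, mac_addr);
 *	if (err != 0)
 *		(void) mac_group_addmac(map->ma_group, map->ma_addr);
 *
 * If the restore itself fails the address ends up unprogrammed, which is
 * why the comment above asks for an error notification facility.
 */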
/*
 * Freshen the MAC address with a new value. Its caller must have updated the
 * hardware MAC address before calling this function.
 * This function is supposed to be used to handle the MAC address change
 * notification from underlying drivers.
 */
void
mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr)
{
	mac_impl_t *mip = map->ma_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);

	/*
	 * Freshen the MAC address with the new value.
	 */
	bcopy(mac_addr, map->ma_addr, map->ma_len);
	bcopy(mac_addr, mip->mi_addr, map->ma_len);

	/*
	 * Update all MAC clients that share this MAC address.
	 */
	mac_unicast_update_clients(mip, map);
}
/*
 * Set up the primary MAC address.
 */
void
mac_init_macaddr(mac_impl_t *mip)
{
	mac_address_t *map;

	/*
	 * The reference count is initialized to zero, until it's really
	 * activated.
	 */
	map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
	map->ma_len = mip->mi_type->mt_addr_length;
	bcopy(mip->mi_addr, map->ma_addr, map->ma_len);

	/*
	 * If the driver advertises the RINGS capability, it shouldn't have
	 * initialized its primary MAC address. For other drivers, including
	 * VNIC, the primary address must work after registration.
	 */
	if (mip->mi_rx_groups == NULL)
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;

	map->ma_mip = mip;

	mip->mi_addresses = map;
}
/*
 * Clean up the primary MAC address. Note, only one primary MAC address
 * is allowed. All other MAC addresses must have been freed appropriately.
 */
void
mac_fini_macaddr(mac_impl_t *mip)
{
	mac_address_t *map = mip->mi_addresses;

	if (map == NULL)
		return;

	/*
	 * If mi_addresses is initialized, there should be exactly one
	 * entry left on the list with no users.
	 */
	ASSERT(map->ma_nusers == 0);
	ASSERT(map->ma_next == NULL);

	kmem_free(map, sizeof (mac_address_t));
	mip->mi_addresses = NULL;
}
/*
 * Logging related functions.
 *
 * Note that Kernel statistics have been extended to maintain fine
 * granularity of statistics viz. hardware lane, software lane, fanout
 * stats etc. However, extended accounting continues to support only
 * aggregate statistics like before.
 */

/* Write the flow description to a netinfo_t record */
static netinfo_t *
mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip)
{
	netinfo_t		*ninfo;
	net_desc_t		*ndesc;
	flow_desc_t		*fdesc;
	mac_resource_props_t	*mrp;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP);
	if (ndesc == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	/*
	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
	 * Updates to the fe_flow_desc are done under the fe_lock
	 * after removing the flent from the flow table.
	 */
	mutex_enter(&flent->fe_lock);
	fdesc = &flent->fe_flow_desc;
	mrp = &flent->fe_resource_props;

	ndesc->nd_name = flent->fe_flow_name;
	ndesc->nd_devname = mcip->mci_name;
	bcopy(fdesc->fd_src_mac, ndesc->nd_ehost, ETHERADDRL);
	bcopy(fdesc->fd_dst_mac, ndesc->nd_edest, ETHERADDRL);
	ndesc->nd_sap = htonl(fdesc->fd_sap);
	ndesc->nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION;
	ndesc->nd_bw_limit = mrp->mrp_maxbw;
	if (ndesc->nd_isv4) {
		ndesc->nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]);
		ndesc->nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]);
	} else {
		bcopy(&fdesc->fd_local_addr, ndesc->nd_saddr, IPV6_ADDR_LEN);
		bcopy(&fdesc->fd_remote_addr, ndesc->nd_daddr, IPV6_ADDR_LEN);
	}
	ndesc->nd_sport = htons(fdesc->fd_local_port);
	ndesc->nd_dport = htons(fdesc->fd_remote_port);
	ndesc->nd_protocol = (uint8_t)fdesc->fd_protocol;
	mutex_exit(&flent->fe_lock);

	ninfo->ni_record = ndesc;
	ninfo->ni_size = sizeof (net_desc_t);
	ninfo->ni_type = EX_NET_FLDESC_REC;

	return (ninfo);
}
/* Write the flow statistics to a netinfo_t record */
static netinfo_t *
mac_write_flow_stats(flow_entry_t *flent)
{
	netinfo_t		*ninfo;
	net_stat_t		*nstat;
	mac_soft_ring_set_t	*mac_srs;
	mac_rx_stats_t		*mac_rx_stat;
	mac_tx_stats_t		*mac_tx_stat;
	int			i;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP);
	if (nstat == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	nstat->ns_name = flent->fe_flow_name;
	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
		mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
		mac_rx_stat = &mac_srs->srs_rx.sr_stat;

		nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes +
		    mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes;
		nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt +
		    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
		nstat->ns_oerrors += mac_rx_stat->mrs_ierrors;
	}

	mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat->ns_obytes = mac_tx_stat->mts_obytes;
		nstat->ns_opackets = mac_tx_stat->mts_opackets;
		nstat->ns_oerrors = mac_tx_stat->mts_oerrors;
	}

	ninfo->ni_record = nstat;
	ninfo->ni_size = sizeof (net_stat_t);
	ninfo->ni_type = EX_NET_FLSTAT_REC;

	return (ninfo);
}
/* Write the link description to a netinfo_t record */
static netinfo_t *
mac_write_link_desc(mac_client_impl_t *mcip)
{
	netinfo_t		*ninfo;
	net_desc_t		*ndesc;
	flow_entry_t		*flent = mcip->mci_flent;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP);
	if (ndesc == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	ndesc->nd_name = mcip->mci_name;
	ndesc->nd_devname = mcip->mci_name;
	ndesc->nd_isv4 = B_TRUE;
	/*
	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
	 * Updates to the fe_flow_desc are done under the fe_lock
	 * after removing the flent from the flow table.
	 */
	mutex_enter(&flent->fe_lock);
	bcopy(flent->fe_flow_desc.fd_src_mac, ndesc->nd_ehost, ETHERADDRL);
	mutex_exit(&flent->fe_lock);

	ninfo->ni_record = ndesc;
	ninfo->ni_size = sizeof (net_desc_t);
	ninfo->ni_type = EX_NET_LNDESC_REC;

	return (ninfo);
}
/* Write the link statistics to a netinfo_t record */
static netinfo_t *
mac_write_link_stats(mac_client_impl_t *mcip)
{
	netinfo_t		*ninfo;
	net_stat_t		*nstat;
	flow_entry_t		*flent;
	mac_soft_ring_set_t	*mac_srs;
	mac_rx_stats_t		*mac_rx_stat;
	mac_tx_stats_t		*mac_tx_stat;
	int			i;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP);
	if (nstat == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	nstat->ns_name = mcip->mci_name;
	flent = mcip->mci_flent;
	if (flent != NULL) {
		for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
			mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
			mac_rx_stat = &mac_srs->srs_rx.sr_stat;

			nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes +
			    mac_rx_stat->mrs_pollbytes +
			    mac_rx_stat->mrs_lclbytes;
			nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt +
			    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
			nstat->ns_oerrors += mac_rx_stat->mrs_ierrors;
		}
	}

	mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat->ns_obytes = mac_tx_stat->mts_obytes;
		nstat->ns_opackets = mac_tx_stat->mts_opackets;
		nstat->ns_oerrors = mac_tx_stat->mts_oerrors;
	}

	ninfo->ni_record = nstat;
	ninfo->ni_size = sizeof (net_stat_t);
	ninfo->ni_type = EX_NET_LNSTAT_REC;

	return (ninfo);
}
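/*
 * All four mac_write_*() helpers above share one contract (illustrative):
 * allocate a netinfo_t plus its record with KM_NOSLEEP, return NULL if
 * either allocation fails, and let the caller queue the record for the
 * extended-accounting commit done later in i_mac_log_info():
 *
 *	if ((ninfo = mac_write_link_stats(mcip)) == NULL)
 *		return (-1);
 *	list_insert_tail(lstate->mi_list, ninfo);
 */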
typedef struct i_mac_log_state_s {
	boolean_t	mi_last;
	int		mi_fenable;
	int		mi_lenable;
	list_t		*mi_list;
} i_mac_log_state_t;
/*
 * For a given flow, if the description has not been logged before, do it now.
 * If it is a VNIC, then we have collected information about it from the MAC
 * table, so skip it.
 *
 * Called through mac_flow_walk_nolock()
 *
 * Return 0 if successful.
 */
static int
mac_log_flowinfo(flow_entry_t *flent, void *arg)
{
	mac_client_impl_t	*mcip = flent->fe_mcip;
	i_mac_log_state_t	*lstate = arg;
	netinfo_t		*ninfo;

	if (mcip == NULL)
		return (0);

	/*
	 * If the name starts with "vnic", and fe_user_generated is true (to
	 * exclude the mcast and active flow entries created implicitly for
	 * a vnic), it is a VNIC flow. i.e. vnic1 is a vnic flow,
	 * vnic/bge1/mcast1 is not and neither is vnic/bge1/active.
	 */
	if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 &&
	    (flent->fe_type & FLOW_USER) != 0) {
		return (0);
	}

	if (!flent->fe_desc_logged) {
		/*
		 * We don't return error because we want to continue the
		 * walk in case this is the last walk which means we
		 * need to reset fe_desc_logged in all the flows.
		 */
		if ((ninfo = mac_write_flow_desc(flent, mcip)) == NULL)
			return (0);
		list_insert_tail(lstate->mi_list, ninfo);
		flent->fe_desc_logged = B_TRUE;
	}

	/*
	 * Regardless of the error, we want to proceed in case we have to
	 * reset fe_desc_logged.
	 */
	ninfo = mac_write_flow_stats(flent);
	if (ninfo == NULL)
		return (-1);

	list_insert_tail(lstate->mi_list, ninfo);

	if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED))
		flent->fe_desc_logged = B_FALSE;

	return (0);
}
/*
 * Log the description for each mac client of this mac_impl_t, if it
 * hasn't already been done. Additionally, log statistics for the link as
 * well. Walk the flow table and log information for each flow as well.
 * If it is the last walk (mci_last), then we turn off mci_desc_logged (and
 * also fe_desc_logged, if flow logging is on) since we want to log the
 * description if and when logging is restarted.
 *
 * Return 0 upon success or -1 upon failure
 */
static int
i_mac_impl_log(mac_impl_t *mip, i_mac_log_state_t *lstate)
{
	mac_client_impl_t	*mcip;
	netinfo_t		*ninfo;

	i_mac_perim_enter(mip);
	/*
	 * Only walk the client list for NIC and etherstub.
	 */
	if ((mip->mi_state_flags & MIS_DISABLED) ||
	    ((mip->mi_state_flags & MIS_IS_VNIC) &&
	    (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) {
		i_mac_perim_exit(mip);
		return (0);
	}

	for (mcip = mip->mi_clients_list; mcip != NULL;
	    mcip = mcip->mci_client_next) {
		if (!MCIP_DATAPATH_SETUP(mcip))
			continue;
		if (lstate->mi_lenable) {
			if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) {
				ninfo = mac_write_link_desc(mcip);
				if (ninfo == NULL) {
				/*
				 * We can't terminate it if this is the last
				 * walk, else there might be some links with
				 * mi_desc_logged set to true, which means
				 * their description won't be logged the next
				 * time logging is started (similarly for the
				 * flows within such links). We can continue
				 * without walking the flow table (i.e. to
				 * set fe_desc_logged to false) because we
				 * won't have written any flow stuff for this
				 * link as we haven't logged the link itself.
				 */
					i_mac_perim_exit(mip);
					if (lstate->mi_last)
						return (0);
					else
						return (-1);
				}
				mcip->mci_state_flags |= MCIS_DESC_LOGGED;
				list_insert_tail(lstate->mi_list, ninfo);
			}
		}

		ninfo = mac_write_link_stats(mcip);
		if (ninfo == NULL && !lstate->mi_last) {
			i_mac_perim_exit(mip);
			return (-1);
		}
		list_insert_tail(lstate->mi_list, ninfo);

		if (lstate->mi_last)
			mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;

		if (lstate->mi_fenable) {
			if (mcip->mci_subflow_tab != NULL) {
				(void) mac_flow_walk_nolock(
				    mcip->mci_subflow_tab, mac_log_flowinfo,
				    lstate);
			}
		}
	}
	i_mac_perim_exit(mip);
	return (0);
}
/*
 * modhash walker function to add a mac_impl_t to a list
 */
static uint_t
i_mac_impl_list_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	list_t			*list = (list_t *)arg;
	mac_impl_t		*mip = (mac_impl_t *)val;

	if ((mip->mi_state_flags & MIS_DISABLED) == 0) {
		list_insert_tail(list, mip);
		mip->mi_ref++;
	}

	return (MH_WALK_CONTINUE);
}
static void
i_mac_log_info(list_t *net_log_list, i_mac_log_state_t *lstate)
{
	list_t			mac_impl_list;
	mac_impl_t		*mip;
	netinfo_t		*ninfo;

	/* Create list of mac_impls */
	ASSERT(RW_LOCK_HELD(&i_mac_impl_lock));
	list_create(&mac_impl_list, sizeof (mac_impl_t), offsetof(mac_impl_t,
	    mi_node));
	mod_hash_walk(i_mac_impl_hash, i_mac_impl_list_walker, &mac_impl_list);
	rw_exit(&i_mac_impl_lock);

	/* Create log entries for each mac_impl */
	for (mip = list_head(&mac_impl_list); mip != NULL;
	    mip = list_next(&mac_impl_list, mip)) {
		if (i_mac_impl_log(mip, lstate) != 0)
			continue;
	}

	/* Remove elements and destroy list of mac_impls */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	while ((mip = list_remove_tail(&mac_impl_list)) != NULL) {
		mip->mi_ref--;
	}
	rw_exit(&i_mac_impl_lock);
	list_destroy(&mac_impl_list);

	/*
	 * Write log entries to files outside of locks, free associated
	 * structures, and remove entries from the list.
	 */
	while ((ninfo = list_head(net_log_list)) != NULL) {
		(void) exacct_commit_netinfo(ninfo->ni_record, ninfo->ni_type);
		list_remove(net_log_list, ninfo);
		kmem_free(ninfo->ni_record, ninfo->ni_size);
		kmem_free(ninfo, sizeof (*ninfo));
	}
	list_destroy(net_log_list);
}
/*
 * The timer thread that runs every mac_logging_interval seconds and logs
 * link and/or flow information.
 */
void
mac_log_linkinfo(void *arg)
{
	i_mac_log_state_t	lstate;
	list_t			net_log_list;

	list_create(&net_log_list, sizeof (netinfo_t),
	    offsetof(netinfo_t, ni_link));

	rw_enter(&i_mac_impl_lock, RW_READER);
	if (!mac_flow_log_enable && !mac_link_log_enable) {
		rw_exit(&i_mac_impl_lock);
		return;
	}
	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;
	lstate.mi_last = B_FALSE;
	lstate.mi_list = &net_log_list;

	/* Write log entries for each mac_impl in the list */
	i_mac_log_info(&net_log_list, &lstate);

	if (mac_flow_log_enable || mac_link_log_enable) {
		mac_logging_timer = timeout(mac_log_linkinfo, NULL,
		    SEC_TO_TICK(mac_logging_interval));
	}
}
typedef struct i_mac_fastpath_state_s {
	boolean_t	mf_disable;
	int		mf_err;
} i_mac_fastpath_state_t;

/* modhash walker function to enable or disable fastpath */
static uint_t
i_mac_fastpath_walker(mod_hash_key_t key, mod_hash_val_t *val,
    void *arg)
{
	i_mac_fastpath_state_t	*state = arg;
	mac_handle_t		mh = (mac_handle_t)val;

	if (state->mf_disable)
		state->mf_err = mac_fastpath_disable(mh);
	else
		mac_fastpath_enable(mh);

	return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
}
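/*
 * The walker obeys the usual mod_hash_walk() contract (illustrative):
 * MH_WALK_CONTINUE visits the next entry, MH_WALK_TERMINATE stops the
 * walk, so the first mac_fastpath_disable() failure aborts a disable
 * pass and leaves mf_err set for the caller:
 *
 *	i_mac_fastpath_state_t	dstate = {B_TRUE, 0};
 *
 *	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &dstate);
 *	if (dstate.mf_err != 0)
 *		re-walk with an enable state to undo the partial disable
 */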
/*
 * Start the logging timer.
 */
int
mac_start_logusage(mac_logtype_t type, uint_t interval)
{
	i_mac_fastpath_state_t	dstate = {B_TRUE, 0};
	i_mac_fastpath_state_t	estate = {B_FALSE, 0};
	int			err;

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (mac_link_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		break;
	default:
		ASSERT(0);
	}

	/* Disable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &dstate);
	if ((err = dstate.mf_err) != 0) {
		/* Reenable fastpath */
		mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);
		rw_exit(&i_mac_impl_lock);
		return (err);
	}

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		mac_flow_log_enable = B_TRUE;
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		mac_link_log_enable = B_TRUE;
		break;
	}

	mac_logging_interval = interval;
	rw_exit(&i_mac_impl_lock);
	mac_log_linkinfo(NULL);
	return (0);
}
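/*
 * Usage sketch (illustrative): extended accounting turns link or flow
 * logging on with an interval in seconds; a matching stop call (below)
 * flushes the last records and cancels the timer once both log types
 * are off:
 *
 *	if (mac_start_logusage(MAC_LOGTYPE_LINK, 20) == 0) {
 *		mac_log_linkinfo() now reschedules itself every 20s
 *	}
 *	...
 *	mac_stop_logusage(MAC_LOGTYPE_LINK);
 */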
/*
 * Stop the logging timer if both link and flow logging are turned off.
 */
void
mac_stop_logusage(mac_logtype_t type)
{
	i_mac_log_state_t	lstate;
	i_mac_fastpath_state_t	estate = {B_FALSE, 0};
	list_t			net_log_list;

	list_create(&net_log_list, sizeof (netinfo_t),
	    offsetof(netinfo_t, ni_link));

	rw_enter(&i_mac_impl_lock, RW_WRITER);

	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;
	lstate.mi_list = &net_log_list;

	/* Last walk */
	lstate.mi_last = B_TRUE;

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (lstate.mi_fenable) {
			ASSERT(mac_link_log_enable);
			mac_flow_log_enable = B_FALSE;
			mac_link_log_enable = B_FALSE;
			break;
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (!lstate.mi_lenable || mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return;
		}
		mac_link_log_enable = B_FALSE;
		break;
	default:
		ASSERT(0);
	}

	/* Reenable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);

	(void) untimeout(mac_logging_timer);
	mac_logging_timer = 0;

	/* Write log entries for each mac_impl in the list */
	i_mac_log_info(&net_log_list, &lstate);
}
/*
 * Walk the rx and tx SRS/SRs for a flow and update the priority value.
 */
void
mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent)
{
	pri_t			pri;
	int			count;
	mac_soft_ring_set_t	*mac_srs;

	if (flent->fe_rx_srs_cnt <= 0)
		return;

	if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type ==
	    SRST_FLOW) {
		pri = FLOW_PRIORITY(mcip->mci_min_pri,
		    mcip->mci_max_pri,
		    flent->fe_resource_props.mrp_priority);
	} else {
		pri = mcip->mci_max_pri;
	}

	for (count = 0; count < flent->fe_rx_srs_cnt; count++) {
		mac_srs = flent->fe_rx_srs[count];
		mac_update_srs_priority(mac_srs, pri);
	}
	/*
	 * If we have a Tx SRS, we need to modify all the threads associated
	 * with it.
	 */
	if (flent->fe_tx_srs != NULL)
		mac_update_srs_priority(flent->fe_tx_srs, pri);
}
/*
 * RX and TX rings are reserved according to different semantics depending
 * on the requests from the MAC clients and type of rings:
 *
 * On the Tx side, by default we reserve individual rings, independently from
 * the groups.
 *
 * On the Rx side, the reservation is at the granularity of the group
 * of rings, and used for v12n level 1 only. It has a special case for the
 * primary client.
 *
 * If a share is allocated to a MAC client, we allocate a TX group and an
 * RX group to the client, and assign TX rings and RX rings to these
 * groups according to information gathered from the driver through
 * the share capability.
 *
 * The foreseeable evolution of Rx rings will handle v12n level 2 and higher
 * to allocate individual rings out of a group and program the hw classifier
 * based on IP address or higher level criteria.
 */

/*
 * mac_reserve_tx_ring()
 * Reserve an unused ring by marking it with the MR_INUSE state.
 * As reserved, the ring is ready to function.
 *
 * Notes for Hybrid I/O:
 *
 * If a specific ring is needed, it is specified through the desired_ring
 * argument. Otherwise that argument is set to NULL.
 * If the desired ring was previously allocated to another client, this
 * function swaps it with a new ring from the group of unassigned rings.
 */
static mac_ring_t *
mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring)
{
	mac_group_t		*group;
	mac_grp_client_t	*mgcp;
	mac_client_impl_t	*mcip;
	mac_soft_ring_set_t	*srs;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	if (desired_ring == NULL)
		return (NULL);

	/*
	 * Find an available ring and start it before changing its status.
	 * The unassigned rings are at the end of the mi_tx_groups
	 * array.
	 */
	group = MAC_DEFAULT_TX_GROUP(mip);

	/* Can't take the default ring out of the default group */
	ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring);

	if (desired_ring->mr_state == MR_FREE) {
		ASSERT(MAC_GROUP_NO_CLIENT(group));
		if (mac_start_ring(desired_ring) != 0)
			return (NULL);
		return (desired_ring);
	}
	/*
	 * There are clients using this ring, so let's move the clients
	 * away from using this ring.
	 */
	for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) {
		mcip = mgcp->mgc_client;
		mac_tx_client_quiesce((mac_client_handle_t)mcip);
		srs = MCIP_TX_SRS(mcip);
		ASSERT(mac_tx_srs_ring_present(srs, desired_ring));
		mac_tx_invoke_callbacks(mcip,
		    (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs,
		    desired_ring));
		mac_tx_srs_del_ring(srs, desired_ring);
		mac_tx_client_restart((mac_client_handle_t)mcip);
	}
	return (desired_ring);
}
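/*
 * Eviction sketch (illustrative): when the desired ring is still in use,
 * each client of the default group is quiesced, the soft ring backing
 * this hardware ring is deleted from its Tx SRS, and the client is
 * restarted:
 *
 *	mac_tx_client_quiesce((mac_client_handle_t)mcip);
 *	mac_tx_srs_del_ring(srs, desired_ring);
 *	mac_tx_client_restart((mac_client_handle_t)mcip);
 *
 * This quiesce/restart bracket is the standard way in this file to keep
 * data threads off a ring while its topology changes.
 */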
/*
 * For a reserved group with multiple clients, return the primary client.
 */
static mac_client_impl_t *
mac_get_grp_primary(mac_group_t *grp)
{
	mac_grp_client_t	*mgcp = grp->mrg_clients;
	mac_client_impl_t	*mcip;

	while (mgcp != NULL) {
		mcip = mgcp->mgc_client;
		if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC)
			return (mcip);
		mgcp = mgcp->mgc_next;
	}
	return (NULL);
}
/*
 * Hybrid I/O specifies the ring that should be given to a share.
 * If the ring is already used by clients, then we need to release
 * the ring back to the default group so that we can give it to
 * the share. This means the clients using this ring now get a
 * replacement ring. If there aren't any replacement rings, this
 * function returns a failure.
 */
static int
mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type,
    mac_ring_t *ring, mac_ring_t **rings, int nrings)
{
	mac_group_t		*group = (mac_group_t *)ring->mr_gh;
	mac_resource_props_t	*mrp;
	mac_client_impl_t	*mcip;
	mac_group_t		*defgrp;
	mac_ring_t		*tring;
	mac_group_t		*tgrp;
	int			i;
	int			j;

	mcip = MAC_GROUP_ONLY_CLIENT(group);
	if (mcip == NULL)
		mcip = mac_get_grp_primary(group);
	ASSERT(mcip != NULL);
	ASSERT(mcip->mci_share == (uintptr_t)NULL);

	mrp = MCIP_RESOURCE_PROPS(mcip);
	if (ring_type == MAC_RING_TYPE_RX) {
		defgrp = mip->mi_rx_donor_grp;
		if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) {
			/* Need to put this mac client in the default group */
			if (mac_rx_switch_group(mcip, group, defgrp) != 0)
				return (ENOSPC);
		} else {
			/*
			 * Switch this ring with some other ring from
			 * the default group.
			 */
			for (tring = defgrp->mrg_rings; tring != NULL;
			    tring = tring->mr_next) {
				if (tring->mr_index == 0)
					continue;
				for (j = 0; j < nrings; j++) {
					if (rings[j] == tring)
						break;
				}
				if (j >= nrings)
					break;
			}
			if (tring == NULL)
				return (ENOSPC);
			if (mac_group_mov_ring(mip, group, tring) != 0)
				return (ENOSPC);
			if (mac_group_mov_ring(mip, defgrp, ring) != 0) {
				(void) mac_group_mov_ring(mip, defgrp, tring);
				return (ENOSPC);
			}
		}
		ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp);
		return (0);
	}

	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	if (ring == (mac_ring_t *)mip->mi_default_tx_ring) {
		/*
		 * See if we can get a spare ring to replace the default
		 * ring.
		 */
		if (defgrp->mrg_cur_count == 1) {
			/*
			 * Need to get a ring from another client, see if
			 * there are any clients that can be moved to
			 * the default group, thereby freeing some rings.
			 */
			for (i = 0; i < mip->mi_tx_group_count; i++) {
				tgrp = &mip->mi_tx_groups[i];
				if (tgrp->mrg_state ==
				    MAC_GROUP_STATE_REGISTERED) {
					continue;
				}
				mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
				if (mcip == NULL)
					mcip = mac_get_grp_primary(tgrp);
				ASSERT(mcip != NULL);
				mrp = MCIP_RESOURCE_PROPS(mcip);
				if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) {
					ASSERT(tgrp->mrg_cur_count == 1);
					/*
					 * If this ring is part of the
					 * rings asked by the share we cannot
					 * use it as the default ring.
					 */
					for (j = 0; j < nrings; j++) {
						if (rings[j] == tgrp->mrg_rings)
							break;
					}
					if (j < nrings)
						continue;
					mac_tx_client_quiesce(
					    (mac_client_handle_t)mcip);
					mac_tx_switch_group(mcip, tgrp,
					    defgrp);
					mac_tx_client_restart(
					    (mac_client_handle_t)mcip);
					break;
				}
			}
			/*
			 * All the rings are reserved, can't give up the
			 * default ring.
			 */
			if (defgrp->mrg_cur_count <= 1)
				return (ENOSPC);
		}
		/*
		 * Swap the default ring with another.
		 */
		for (tring = defgrp->mrg_rings; tring != NULL;
		    tring = tring->mr_next) {
			/*
			 * If this ring is part of the rings asked by the
			 * share we cannot use it as the default ring.
			 */
			for (j = 0; j < nrings; j++) {
				if (rings[j] == tring)
					break;
			}
			if (j >= nrings)
				break;
		}
		ASSERT(tring != NULL);
		mip->mi_default_tx_ring = (mac_ring_handle_t)tring;
		return (0);
	}
	/*
	 * The Tx ring is with a group reserved by a MAC client. See if
	 * we can swap it.
	 */
	ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
	mcip = MAC_GROUP_ONLY_CLIENT(group);
	if (mcip == NULL)
		mcip = mac_get_grp_primary(group);
	ASSERT(mcip != NULL);
	mrp = MCIP_RESOURCE_PROPS(mcip);
	mac_tx_client_quiesce((mac_client_handle_t)mcip);
	if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) {
		ASSERT(group->mrg_cur_count == 1);
		/* Put this mac client in the default group */
		mac_tx_switch_group(mcip, group, defgrp);
	} else {
		/*
		 * Switch this ring with some other ring from
		 * the default group.
		 */
		for (tring = defgrp->mrg_rings; tring != NULL;
		    tring = tring->mr_next) {
			if (tring == (mac_ring_t *)mip->mi_default_tx_ring)
				continue;
			/*
			 * If this ring is part of the rings asked by the
			 * share we cannot use it for swapping.
			 */
			for (j = 0; j < nrings; j++) {
				if (rings[j] == tring)
					break;
			}
			if (j >= nrings)
				break;
		}
		if (tring == NULL) {
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
		if (mac_group_mov_ring(mip, group, tring) != 0) {
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
		if (mac_group_mov_ring(mip, defgrp, ring) != 0) {
			(void) mac_group_mov_ring(mip, defgrp, tring);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
	}
	mac_tx_client_restart((mac_client_handle_t)mcip);
	ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp);
	return (0);
}
/*
 * Populate a zero-ring group with rings. If the share is non-NULL,
 * the rings are chosen according to that share.
 * Invoked after allocating a new RX or TX group through
 * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively.
 * Returns zero on success, an errno otherwise.
 */
static int
i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type,
    mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share,
    uint32_t ringcnt)
{
	mac_ring_t **rings, *ring;
	uint_t nrings;
	int rv = 0, i = 0, j;

	ASSERT((ring_type == MAC_RING_TYPE_RX &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) ||
	    (ring_type == MAC_RING_TYPE_TX &&
	    mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC));

	/*
	 * First find the rings to allocate to the group.
	 */
	if (share != (uintptr_t)NULL) {
		/* get rings through ms_squery() */
		mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings);
		ASSERT(nrings != 0);
		rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		mip->mi_share_capab.ms_squery(share, ring_type,
		    (mac_ring_handle_t *)rings, &nrings);
		for (i = 0; i < nrings; i++) {
			ring = rings[i];
			/*
			 * If we have given this ring to a non-default
			 * group, we need to check if we can get this
			 * ring back.
			 */
			if (ring->mr_gh != (mac_group_handle_t)src_group ||
			    ring == (mac_ring_t *)mip->mi_default_tx_ring) {
				if (mac_reclaim_ring_from_grp(mip, ring_type,
				    ring, rings, nrings) != 0) {
					rv = ENOSPC;
					goto bail;
				}
			}
		}
	} else {
		/*
		 * Pick one ring from the default group.
		 *
		 * For now pick the second ring which requires the first ring
		 * at index 0 to stay in the default group, since it is the
		 * ring which carries the multicast traffic.
		 * We need a better way for a driver to indicate this,
		 * for example a per-ring flag.
		 */
		rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		for (ring = src_group->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			if (ring_type == MAC_RING_TYPE_RX &&
			    ring->mr_index == 0) {
				continue;
			}
			if (ring_type == MAC_RING_TYPE_TX &&
			    ring == (mac_ring_t *)mip->mi_default_tx_ring) {
				continue;
			}
			rings[i++] = ring;
			if (i == ringcnt)
				break;
		}
		ASSERT(ring != NULL);
		nrings = i;
		/* Not enough rings as required */
		if (nrings != ringcnt) {
			rv = ENOSPC;
			goto bail;
		}
	}

	switch (ring_type) {
	case MAC_RING_TYPE_RX:
		if (src_group->mrg_cur_count - nrings < 1) {
			/* we ran out of rings */
			rv = ENOSPC;
			goto bail;
		}

		/* move receive rings to new group */
		for (i = 0; i < nrings; i++) {
			rv = mac_group_mov_ring(mip, new_group, rings[i]);
			if (rv != 0) {
				/* move rings back on failure */
				for (j = 0; j < i; j++) {
					(void) mac_group_mov_ring(mip,
					    src_group, rings[j]);
				}
				goto bail;
			}
		}
		break;

	case MAC_RING_TYPE_TX: {
		mac_ring_t *tmp_ring;

		/* move the TX rings to the new group */
		for (i = 0; i < nrings; i++) {
			/* get the desired ring */
			tmp_ring = mac_reserve_tx_ring(mip, rings[i]);
			if (tmp_ring == NULL) {
				rv = ENOSPC;
				goto bail;
			}
			ASSERT(tmp_ring == rings[i]);
			rv = mac_group_mov_ring(mip, new_group, rings[i]);
			if (rv != 0) {
				/* cleanup on failure */
				for (j = 0; j < i; j++) {
					(void) mac_group_mov_ring(mip,
					    MAC_DEFAULT_TX_GROUP(mip),
					    rings[j]);
				}
				goto bail;
			}
		}
		break;
	}
	}

	/* add group to share */
	if (share != (uintptr_t)NULL)
		mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver);

bail:
	/* free temporary array of rings */
	kmem_free(rings, nrings * sizeof (mac_ring_handle_t));

	return (rv);
}
void
mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip)
{
	mac_grp_client_t *mgcp;

	for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) {
		if (mgcp->mgc_client == mcip)
			break;
	}

	VERIFY(mgcp == NULL);

	mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP);
	mgcp->mgc_client = mcip;
	mgcp->mgc_next = grp->mrg_clients;
	grp->mrg_clients = mgcp;
}
void
mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip)
{
	mac_grp_client_t *mgcp, **pprev;

	for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL;
	    pprev = &mgcp->mgc_next, mgcp = *pprev) {
		if (mgcp->mgc_client == mcip)
			break;
	}

	ASSERT(mgcp != NULL);

	*pprev = mgcp->mgc_next;
	kmem_free(mgcp, sizeof (mac_grp_client_t));
}
/*
 * mac_reserve_rx_group()
 *
 * Finds an available group and exclusively reserves it for a client.
 * The group is chosen to suit the flow's resource controls (bandwidth and
 * fanout requirements) and the address type.
 * If the requestor is the primary MAC then return the group with the
 * largest number of rings, otherwise the default ring when available.
 */
mac_group_t *
mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
{
	mac_share_handle_t	share = mcip->mci_share;
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*grp = NULL;
	int			i;
	int			err = 0;
	mac_address_t		*map;
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	int			nrings;
	int			donor_grp_rcnt;
	boolean_t		need_exclgrp = B_FALSE;
	int			need_rings = 0;
	mac_group_t		*candidate_grp = NULL;
	mac_client_impl_t	*gclient;
	mac_resource_props_t	*gmrp;
	mac_group_t		*donorgrp = NULL;
	boolean_t		rxhw = mrp->mrp_mask & MRP_RX_RINGS;
	boolean_t		unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC;
	boolean_t		isprimary;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC;

	/*
	 * Check if a group already has this mac address (case of VLANs)
	 * unless we are moving this MAC client from one group to another.
	 */
	if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) {
		if (map->ma_group != NULL)
			return (map->ma_group);
	}
	if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0)
		return (NULL);
	/*
	 * If exclusive open, return NULL which will enable the
	 * caller to use the default group.
	 */
	if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
		return (NULL);

	/* For dynamic groups default unspecified to 1 */
	if (rxhw && unspec &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		mrp->mrp_nrxrings = 1;
	}
	/*
	 * For static grouping we allow only specifying rings=0 and
	 * unspecified.
	 */
	if (rxhw && mrp->mrp_nrxrings > 0 &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) {
		return (NULL);
	}
	if (rxhw) {
		/*
		 * We have explicitly asked for a group (with nrxrings,
		 * if unspec).
		 */
		if (unspec || mrp->mrp_nrxrings > 0) {
			need_exclgrp = B_TRUE;
			need_rings = mrp->mrp_nrxrings;
		} else if (mrp->mrp_nrxrings == 0) {
			/*
			 * We have asked for a software group.
			 */
			return (NULL);
		}
	} else if (isprimary && mip->mi_nactiveclients == 1 &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		/*
		 * If the primary is the only active client on this
		 * mip and we have not asked for any rings, we give
		 * it the default group so that the primary gets to
		 * use all the rings.
		 */
		return (NULL);
	}

	/* The group that can donate rings */
	donorgrp = mip->mi_rx_donor_grp;

	/*
	 * The number of rings that the default group can donate.
	 * We need to leave at least one ring.
	 */
	donor_grp_rcnt = donorgrp->mrg_cur_count - 1;

	/*
	 * Try to exclusively reserve a RX group.
	 *
	 * For flows requiring HW_DEFAULT_RING (unicast flow of the primary
	 * client), try to reserve a non-default RX group and give
	 * it all the rings from the donor group, except the default ring.
	 *
	 * For flows requiring HW_RING (unicast flow of other clients), try
	 * to reserve a non-default RX group with the specified number of
	 * rings, if available.
	 *
	 * For flows that have not asked for software or hardware ring,
	 * try to reserve a non-default group with 1 ring, if available.
	 */
	for (i = 1; i < mip->mi_rx_group_count; i++) {
		grp = &mip->mi_rx_groups[i];

		DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name,
		    int, grp->mrg_index, mac_group_state_t, grp->mrg_state);

		/*
		 * Check if this group could be a candidate group for
		 * eviction if we need a group for this MAC client,
		 * but there aren't any. A candidate group is one
		 * that didn't ask for an exclusive group, but got
		 * one and it has enough rings (combined with what
		 * the donor group can donate) for the new MAC
		 * client.
		 */
		if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) {
			/*
			 * If the primary/donor group is not the default
			 * group, don't bother looking for a candidate group.
			 * If we don't have enough rings we will check
			 * if the primary group can be vacated.
			 */
			if (candidate_grp == NULL &&
			    donorgrp == MAC_DEFAULT_RX_GROUP(mip)) {
				ASSERT(!MAC_GROUP_NO_CLIENT(grp));
				gclient = MAC_GROUP_ONLY_CLIENT(grp);
				if (gclient == NULL)
					gclient = mac_get_grp_primary(grp);
				ASSERT(gclient != NULL);
				gmrp = MCIP_RESOURCE_PROPS(gclient);
				if (gclient->mci_share == (uintptr_t)NULL &&
				    (gmrp->mrp_mask & MRP_RX_RINGS) == 0 &&
				    (unspec ||
				    (grp->mrg_cur_count + donor_grp_rcnt >=
				    need_rings))) {
					candidate_grp = grp;
				}
			}
			continue;
		}
		/*
		 * This group could already be SHARED by other multicast
		 * flows on this client. In that case, the group would
		 * be shared and has already been started.
		 */
		ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT);

		if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) &&
		    (mac_start_group(grp) != 0)) {
			continue;
		}

		if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC)
			break;
		ASSERT(grp->mrg_cur_count == 0);

		/*
		 * Populate the group. Rings should be taken
		 * from the donor group.
		 */
		nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt : 1;

		/*
		 * If the donor group can't donate, let's just walk and
		 * see if someone can vacate a group, so that we have
		 * enough rings for this, unless we already have
		 * identified a candidate group.
		 */
		if (nrings <= donor_grp_rcnt) {
			err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX,
			    donorgrp, grp, share, nrings);
			if (err == 0) {
				/*
				 * For a share i_mac_group_allocate_rings gets
				 * the rings from the driver, let's populate
				 * the property for the client now.
				 */
				if (share != (uintptr_t)NULL) {
					mac_client_set_rings(
					    (mac_client_handle_t)mcip,
					    grp->mrg_cur_count, -1);
				}
				if (mac_is_primary_client(mcip) && !rxhw)
					mip->mi_rx_donor_grp = grp;
				break;
			}
			DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *,
			    mip->mi_name, int, grp->mrg_index, int, err);
		}

		/*
		 * It's a dynamic group but the grouping operation
		 * failed.
		 */
		mac_stop_group(grp);
	}
	/* We didn't find an exclusive group for this MAC client */
	if (i >= mip->mi_rx_group_count) {

		if (!need_exclgrp)
			return (NULL);

		/*
		 * If we found a candidate group then we switch the
		 * MAC client from the candidate_group to the default
		 * group and give the group to this MAC client. If
		 * we didn't find a candidate_group, check if the
		 * primary is in its own group and if it can make way
		 * for this MAC client.
		 */
		if (candidate_grp == NULL &&
		    donorgrp != MAC_DEFAULT_RX_GROUP(mip) &&
		    donorgrp->mrg_cur_count >= need_rings) {
			candidate_grp = donorgrp;
		}
		if (candidate_grp != NULL) {
			boolean_t	prim_grp = B_FALSE;

			/*
			 * Switch the MAC client from the candidate group
			 * to the default group. If this group was the
			 * donor group, then after the switch we need
			 * to update the donor group too.
			 */
			grp = candidate_grp;
			gclient = MAC_GROUP_ONLY_CLIENT(grp);
			if (gclient == NULL)
				gclient = mac_get_grp_primary(grp);
			if (grp == mip->mi_rx_donor_grp)
				prim_grp = B_TRUE;
			if (mac_rx_switch_group(gclient, grp,
			    MAC_DEFAULT_RX_GROUP(mip)) != 0) {
				return (NULL);
			}
			if (prim_grp) {
				mip->mi_rx_donor_grp =
				    MAC_DEFAULT_RX_GROUP(mip);
				donorgrp = MAC_DEFAULT_RX_GROUP(mip);
			}

			/*
			 * Now give this group with the required rings
			 * to this MAC client.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			if (mac_start_group(grp) != 0)
				return (NULL);

			if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC)
				return (grp);

			donor_grp_rcnt = donorgrp->mrg_cur_count - 1;
			ASSERT(grp->mrg_cur_count == 0);
			ASSERT(donor_grp_rcnt >= need_rings);
			err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX,
			    donorgrp, grp, share, need_rings);
			if (err == 0) {
				/*
				 * For a share i_mac_group_allocate_rings gets
				 * the rings from the driver, let's populate
				 * the property for the client now.
				 */
				if (share != (uintptr_t)NULL) {
					mac_client_set_rings(
					    (mac_client_handle_t)mcip,
					    grp->mrg_cur_count, -1);
				}
				DTRACE_PROBE2(rx__group__reserved,
				    char *, mip->mi_name, int, grp->mrg_index);
				return (grp);
			}
			DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *,
			    mip->mi_name, int, grp->mrg_index, int, err);
			mac_stop_group(grp);
		}
		return (NULL);
	}
	ASSERT(grp != NULL);

	DTRACE_PROBE2(rx__group__reserved,
	    char *, mip->mi_name, int, grp->mrg_index);
	return (grp);
}
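/*
 * Decision summary (illustrative): a NULL return sends the caller to the
 * shared default group, while a non-NULL return is an exclusively
 * reserved group that has already been started and, for dynamic group
 * types, populated with rings taken from mi_rx_donor_grp:
 *
 *	grp = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
 *	if (grp == NULL)
 *		use MAC_DEFAULT_RX_GROUP(mip) with software classification
 */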
/*
 * mac_release_rx_group()
 *
 * This is called when there are no clients left for the group.
 * The group is stopped and marked MAC_GROUP_STATE_REGISTERED,
 * and if it is a non-default group, the shares are removed and
 * all rings are assigned back to the default group.
 */
void
mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group)
{
	mac_impl_t	*mip = mcip->mci_mip;
	mac_ring_t	*ring;

	ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));

	if (mip->mi_rx_donor_grp == group)
		mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip);

	/*
	 * This is the case where there are no clients left. Any
	 * SRS etc on this group have also been quiesced.
	 */
	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
		if (ring->mr_classify_type == MAC_HW_CLASSIFIER) {
			ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
			/*
			 * Remove the SRS associated with the HW ring.
			 * As a result, polling will be disabled.
			 */
			ring->mr_srs = NULL;
		}
		ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED ||
		    ring->mr_state == MR_INUSE);
		if (ring->mr_state == MR_INUSE) {
			mac_stop_ring(ring);
			ring->mr_flag = 0;
		}
	}

	/* remove group from share */
	if (mcip->mci_share != (uintptr_t)NULL) {
		mip->mi_share_capab.ms_sremove(mcip->mci_share,
		    group->mrg_driver);
	}

	if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		/*
		 * Rings were dynamically allocated to the group.
		 * Move rings back to the default group.
		 */
		while ((ring = group->mrg_rings) != NULL) {
			(void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp,
			    ring);
		}
	}
	mac_stop_group(group);
	/*
	 * Possible improvement: See if we can assign the group just released
	 * to another client of the mip.
	 */
}

/*
 * When we move the primary's mac address between groups, we need to also
 * take all the clients sharing the same mac address along with it (VLANs).
 * We remove the mac address for such clients from the group after quiescing
 * them. When we add the mac address we restart the client. Note that
 * the primary's mac address is removed from the group after all the
 * other clients sharing the address are removed. Similarly, the primary's
 * mac address is added before all the other clients' mac addresses are
 * added. While grp is the group where the clients reside, tgrp is
 * the group where the addresses have to be added.
 */
static void
mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp,
    mac_group_t *tgrp, uint8_t *maddr, boolean_t add)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_grp_client_t	*mgcp = grp->mrg_clients;
	mac_client_impl_t	*gmcip;
	boolean_t		prim;

	prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;

	/*
	 * If the clients are in a non-default group, we just have to
	 * walk the group's client list. If it is in the default group
	 * (which will be shared by other clients as well), we need to
	 * check if the unicast address matches mcip's unicast.
	 */
	while (mgcp != NULL) {
		gmcip = mgcp->mgc_client;
		if (gmcip != mcip &&
		    (grp != MAC_DEFAULT_RX_GROUP(mip) ||
		    mcip->mci_unicast == gmcip->mci_unicast)) {
			if (!add) {
				mac_rx_client_quiesce(
				    (mac_client_handle_t)gmcip);
				(void) mac_remove_macaddr(mcip->mci_unicast);
			} else {
				(void) mac_add_macaddr(mip, tgrp, maddr, prim);
				mac_rx_client_restart(
				    (mac_client_handle_t)gmcip);
			}
		}
		mgcp = mgcp->mgc_next;
	}
}

/*
 * Move the MAC address from fgrp to tgrp. If this is the primary client,
 * we need to take any VLANs etc. together too.
 */
static int
mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp,
    mac_group_t *tgrp)
{
	mac_impl_t		*mip = mcip->mci_mip;
	uint8_t			maddr[MAXMACADDRLEN];
	int			err = 0;
	boolean_t		prim;
	boolean_t		multiclnt = B_FALSE;

	mac_rx_client_quiesce((mac_client_handle_t)mcip);
	ASSERT(mcip->mci_unicast != NULL);
	bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len);

	prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
	if (mcip->mci_unicast->ma_nusers > 1) {
		mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE);
		multiclnt = B_TRUE;
	}
	ASSERT(mcip->mci_unicast->ma_nusers == 1);
	err = mac_remove_macaddr(mcip->mci_unicast);
	if (err != 0) {
		mac_rx_client_restart((mac_client_handle_t)mcip);
		if (multiclnt) {
			mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr,
			    B_TRUE);
		}
		return (err);
	}

	/*
	 * Program the H/W Classifier first, if this fails we need
	 * not proceed with the other stuff.
	 */
	if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) {
		/* Revert back the H/W Classifier */
		if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) {
			/*
			 * This should not fail now since it worked earlier,
			 * should we panic?
			 */
			cmn_err(CE_WARN,
			    "mac_rx_switch_group: switching %p back"
			    " to group %p failed!!", (void *)mcip,
			    (void *)fgrp);
		}
		mac_rx_client_restart((mac_client_handle_t)mcip);
		if (multiclnt) {
			mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr,
			    B_TRUE);
		}
		return (err);
	}
	mcip->mci_unicast = mac_find_macaddr(mip, maddr);
	mac_rx_client_restart((mac_client_handle_t)mcip);
	if (multiclnt)
		mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE);
	return (err);
}

/*
 * Switch the MAC client from one group to another. This means we need
 * to remove the MAC address from the group, remove the MAC client,
 * teardown the SRSs and revert the group state. Then, we add the client
 * to the destination group, set the SRSs, and add the MAC address to the
 * group.
 */
int
mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
    mac_group_t *tgrp)
{
	int			err;
	mac_group_state_t	next_state;
	mac_client_impl_t	*group_only_mcip;
	mac_client_impl_t	*gmcip;
	mac_impl_t		*mip = mcip->mci_mip;
	mac_grp_client_t	*mgcp;

	ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group);

	if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0)
		return (err);

	/*
	 * The group might be reserved, but SRSs may not be set up, e.g.
	 * primary and its vlans using a reserved group.
	 */
	if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED &&
	    MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) {
		mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE);
	}
	if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) {
		mgcp = fgrp->mrg_clients;
		while (mgcp != NULL) {
			gmcip = mgcp->mgc_client;
			mgcp = mgcp->mgc_next;
			mac_group_remove_client(fgrp, gmcip);
			mac_group_add_client(tgrp, gmcip);
			gmcip->mci_flent->fe_rx_ring_group = tgrp;
		}
		mac_release_rx_group(mcip, fgrp);
		ASSERT(MAC_GROUP_NO_CLIENT(fgrp));
		mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED);
	} else {
		mac_group_remove_client(fgrp, mcip);
		mac_group_add_client(tgrp, mcip);
		mcip->mci_flent->fe_rx_ring_group = tgrp;
		/*
		 * If there are other clients (VLANs) sharing this address
		 * we should be here only for the primary.
		 */
		if (mcip->mci_unicast->ma_nusers > 1) {
			/*
			 * We need to move all the clients that are using
			 * this h/w address.
			 */
			mgcp = fgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				mgcp = mgcp->mgc_next;
				if (mcip->mci_unicast == gmcip->mci_unicast) {
					mac_group_remove_client(fgrp, gmcip);
					mac_group_add_client(tgrp, gmcip);
					gmcip->mci_flent->fe_rx_ring_group =
					    tgrp;
				}
			}
		}
		/*
		 * The default group will still take the multicast,
		 * broadcast traffic etc., so it won't go to
		 * MAC_GROUP_STATE_REGISTERED.
		 */
		if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED)
			mac_rx_group_unmark(fgrp, MR_CONDEMNED);
		mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED);
	}
	next_state = mac_group_next_state(tgrp, &group_only_mcip,
	    MAC_DEFAULT_RX_GROUP(mip), B_TRUE);
	mac_set_group_state(tgrp, next_state);

	/*
	 * If the destination group is reserved, setup the SRSs etc.
	 */
	if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
		mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK);
		mac_fanout_setup(mcip, mcip->mci_flent,
		    MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL,
		    NULL);
		mac_rx_group_unmark(tgrp, MR_INCIPIENT);
	} else {
		mac_rx_switch_grp_to_sw(tgrp);
	}
	return (0);
}

/*
 * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup()
 * when a share was allocated to the client.
 */
mac_group_t *
mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*grp = NULL;
	int			rv;
	int			i;
	int			err;
	mac_group_t		*defgrp;
	mac_share_handle_t	share = mcip->mci_share;
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	int			nrings;
	int			defnrings;
	boolean_t		need_exclgrp = B_FALSE;
	int			need_rings = 0;
	mac_group_t		*candidate_grp = NULL;
	mac_client_impl_t	*gclient;
	mac_resource_props_t	*gmrp;
	boolean_t		txhw = mrp->mrp_mask & MRP_TX_RINGS;
	boolean_t		unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC;
	boolean_t		isprimary;

	isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC;

	/*
	 * When we come here for a VLAN on the primary (dladm create-vlan),
	 * we need to pair it along with the primary (to keep it consistent
	 * with the RX side). So, we check if the primary is already assigned
	 * to a group and return the group if so. The other way is also
	 * true, i.e. the VLAN is already created and now we are plumbing
	 * the primary.
	 */
	if (!move && isprimary) {
		for (gclient = mip->mi_clients_list; gclient != NULL;
		    gclient = gclient->mci_client_next) {
			if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC &&
			    gclient->mci_flent->fe_tx_ring_group != NULL) {
				return (gclient->mci_flent->fe_tx_ring_group);
			}
		}
	}

	if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0)
		return (NULL);

	/* For dynamic groups, default unspec to 1 */
	if (txhw && unspec &&
	    mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		mrp->mrp_ntxrings = 1;
	}
	/*
	 * For static grouping we allow only specifying rings=0 and
	 * unspecified.
	 */
	if (txhw && mrp->mrp_ntxrings > 0 &&
	    mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) {
		return (NULL);
	}

	if (txhw) {
		/*
		 * We have explicitly asked for a group (with ntxrings,
		 * if unspec).
		 */
		if (unspec || mrp->mrp_ntxrings > 0) {
			need_exclgrp = B_TRUE;
			need_rings = mrp->mrp_ntxrings;
		} else if (mrp->mrp_ntxrings == 0) {
			/*
			 * We have asked for a software group.
			 */
			return (NULL);
		}
	}
	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	/*
	 * The number of rings that the default group can donate.
	 * We need to leave at least one ring - the default ring - in
	 * this group.
	 */
	defnrings = defgrp->mrg_cur_count - 1;

	/*
	 * Primary gets default group unless explicitly told not
	 * to (i.e. rings > 0).
	 */
	if (isprimary && !need_exclgrp)
		return (NULL);

	nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? mrp->mrp_ntxrings : 1;
	for (i = 0; i < mip->mi_tx_group_count; i++) {
		grp = &mip->mi_tx_groups[i];
		if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) ||
		    (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) {
			/*
			 * Select a candidate for replacement if we don't
			 * get an exclusive group. A candidate group is one
			 * that didn't ask for an exclusive group, but got
			 * one and it has enough rings (combined with what
			 * the default group can donate) for the new MAC
			 * client.
			 */
			if (grp->mrg_state == MAC_GROUP_STATE_RESERVED &&
			    candidate_grp == NULL) {
				gclient = MAC_GROUP_ONLY_CLIENT(grp);
				if (gclient == NULL)
					gclient = mac_get_grp_primary(grp);
				gmrp = MCIP_RESOURCE_PROPS(gclient);
				if (gclient->mci_share == (uintptr_t)NULL &&
				    (gmrp->mrp_mask & MRP_TX_RINGS) == 0 &&
				    (unspec ||
				    (grp->mrg_cur_count + defnrings) >=
				    need_rings)) {
					candidate_grp = grp;
				}
			}
			continue;
		}
		/*
		 * If the default can't donate let's just walk and
		 * see if someone can vacate a group, so that we have
		 * enough rings for this.
		 */
		if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC ||
		    nrings <= defnrings) {
			if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) {
				rv = mac_start_group(grp);
				ASSERT(rv == 0);
			}
			break;
		}
	}

	/* The default group */
	if (i >= mip->mi_tx_group_count) {
		/*
		 * If we need an exclusive group and have identified a
		 * candidate group we switch the MAC client from the
		 * candidate group to the default group and give the
		 * candidate group to this client.
		 */
		if (need_exclgrp && candidate_grp != NULL) {
			/*
			 * Switch the MAC client from the candidate group
			 * to the default group.
			 */
			grp = candidate_grp;
			gclient = MAC_GROUP_ONLY_CLIENT(grp);
			if (gclient == NULL)
				gclient = mac_get_grp_primary(grp);
			mac_tx_client_quiesce((mac_client_handle_t)gclient);
			mac_tx_switch_group(gclient, grp, defgrp);
			mac_tx_client_restart((mac_client_handle_t)gclient);

			/*
			 * Give the candidate group with the specified number
			 * of rings to this MAC client.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			rv = mac_start_group(grp);
			ASSERT(rv == 0);

			if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC)
				return (grp);

			ASSERT(grp->mrg_cur_count == 0);
			ASSERT(defgrp->mrg_cur_count > need_rings);

			err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX,
			    defgrp, grp, share, need_rings);
			if (err == 0) {
				/*
				 * For a share i_mac_group_allocate_rings gets
				 * the rings from the driver, let's populate
				 * the property for the client now.
				 */
				if (share != (uintptr_t)NULL) {
					mac_client_set_rings(
					    (mac_client_handle_t)mcip, -1,
					    grp->mrg_cur_count);
				}
				mip->mi_tx_group_free--;
				return (grp);
			}
			DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *,
			    mip->mi_name, int, grp->mrg_index, int, err);
			mac_stop_group(grp);
		}
		return (NULL);
	}
	/*
	 * We got an exclusive group, but it is not dynamic.
	 */
	if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) {
		mip->mi_tx_group_free--;
		return (grp);
	}

	rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp,
	    share, nrings);
	if (rv != 0) {
		DTRACE_PROBE3(tx__group__reserve__alloc__rings,
		    char *, mip->mi_name, int, grp->mrg_index, int, rv);
		mac_stop_group(grp);
		return (NULL);
	}
	/*
	 * For a share i_mac_group_allocate_rings gets the rings from the
	 * driver, let's populate the property for the client now.
	 */
	if (share != (uintptr_t)NULL) {
		mac_client_set_rings((mac_client_handle_t)mcip, -1,
		    grp->mrg_cur_count);
	}
	mip->mi_tx_group_free--;
	return (grp);
}

void
mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_share_handle_t	share = mcip->mci_share;
	mac_ring_t		*ring;
	mac_soft_ring_set_t	*srs = MCIP_TX_SRS(mcip);
	mac_group_t		*defgrp;

	defgrp = MAC_DEFAULT_TX_GROUP(mip);

	if (srs->srs_soft_ring_count > 0) {
		for (ring = grp->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			ASSERT(mac_tx_srs_ring_present(srs, ring));
			mac_tx_invoke_callbacks(mcip,
			    (mac_tx_cookie_t)
			    mac_tx_srs_get_soft_ring(srs, ring));
			mac_tx_srs_del_ring(srs, ring);
		}
	} else {
		ASSERT(srs->srs_tx.st_arg2 != NULL);
		srs->srs_tx.st_arg2 = NULL;
		mac_srs_stat_delete(srs);
	}

	if (share != (uintptr_t)NULL)
		mip->mi_share_capab.ms_sremove(share, grp->mrg_driver);

	/* move the ring back to the pool */
	if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		while ((ring = grp->mrg_rings) != NULL)
			(void) mac_group_mov_ring(mip, defgrp, ring);
	}
	mac_stop_group(grp);
	mip->mi_tx_group_free++;
}

/*
 * Disassociate a MAC client from a group, i.e. go through the rings in the
 * group and delete all the soft rings tied to them.
 */
static void
mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent)
{
	mac_client_impl_t	*mcip = flent->fe_mcip;
	mac_soft_ring_set_t	*tx_srs;
	mac_srs_tx_t		*tx;
	mac_ring_t		*ring;

	tx_srs = flent->fe_tx_srs;
	tx = &tx_srs->srs_tx;

	/* Single ring case we haven't created any soft rings */
	if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE ||
	    tx->st_mode == SRS_TX_DEFAULT) {
		tx->st_arg2 = NULL;
		mac_srs_stat_delete(tx_srs);
	/* Fanout case, where we have to dismantle the soft rings */
	} else {
		for (ring = fgrp->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			ASSERT(mac_tx_srs_ring_present(tx_srs, ring));
			mac_tx_invoke_callbacks(mcip,
			    (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs,
			    ring));
			mac_tx_srs_del_ring(tx_srs, ring);
		}
		ASSERT(tx->st_arg2 == NULL);
	}
}

/*
 * Switch the MAC client from one group to another. This means we need
 * to remove the MAC client, teardown the SRSs and revert the group state.
 * Then, we add the client to the destination group, set the SRSs etc.
 */
void
mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
    mac_group_t *tgrp)
{
	mac_client_impl_t	*group_only_mcip;
	mac_impl_t		*mip = mcip->mci_mip;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_group_t		*defgrp;
	mac_grp_client_t	*mgcp;
	mac_client_impl_t	*gmcip;
	flow_entry_t		*gflent;

	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	ASSERT(fgrp == flent->fe_tx_ring_group);

	if (fgrp == defgrp) {
		/*
		 * If this is the primary we need to find any VLANs on
		 * the primary and move them too.
		 */
		mac_group_remove_client(fgrp, mcip);
		mac_tx_dismantle_soft_rings(fgrp, flent);
		if (mcip->mci_unicast->ma_nusers > 1) {
			mgcp = fgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				mgcp = mgcp->mgc_next;
				if (mcip->mci_unicast != gmcip->mci_unicast)
					continue;
				mac_tx_client_quiesce(
				    (mac_client_handle_t)gmcip);

				gflent = gmcip->mci_flent;
				mac_group_remove_client(fgrp, gmcip);
				mac_tx_dismantle_soft_rings(fgrp, gflent);

				mac_group_add_client(tgrp, gmcip);
				gflent->fe_tx_ring_group = tgrp;
				/* We could directly set this to SHARED */
				tgrp->mrg_state = mac_group_next_state(tgrp,
				    &group_only_mcip, defgrp, B_FALSE);

				mac_tx_srs_group_setup(gmcip, gflent,
				    SRST_LINK);
				mac_fanout_setup(gmcip, gflent,
				    MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver,
				    gmcip, NULL, NULL);

				mac_tx_client_restart(
				    (mac_client_handle_t)gmcip);
			}
		}
		if (MAC_GROUP_NO_CLIENT(fgrp)) {
			mac_ring_t	*ring;
			int		cnt;
			int		ringcnt;

			fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED;
			/*
			 * Additionally, we also need to stop all
			 * the rings in the default group, except
			 * the default ring. The reason being
			 * this group won't be released since it is
			 * the default group, so the rings won't
			 * be stopped otherwise.
			 */
			ringcnt = fgrp->mrg_cur_count;
			ring = fgrp->mrg_rings;
			for (cnt = 0; cnt < ringcnt; cnt++) {
				if (ring->mr_state == MR_INUSE &&
				    ring !=
				    (mac_ring_t *)mip->mi_default_tx_ring) {
					mac_stop_ring(ring);
					ring->mr_flag = 0;
				}
				ring = ring->mr_next;
			}
		} else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) {
			fgrp->mrg_state = MAC_GROUP_STATE_RESERVED;
		} else {
			ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED);
		}
	} else {
		/*
		 * We could have VLANs sharing the non-default group with
		 * the primary.
		 */
		mgcp = fgrp->mrg_clients;
		while (mgcp != NULL) {
			gmcip = mgcp->mgc_client;
			mgcp = mgcp->mgc_next;
			if (gmcip == mcip)
				continue;
			mac_tx_client_quiesce((mac_client_handle_t)gmcip);
			gflent = gmcip->mci_flent;

			mac_group_remove_client(fgrp, gmcip);
			mac_tx_dismantle_soft_rings(fgrp, gflent);

			mac_group_add_client(tgrp, gmcip);
			gflent->fe_tx_ring_group = tgrp;
			/* We could directly set this to SHARED */
			tgrp->mrg_state = mac_group_next_state(tgrp,
			    &group_only_mcip, defgrp, B_FALSE);
			mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK);
			mac_fanout_setup(gmcip, gflent,
			    MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver,
			    gmcip, NULL, NULL);

			mac_tx_client_restart((mac_client_handle_t)gmcip);
		}
		mac_group_remove_client(fgrp, mcip);
		mac_release_tx_group(mcip, fgrp);
		fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED;
	}

	/* Add it to the tgroup */
	mac_group_add_client(tgrp, mcip);
	flent->fe_tx_ring_group = tgrp;
	tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip,
	    defgrp, B_FALSE);

	mac_tx_srs_group_setup(mcip, flent, SRST_LINK);
	mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
	    mac_rx_deliver, mcip, NULL, NULL);
}
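
/*
 * Callers of mac_tx_switch_group() are expected to quiesce the client
 * around the switch, as mac_reserve_tx_group() does above:
 *
 *	mac_tx_client_quiesce((mac_client_handle_t)gclient);
 *	mac_tx_switch_group(gclient, grp, defgrp);
 *	mac_tx_client_restart((mac_client_handle_t)gclient);
 */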

/*
 * This is a 1-time control path activity initiated by the client (IP).
 * The mac perimeter protects against other simultaneous control activities,
 * for example an ioctl that attempts to change the degree of fanout and
 * increase or decrease the number of softrings associated with this Tx SRS.
 */
static mac_tx_notify_cb_t *
mac_client_tx_notify_add(mac_client_impl_t *mcip,
    mac_tx_notify_t notify, void *arg)
{
	mac_cb_info_t *mcbi;
	mac_tx_notify_cb_t *mtnfp;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));

	mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP);
	mtnfp->mtnf_fn = notify;
	mtnfp->mtnf_arg = arg;
	mtnfp->mtnf_link.mcb_objp = mtnfp;
	mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t);
	mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T;

	mcbi = &mcip->mci_tx_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link);
	mutex_exit(mcbi->mcbi_lockp);
	return (mtnfp);
}

static void
mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp)
{
	mac_cb_info_t	*mcbi;
	mac_cb_t	**cblist;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));

	if (!mac_callback_find(&mcip->mci_tx_notify_cb_info,
	    &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) {
		cmn_err(CE_WARN,
		    "mac_client_tx_notify_remove: callback not "
		    "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp);
		return;
	}

	mcbi = &mcip->mci_tx_notify_cb_info;
	cblist = &mcip->mci_tx_notify_cb_list;
	mutex_enter(mcbi->mcbi_lockp);
	if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link))
		kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t));
	else
		mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info);
	mutex_exit(mcbi->mcbi_lockp);
}

/*
 * mac_client_tx_notify():
 * call to add and remove flow control callback routine.
 */
mac_tx_notify_handle_t
mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func,
    void *ptr)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_tx_notify_cb_t	*mtnfp = NULL;

	i_mac_perim_enter(mcip->mci_mip);

	if (callb_func != NULL) {
		/* Add a notify callback */
		mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr);
	} else {
		mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr);
	}
	i_mac_perim_exit(mcip->mci_mip);

	return ((mac_tx_notify_handle_t)mtnfp);
}
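
/*
 * Example (hypothetical client code, not part of this file): a MAC
 * client registers for Tx flow-control notifications and later removes
 * the callback by passing the returned handle back with a NULL
 * function:
 *
 *	mac_tx_notify_handle_t	h;
 *
 *	h = mac_client_tx_notify(mch, my_tx_notify_fn, my_arg);
 *	...
 *	(void) mac_client_tx_notify(mch, NULL, (void *)h);
 */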

void
mac_bridge_vectors(mac_bridge_tx_t txf, mac_bridge_rx_t rxf,
    mac_bridge_ref_t reff, mac_bridge_ls_t lsf)
{
	mac_bridge_tx_cb = txf;
	mac_bridge_rx_cb = rxf;
	mac_bridge_ref_cb = reff;
	mac_bridge_ls_cb = lsf;
}
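
/*
 * Sketch (hypothetical bridge module code, not part of this file): the
 * bridge module hooks its entry points once when it loads and clears
 * them with NULLs when it unloads:
 *
 *	mac_bridge_vectors(bridge_tx, bridge_rx, bridge_ref, bridge_ls);
 *	...
 *	mac_bridge_vectors(NULL, NULL, NULL, NULL);
 */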

int
mac_bridge_set(mac_handle_t mh, mac_handle_t link)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		retv;

	mutex_enter(&mip->mi_bridge_lock);
	if (mip->mi_bridge_link == NULL) {
		mip->mi_bridge_link = link;
		retv = 0;
	} else {
		retv = EBUSY;
	}
	mutex_exit(&mip->mi_bridge_lock);
	if (retv == 0) {
		mac_poll_state_change(mh, B_FALSE);
		mac_capab_update(mh);
	}
	return (retv);
}

/*
 * Disable bridging on the indicated link.
 */
void
mac_bridge_clear(mac_handle_t mh, mac_handle_t link)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	mutex_enter(&mip->mi_bridge_lock);
	ASSERT(mip->mi_bridge_link == link);
	mip->mi_bridge_link = NULL;
	mutex_exit(&mip->mi_bridge_lock);
	mac_poll_state_change(mh, B_TRUE);
	mac_capab_update(mh);
}

void
mac_no_active(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	mip->mi_state_flags |= MIS_NO_ACTIVE;
	i_mac_perim_exit(mip);
}

/*
 * Walk the primary VLAN clients whenever the primary's rings property
 * changes and update the mac_resource_props_t for the VLAN's client.
 * We need to do this since we don't support setting these properties
 * on the primary's VLAN clients, but the VLAN clients have to
 * follow the primary w.r.t the rings property.
 */
void
mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp)
{
	mac_client_impl_t	*vmcip;
	mac_resource_props_t	*vmrp;

	for (vmcip = mip->mi_clients_list; vmcip != NULL;
	    vmcip = vmcip->mci_client_next) {
		if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) ||
		    mac_client_vid((mac_client_handle_t)vmcip) ==
		    VLAN_ID_NONE) {
			continue;
		}
		vmrp = MCIP_RESOURCE_PROPS(vmcip);

		vmrp->mrp_nrxrings = mrp->mrp_nrxrings;
		if (mrp->mrp_mask & MRP_RX_RINGS)
			vmrp->mrp_mask |= MRP_RX_RINGS;
		else if (vmrp->mrp_mask & MRP_RX_RINGS)
			vmrp->mrp_mask &= ~MRP_RX_RINGS;

		vmrp->mrp_ntxrings = mrp->mrp_ntxrings;
		if (mrp->mrp_mask & MRP_TX_RINGS)
			vmrp->mrp_mask |= MRP_TX_RINGS;
		else if (vmrp->mrp_mask & MRP_TX_RINGS)
			vmrp->mrp_mask &= ~MRP_TX_RINGS;

		if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC)
			vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
		else
			vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;

		if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)
			vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
		else
			vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
	}
}

/*
 * We are adding or removing ring(s) from a group. The source for taking
 * rings is the default group. The destination for giving rings back is
 * the default group.
 */
int
mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group,
    mac_group_t *defgrp)
{
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	uint_t			modify;
	int			count;
	mac_ring_t		*ring;
	mac_ring_t		*next;
	mac_impl_t		*mip = mcip->mci_mip;
	mac_ring_t		**rings;
	uint_t			ringcnt;
	int			i = 0;
	boolean_t		rx_group = group->mrg_type == MAC_RING_TYPE_RX;
	int			start;
	int			end;
	mac_group_t		*tgrp;
	int			j;
	int			rv = 0;

	/*
	 * If we are asked for just a group, we give 1 ring, else
	 * the specified number of rings.
	 */
	if (rx_group) {
		ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1:
		    mrp->mrp_nrxrings;
	} else {
		ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1:
		    mrp->mrp_ntxrings;
	}

	/* don't allow modifying rings for a share for now. */
	ASSERT(mcip->mci_share == (uintptr_t)NULL);

	if (ringcnt == group->mrg_cur_count)
		return (0);

	if (group->mrg_cur_count > ringcnt) {
		modify = group->mrg_cur_count - ringcnt;
		if (rx_group) {
			if (mip->mi_rx_donor_grp == group) {
				ASSERT(mac_is_primary_client(mcip));
				mip->mi_rx_donor_grp = defgrp;
			} else {
				defgrp = mip->mi_rx_donor_grp;
			}
		}
		ring = group->mrg_rings;
		rings = kmem_alloc(modify * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		j = 0;
		for (count = 0; count < modify; count++) {
			next = ring->mr_next;
			rv = mac_group_mov_ring(mip, defgrp, ring);
			if (rv != 0) {
				/* cleanup on failure */
				for (j = 0; j < count; j++) {
					(void) mac_group_mov_ring(mip, group,
					    rings[j]);
				}
				break;
			}
			rings[j++] = ring;
			ring = next;
		}
		kmem_free(rings, modify * sizeof (mac_ring_handle_t));
		return (rv);
	}
	if (ringcnt >= MAX_RINGS_PER_GROUP)
		return (EINVAL);

	modify = ringcnt - group->mrg_cur_count;

	if (rx_group) {
		if (group != mip->mi_rx_donor_grp)
			defgrp = mip->mi_rx_donor_grp;
		else
			/*
			 * This is the donor group with all the remaining
			 * rings. Default group now gets to be the donor
			 */
			mip->mi_rx_donor_grp = defgrp;
		start = 1;
		end = mip->mi_rx_group_count;
	} else {
		start = 0;
		end = mip->mi_tx_group_count - 1;
	}
	/*
	 * If the default doesn't have any rings, let's see if we can
	 * take rings given to an h/w client that doesn't need it.
	 * For now, we just see if there is any one client that can donate
	 * all the required rings.
	 */
	if (defgrp->mrg_cur_count < (modify + 1)) {
		for (i = start; i < end; i++) {
			if (rx_group) {
				tgrp = &mip->mi_rx_groups[i];
				if (tgrp == group || tgrp->mrg_state <
				    MAC_GROUP_STATE_RESERVED) {
					continue;
				}
				mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
				if (mcip == NULL)
					mcip = mac_get_grp_primary(tgrp);
				ASSERT(mcip != NULL);
				mrp = MCIP_RESOURCE_PROPS(mcip);
				if ((mrp->mrp_mask & MRP_RX_RINGS) != 0)
					continue;
				if ((tgrp->mrg_cur_count +
				    defgrp->mrg_cur_count) < (modify + 1)) {
					continue;
				}
				if (mac_rx_switch_group(mcip, tgrp,
				    defgrp) != 0) {
					return (ENOSPC);
				}
			} else {
				tgrp = &mip->mi_tx_groups[i];
				if (tgrp == group || tgrp->mrg_state <
				    MAC_GROUP_STATE_RESERVED) {
					continue;
				}
				mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
				if (mcip == NULL)
					mcip = mac_get_grp_primary(tgrp);
				mrp = MCIP_RESOURCE_PROPS(mcip);
				if ((mrp->mrp_mask & MRP_TX_RINGS) != 0)
					continue;
				if ((tgrp->mrg_cur_count +
				    defgrp->mrg_cur_count) < (modify + 1)) {
					continue;
				}
				/* OK, we can switch this to s/w */
				mac_tx_client_quiesce(
				    (mac_client_handle_t)mcip);
				mac_tx_switch_group(mcip, tgrp, defgrp);
				mac_tx_client_restart(
				    (mac_client_handle_t)mcip);
			}
		}
		if (defgrp->mrg_cur_count < (modify + 1))
			return (ENOSPC);
	}

	if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp,
	    group, mcip->mci_share, modify)) != 0) {
		return (rv);
	}
	return (0);
}
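
/*
 * Worked example for mac_group_ring_modify() (hypothetical numbers): a
 * group currently owns 2 RX rings and the client asks for rxrings=5,
 * so modify is 3. Since the donor (default) group must retain its
 * default ring, it needs at least modify + 1 = 4 rings to satisfy the
 * request; if it has fewer, we try to vacate another h/w group whose
 * owner never set the rings property, and fail with ENOSPC if that
 * still isn't enough.
 */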

/*
 * Given the poolname in mac_resource_props, find the cpupart
 * that is associated with this pool. The cpupart will be used
 * later for finding the cpus to be bound to the networking threads.
 *
 * use_default is set B_TRUE if pools are enabled and pool_default
 * is returned. This avoids a 2nd lookup to set the poolname
 * for pool-effective.
 *
 * returns:
 *
 *    NULL -			pools are disabled or the 'cpus' property
 *				is set.
 *    cpupart of pool_default -	pools are enabled and the pool is not
 *				available or poolname is blank.
 *    cpupart of named pool -	pools are enabled and the pool is available.
 */
cpupart_t *
mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default)
{
	pool_t		*pool;
	cpupart_t	*cpupart;

	*use_default = B_FALSE;

	/* CPUs property is set */
	if (mrp->mrp_mask & MRP_CPUS)
		return (NULL);

	ASSERT(pool_lock_held());

	/* Pools are disabled, no pset */
	if (pool_state == POOL_DISABLED)
		return (NULL);

	/* Pools property is set */
	if (mrp->mrp_mask & MRP_POOL) {
		if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) {
			/* Pool not found */
			DTRACE_PROBE1(mac_pset_find_no_pool, char *,
			    mrp->mrp_pool);
			*use_default = B_TRUE;
			pool = pool_default;
		}
	/* Pools property is not set */
	} else {
		*use_default = B_TRUE;
		pool = pool_default;
	}

	/* Find the CPU pset that corresponds to the pool */
	mutex_enter(&cpu_lock);
	if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) {
		DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t,
		    pool->pool_pset->pset_id);
	}
	mutex_exit(&cpu_lock);

	return (cpupart);
}
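
/*
 * Usage sketch, as in mac_pool_link_update() below; the pool lock is
 * held across the lookup and the effective-property update:
 *
 *	pool_lock();
 *	cpupart = mac_pset_find(mrp, &use_default);
 *	mac_fanout_setup(mcip, mcip->mci_flent, mrp,
 *	    mac_rx_deliver, mcip, NULL, cpupart);
 *	mac_set_pool_effective(use_default, cpupart, mrp, emrp);
 *	pool_unlock();
 */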

void
mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart,
    mac_resource_props_t *mrp, mac_resource_props_t *emrp)
{
	ASSERT(pool_lock_held());

	if (cpupart != NULL) {
		emrp->mrp_mask |= MRP_POOL;
		if (use_default) {
			(void) strcpy(emrp->mrp_pool,
			    "pool_default");
		} else {
			ASSERT(strlen(mrp->mrp_pool) != 0);
			(void) strcpy(emrp->mrp_pool,
			    mrp->mrp_pool);
		}
	} else {
		emrp->mrp_mask &= ~MRP_POOL;
		bzero(emrp->mrp_pool, MAXPATHLEN);
	}
}

struct mac_pool_arg {
	char		mpa_poolname[MAXPATHLEN];
	pool_event_t	mpa_what;
};

/*ARGSUSED*/
static uint_t
mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	struct mac_pool_arg	*mpa = arg;
	mac_impl_t		*mip = (mac_impl_t *)val;
	mac_client_impl_t	*mcip;
	mac_resource_props_t	*mrp, *emrp;
	boolean_t		pool_update = B_FALSE;
	boolean_t		pool_clear = B_FALSE;
	boolean_t		use_default = B_FALSE;
	cpupart_t		*cpupart = NULL;

	mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
	i_mac_perim_enter(mip);
	for (mcip = mip->mi_clients_list; mcip != NULL;
	    mcip = mcip->mci_client_next) {
		pool_update = B_FALSE;
		pool_clear = B_FALSE;
		use_default = B_FALSE;
		mac_client_get_resources((mac_client_handle_t)mcip, mrp);
		emrp = MCIP_EFFECTIVE_PROPS(mcip);

		/*
		 * When pools are enabled
		 */
		if ((mpa->mpa_what == POOL_E_ENABLE) &&
		    ((mrp->mrp_mask & MRP_CPUS) == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_update = B_TRUE;
		}

		/*
		 * When pools are disabled
		 */
		if ((mpa->mpa_what == POOL_E_DISABLE) &&
		    ((mrp->mrp_mask & MRP_CPUS) == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_clear = B_TRUE;
		}

		/*
		 * Look for links with the pool property set and the poolname
		 * matching the one which is changing.
		 */
		if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) {
			/*
			 * The pool associated with the link has changed.
			 */
			if (mpa->mpa_what == POOL_E_CHANGE) {
				mrp->mrp_mask |= MRP_POOL;
				pool_update = B_TRUE;
			}
		}

		/*
		 * This link is associated with pool_default and
		 * pool_default has changed.
		 */
		if ((mpa->mpa_what == POOL_E_CHANGE) &&
		    (strcmp(emrp->mrp_pool, "pool_default") == 0) &&
		    (strcmp(mpa->mpa_poolname, "pool_default") == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_update = B_TRUE;
		}

		/*
		 * Get new list of cpus for the pool, bind network
		 * threads to new list of cpus and update resources.
		 */
		if (pool_update) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				pool_lock();
				cpupart = mac_pset_find(mrp, &use_default);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, cpupart);
				mac_set_pool_effective(use_default, cpupart,
				    mrp, emrp);
				pool_unlock();
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}

		/*
		 * Clear the effective pool and bind network threads
		 * to any available CPU.
		 */
		if (pool_clear) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				emrp->mrp_mask &= ~MRP_POOL;
				bzero(emrp->mrp_pool, MAXPATHLEN);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, NULL);
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}
	}
	i_mac_perim_exit(mip);
	kmem_free(mrp, sizeof (*mrp));
	return (MH_WALK_CONTINUE);
}

static void
mac_pool_update(void *arg)
{
	mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg);
	kmem_free(arg, sizeof (struct mac_pool_arg));
}

/*
 * Callback function to be executed when a noteworthy pool event
 * takes place.
 */
/* ARGSUSED */
static void
mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg)
{
	pool_t			*pool;
	char			*poolname = NULL;
	struct mac_pool_arg	*mpa;

	pool_lock();
	mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP);

	switch (what) {
	case POOL_E_ENABLE:
	case POOL_E_DISABLE:
		break;

	case POOL_E_CHANGE:
		pool = pool_lookup_pool_by_id(id);
		if (pool == NULL) {
			kmem_free(mpa, sizeof (struct mac_pool_arg));
			pool_unlock();
			return;
		}
		pool_get_name(pool, &poolname);
		(void) strlcpy(mpa->mpa_poolname, poolname,
		    sizeof (mpa->mpa_poolname));
		break;

	default:
		kmem_free(mpa, sizeof (struct mac_pool_arg));
		pool_unlock();
		return;
	}
	pool_unlock();

	mpa->mpa_what = what;

	mac_pool_update(mpa);
}

/*
 * Set effective rings property. This could be called from datapath_setup/
 * datapath_teardown or set-linkprop.
 * If the group is reserved we just go ahead and set the effective rings.
 * Additionally, for TX this could mean the default group has lost/gained
 * some rings, so if the default group is reserved, we need to adjust the
 * effective rings for the default group clients. For RX, if we are working
 * with the non-default group, we just need to reset the effective props
 * for the default group clients.
 */
void
mac_set_rings_effective(mac_client_impl_t *mcip)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*grp;
	mac_group_t		*defgrp;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*emrp = MCIP_EFFECTIVE_PROPS(mcip);
	mac_grp_client_t	*mgcp;
	mac_client_impl_t	*gmcip;

	grp = flent->fe_rx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_RX_GROUP(mip);
		/*
		 * If we have reserved a group, set the effective rings
		 * to the ring count in the group.
		 */
		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_RX_RINGS;
			emrp->mrp_nrxrings = grp->mrg_cur_count;
		}

		/*
		 * We go through the clients in the shared group and
		 * reset the effective properties. It is possible this
		 * might have already been done for some client (i.e.
		 * if some client is being moved to a group that is
		 * already shared). The case where the default group is
		 * RESERVED is taken care of above (note in the RX side if
		 * there is a non-default group, the default group is always
		 * SHARED).
		 */
		if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			if (grp->mrg_state == MAC_GROUP_STATE_SHARED)
				mgcp = grp->mrg_clients;
			else
				mgcp = defgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_RX_RINGS) {
					emrp->mrp_mask &= ~MRP_RX_RINGS;
					emrp->mrp_nrxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}
	}

	/* Now the TX side */
	grp = flent->fe_tx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_TX_GROUP(mip);

		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_TX_RINGS;
			emrp->mrp_ntxrings = grp->mrg_cur_count;
		} else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			mgcp = grp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_TX_RINGS) {
					emrp->mrp_mask &= ~MRP_TX_RINGS;
					emrp->mrp_ntxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}

		/*
		 * If the group is not the default group and the default
		 * group is reserved, the ring count in the default group
		 * might have changed, update it.
		 */
		if (grp != defgrp &&
		    defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			gmcip = MAC_GROUP_ONLY_CLIENT(defgrp);
			emrp = MCIP_EFFECTIVE_PROPS(gmcip);
			emrp->mrp_ntxrings = defgrp->mrg_cur_count;
		}
	}
	emrp = MCIP_EFFECTIVE_PROPS(mcip);
}

/*
 * Check if the primary is in the default group. If so, see if we
 * can give it an exclusive group now that another client is
 * being configured. We take the primary out of the default group
 * because the multicast/broadcast packets for all the clients
 * will land in the default ring in the default group which means
 * any client in the default group, even if it is the only one in
 * the group, will lose exclusive access to the rings, hence
 * performance degradation.
 */
static int
mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*defgrp = MAC_DEFAULT_RX_GROUP(mip);
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	uint8_t			*mac_addr;
	mac_group_t		*ngrp;

	/*
	 * Check if the primary is in the default group, if not
	 * or if it is explicitly configured to be in the default
	 * group OR set the RX rings property, return.
	 */
	if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS)
		return (0);

	/*
	 * If the new client needs an exclusive group and we
	 * don't have another for the primary, return.
	 */
	if (rxhw && mip->mi_rxhwclnt_avail < 2)
		return (0);

	mac_addr = flent->fe_flow_desc.fd_dst_mac;
	/*
	 * We call this when we are setting up the datapath for
	 * the first non-primary.
	 */
	ASSERT(mip->mi_nactiveclients == 2);

	/*
	 * OK, now we have the primary that needs to be relocated.
	 */
	ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
	if (ngrp == NULL)
		return (ENOSPC);

	if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) {
		mac_stop_group(ngrp);
		return (ENOSPC);
	}
	return (0);
}

static void
mac_transceiver_init(mac_impl_t *mip)
{
	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_TRANSCEIVER,
	    &mip->mi_transceiver)) {
		/*
		 * The driver set a flag that we don't know about. In this
		 * case, we need to warn about it and ignore this capability.
		 */
		if (mip->mi_transceiver.mct_flags != 0) {
			dev_err(mip->mi_dip, CE_WARN, "driver set transceiver "
			    "flags to invalid value: 0x%x, ignoring "
			    "capability", mip->mi_transceiver.mct_flags);
			bzero(&mip->mi_transceiver,
			    sizeof (mac_capab_transceiver_t));
		}
	} else {
		bzero(&mip->mi_transceiver,
		    sizeof (mac_capab_transceiver_t));
	}
}

int
mac_transceiver_count(mac_handle_t mh, uint_t *countp)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_transceiver.mct_ntransceivers == 0)
		return (ENOTSUP);

	*countp = mip->mi_transceiver.mct_ntransceivers;
	return (0);
}

int
mac_transceiver_info(mac_handle_t mh, uint_t tranid, boolean_t *present,
    boolean_t *usable)
{
	int ret;
	mac_transceiver_info_t info;
	mac_impl_t *mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_transceiver.mct_info == NULL ||
	    mip->mi_transceiver.mct_ntransceivers == 0)
		return (ENOTSUP);

	if (tranid >= mip->mi_transceiver.mct_ntransceivers)
		return (EINVAL);

	bzero(&info, sizeof (mac_transceiver_info_t));
	if ((ret = mip->mi_transceiver.mct_info(mip->mi_driver, tranid,
	    &info)) != 0) {
		return (ret);
	}

	*present = info.mti_present;
	*usable = info.mti_usable;
	return (0);
}

int
mac_transceiver_read(mac_handle_t mh, uint_t tranid, uint_t page, void *buf,
    size_t nbytes, off_t offset, size_t *nread)
{
	int ret;
	mac_impl_t *mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_transceiver.mct_read == NULL)
		return (ENOTSUP);

	if (tranid >= mip->mi_transceiver.mct_ntransceivers)
		return (EINVAL);

	/*
	 * All supported pages today are 256 bytes wide. Make sure offset +
	 * nbytes never exceeds that.
	 */
	if (offset < 0 || offset >= 256 || nbytes > 256 ||
	    offset + nbytes > 256)
		return (EINVAL);

	ret = mip->mi_transceiver.mct_read(mip->mi_driver, tranid, page, buf,
	    nbytes, offset, nread);
	if (ret == 0 && *nread > nbytes) {
		dev_err(mip->mi_dip, CE_PANIC, "driver wrote %lu bytes into "
		    "%lu byte sized buffer, possible memory corruption",
		    *nread, nbytes);
	}

	return (ret);
}

static void
mac_led_init(mac_impl_t *mip)
{
	mip->mi_led_modes = MAC_LED_DEFAULT;

	if (!mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LED, &mip->mi_led)) {
		bzero(&mip->mi_led, sizeof (mac_capab_led_t));
		return;
	}

	if (mip->mi_led.mcl_flags != 0) {
		dev_err(mip->mi_dip, CE_WARN, "driver set led capability "
		    "flags to invalid value: 0x%x, ignoring "
		    "capability", mip->mi_led.mcl_flags);
		bzero(&mip->mi_led, sizeof (mac_capab_led_t));
		return;
	}

	if ((mip->mi_led.mcl_modes & ~MAC_LED_ALL) != 0) {
		dev_err(mip->mi_dip, CE_WARN, "driver set led capability "
		    "supported modes to invalid value: 0x%x, ignoring "
		    "capability", mip->mi_led.mcl_modes);
		bzero(&mip->mi_led, sizeof (mac_capab_led_t));
	}
}

int
mac_led_get(mac_handle_t mh, mac_led_mode_t *supported, mac_led_mode_t *active)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_led.mcl_set == NULL)
		return (ENOTSUP);

	*supported = mip->mi_led.mcl_modes;
	*active = mip->mi_led_modes;

	return (0);
}

/*
 * Update and multiplex the various LED requests. We only ever send one LED to
 * the underlying driver at a time. As such, we end up multiplexing all
 * requested states and picking one to send down to the driver.
 */
int
mac_led_set(mac_handle_t mh, mac_led_mode_t desired)
{
	int ret;
	mac_led_mode_t driver;
	mac_impl_t *mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	/*
	 * If we've been passed a desired value of zero, that indicates that
	 * we're basically resetting to the value of zero, which is our default
	 * value.
	 */
	if (desired == 0)
		desired = MAC_LED_DEFAULT;

	if (mip->mi_led.mcl_set == NULL)
		return (ENOTSUP);

	/*
	 * Catch both values that we don't know about and those that the
	 * driver doesn't support.
	 */
	if ((desired & ~MAC_LED_ALL) != 0)
		return (EINVAL);

	if ((desired & ~mip->mi_led.mcl_modes) != 0)
		return (ENOTSUP);

	/*
	 * If we have the same value, then there is nothing to do.
	 */
	if (desired == mip->mi_led_modes)
		return (0);

	/*
	 * Based on the desired value, determine what to send to the driver.
	 * We will only send a single bit to the driver at any given time.
	 * IDENT takes priority over OFF or ON. We also let OFF take priority
	 * over the rest.
	 */
	if (desired & MAC_LED_IDENT) {
		driver = MAC_LED_IDENT;
	} else if (desired & MAC_LED_OFF) {
		driver = MAC_LED_OFF;
	} else if (desired & MAC_LED_ON) {
		driver = MAC_LED_ON;
	} else {
		driver = MAC_LED_DEFAULT;
	}

	if ((ret = mip->mi_led.mcl_set(mip->mi_driver, driver, 0)) == 0) {
		mip->mi_led_modes = desired;
	}

	return (ret);
}
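
/*
 * Example (hypothetical caller, not part of this file): turn on the
 * locator pattern while holding the perimeter, then restore the
 * default behavior:
 *
 *	(void) mac_led_set(mh, MAC_LED_IDENT);
 *	...
 *	(void) mac_led_set(mh, MAC_LED_DEFAULT);
 */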