4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 Joyent, Inc.
27 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
29 * An instance of the structure aggr_grp_t is allocated for each
30 * link aggregation group. When created, aggr_grp_t objects are
31 * entered into the aggr_grp_hash hash table maintained by the modhash
32 * module. The hash key is the linkid associated with the link
35 * A set of MAC ports are associated with each association group.
37 * Aggr pseudo TX rings
38 * --------------------
39 * The underlying ports (NICs) in an aggregation can have TX rings. To
40 * enhance aggr's performance, these TX rings are made available to the
41 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new.
42 * They are already present and implemented on the RX side, where they are
43 * called pseudo RX rings. The same concept is extended to the TX side where
44 * each TX ring of an underlying port is reflected in aggr as a pseudo
45 * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
46 * ring. Even in the case of a NIC that does not have a TX ring, a pseudo
47 * TX ring is given to the aggregation layer.
49 * With this change, the outgoing stack depth looks much better:
51 * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
52 * mac_tx_send() -> aggr_ring_rx() -> <driver>_ring_tx()
54 * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
55 * SRS_TX_AGGR and SRS_TX_BW_AGGR.
57 * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine
58 * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
59 * ring belonging to a port on which the packet has to be sent.
60 * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
61 * policy and then uses the fanout_hint passed to it to pick a TX ring from
64 * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where
65 * bandwidth limit is applied first on the outgoing packet and the packets
66 * allowed to go out would call mac_tx_aggr_mode() to send the packet on a
70 #include <sys/types.h>
71 #include <sys/sysmacros.h>
73 #include <sys/cmn_err.h>
76 #include <sys/ksynch.h>
78 #include <sys/stream.h>
79 #include <sys/modctl.h>
81 #include <sys/sunddi.h>
82 #include <sys/atomic.h>
84 #include <sys/modhash.h>
85 #include <sys/id_space.h>
86 #include <sys/strsun.h>
90 #include <sys/mac_provider.h>
94 #include <sys/aggr_impl.h>
96 static int aggr_m_start(void *);
97 static void aggr_m_stop(void *);
98 static int aggr_m_promisc(void *, boolean_t
);
99 static int aggr_m_multicst(void *, boolean_t
, const uint8_t *);
100 static int aggr_m_unicst(void *, const uint8_t *);
101 static int aggr_m_stat(void *, uint_t
, uint64_t *);
102 static void aggr_m_ioctl(void *, queue_t
*, mblk_t
*);
103 static boolean_t
aggr_m_capab_get(void *, mac_capab_t
, void *);
104 static int aggr_m_setprop(void *, const char *, mac_prop_id_t
, uint_t
,
106 static void aggr_m_propinfo(void *, const char *, mac_prop_id_t
,
107 mac_prop_info_handle_t
);
109 static aggr_port_t
*aggr_grp_port_lookup(aggr_grp_t
*, datalink_id_t
);
110 static int aggr_grp_rem_port(aggr_grp_t
*, aggr_port_t
*, boolean_t
*,
113 static void aggr_grp_capab_set(aggr_grp_t
*);
114 static boolean_t
aggr_grp_capab_check(aggr_grp_t
*, aggr_port_t
*);
115 static uint_t
aggr_grp_max_sdu(aggr_grp_t
*);
116 static uint32_t aggr_grp_max_margin(aggr_grp_t
*);
117 static boolean_t
aggr_grp_sdu_check(aggr_grp_t
*, aggr_port_t
*);
118 static boolean_t
aggr_grp_margin_check(aggr_grp_t
*, aggr_port_t
*);
120 static int aggr_add_pseudo_rx_group(aggr_port_t
*, aggr_pseudo_rx_group_t
*);
121 static void aggr_rem_pseudo_rx_group(aggr_port_t
*, aggr_pseudo_rx_group_t
*);
122 static int aggr_pseudo_disable_intr(mac_intr_handle_t
);
123 static int aggr_pseudo_enable_intr(mac_intr_handle_t
);
124 static int aggr_pseudo_start_ring(mac_ring_driver_t
, uint64_t);
125 static void aggr_pseudo_stop_ring(mac_ring_driver_t
);
126 static int aggr_addmac(void *, const uint8_t *);
127 static int aggr_remmac(void *, const uint8_t *);
128 static mblk_t
*aggr_rx_poll(void *, int);
129 static void aggr_fill_ring(void *, mac_ring_type_t
, const int,
130 const int, mac_ring_info_t
*, mac_ring_handle_t
);
131 static void aggr_fill_group(void *, mac_ring_type_t
, const int,
132 mac_group_info_t
*, mac_group_handle_t
);
134 static kmem_cache_t
*aggr_grp_cache
;
135 static mod_hash_t
*aggr_grp_hash
;
136 static krwlock_t aggr_grp_lock
;
137 static uint_t aggr_grp_cnt
;
138 static id_space_t
*key_ids
;
140 #define GRP_HASHSZ 64
141 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid)
142 #define AGGR_PORT_NAME_DELIMIT '-'
144 static uchar_t aggr_zero_mac
[] = {0, 0, 0, 0, 0, 0};
146 #define AGGR_M_CALLBACK_FLAGS \
147 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)
149 static mac_callbacks_t aggr_m_callbacks
= {
150 AGGR_M_CALLBACK_FLAGS
,
170 aggr_grp_constructor(void *buf
, void *arg
, int kmflag
)
172 aggr_grp_t
*grp
= buf
;
174 bzero(grp
, sizeof (*grp
));
175 mutex_init(&grp
->lg_lacp_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
176 cv_init(&grp
->lg_lacp_cv
, NULL
, CV_DEFAULT
, NULL
);
177 rw_init(&grp
->lg_tx_lock
, NULL
, RW_DRIVER
, NULL
);
178 mutex_init(&grp
->lg_port_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
179 cv_init(&grp
->lg_port_cv
, NULL
, CV_DEFAULT
, NULL
);
180 mutex_init(&grp
->lg_tx_flowctl_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
181 cv_init(&grp
->lg_tx_flowctl_cv
, NULL
, CV_DEFAULT
, NULL
);
182 grp
->lg_link_state
= LINK_STATE_UNKNOWN
;
188 aggr_grp_destructor(void *buf
, void *arg
)
190 aggr_grp_t
*grp
= buf
;
192 if (grp
->lg_tx_ports
!= NULL
) {
193 kmem_free(grp
->lg_tx_ports
,
194 grp
->lg_tx_ports_size
* sizeof (aggr_port_t
*));
197 mutex_destroy(&grp
->lg_lacp_lock
);
198 cv_destroy(&grp
->lg_lacp_cv
);
199 mutex_destroy(&grp
->lg_port_lock
);
200 cv_destroy(&grp
->lg_port_cv
);
201 rw_destroy(&grp
->lg_tx_lock
);
202 mutex_destroy(&grp
->lg_tx_flowctl_lock
);
203 cv_destroy(&grp
->lg_tx_flowctl_cv
);
209 aggr_grp_cache
= kmem_cache_create("aggr_grp_cache",
210 sizeof (aggr_grp_t
), 0, aggr_grp_constructor
,
211 aggr_grp_destructor
, NULL
, NULL
, NULL
, 0);
213 aggr_grp_hash
= mod_hash_create_idhash("aggr_grp_hash",
214 GRP_HASHSZ
, mod_hash_null_valdtor
);
215 rw_init(&aggr_grp_lock
, NULL
, RW_DEFAULT
, NULL
);
219 * Allocate an id space to manage key values (when key is not
220 * specified). The range of the id space will be from
221 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
224 key_ids
= id_space_create("aggr_key_ids", AGGR_MAX_KEY
+ 1, UINT16_MAX
);
225 ASSERT(key_ids
!= NULL
);
231 id_space_destroy(key_ids
);
232 rw_destroy(&aggr_grp_lock
);
233 mod_hash_destroy_idhash(aggr_grp_hash
);
234 kmem_cache_destroy(aggr_grp_cache
);
242 rw_enter(&aggr_grp_lock
, RW_READER
);
243 count
= aggr_grp_cnt
;
244 rw_exit(&aggr_grp_lock
);
249 * Since both the aggr_port_notify_cb() and aggr_port_timer_thread() functions
250 * require the mac perimeter, this function holds a reference on the aggr
251 * and aggr won't call mac_unregister() until this reference drops to 0.
254 aggr_grp_port_hold(aggr_port_t
*port
)
256 aggr_grp_t
*grp
= port
->lp_grp
;
258 AGGR_PORT_REFHOLD(port
);
259 mutex_enter(&grp
->lg_port_lock
);
261 mutex_exit(&grp
->lg_port_lock
);
265 * Release the reference of the grp and inform aggr_grp_delete() calling
266 * mac_unregister() is now safe.
269 aggr_grp_port_rele(aggr_port_t
*port
)
271 aggr_grp_t
*grp
= port
->lp_grp
;
273 mutex_enter(&grp
->lg_port_lock
);
274 if (--grp
->lg_port_ref
== 0)
275 cv_signal(&grp
->lg_port_cv
);
276 mutex_exit(&grp
->lg_port_lock
);
277 AGGR_PORT_REFRELE(port
);
281 * Wait for the port's lacp timer thread and the port's notification callback
285 aggr_grp_port_wait(aggr_grp_t
*grp
)
287 mutex_enter(&grp
->lg_port_lock
);
288 if (grp
->lg_port_ref
!= 0)
289 cv_wait(&grp
->lg_port_cv
, &grp
->lg_port_lock
);
290 mutex_exit(&grp
->lg_port_lock
);
294 * Attach a port to a link aggregation group.
296 * A port is attached to a link aggregation group once its speed
297 * and link state have been verified.
299 * Returns B_TRUE if the group link state or speed has changed. If
300 * it's the case, the caller must notify the MAC layer via a call
304 aggr_grp_attach_port(aggr_grp_t
*grp
, aggr_port_t
*port
)
306 boolean_t link_state_changed
= B_FALSE
;
308 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
309 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
311 if (port
->lp_state
== AGGR_PORT_STATE_ATTACHED
)
315 * Validate the MAC port link speed and update the group
316 * link speed if needed.
318 if (port
->lp_ifspeed
== 0 ||
319 port
->lp_link_state
!= LINK_STATE_UP
||
320 port
->lp_link_duplex
!= LINK_DUPLEX_FULL
) {
322 * Can't attach a MAC port with unknown link speed,
323 * down link, or not in full duplex mode.
328 if (grp
->lg_ifspeed
== 0) {
330 * The group inherits the speed of the first link being
333 grp
->lg_ifspeed
= port
->lp_ifspeed
;
334 link_state_changed
= B_TRUE
;
335 } else if (grp
->lg_ifspeed
!= port
->lp_ifspeed
) {
337 * The link speed of the MAC port must be the same as
338 * the group link speed, as per 802.3ad. Since it is
339 * not, the attach is cancelled.
344 grp
->lg_nattached_ports
++;
347 * Update the group link state.
349 if (grp
->lg_link_state
!= LINK_STATE_UP
) {
350 grp
->lg_link_state
= LINK_STATE_UP
;
351 grp
->lg_link_duplex
= LINK_DUPLEX_FULL
;
352 link_state_changed
= B_TRUE
;
356 * Update port's state.
358 port
->lp_state
= AGGR_PORT_STATE_ATTACHED
;
360 aggr_grp_multicst_port(port
, B_TRUE
);
363 * Set port's receive callback
365 mac_rx_set(port
->lp_mch
, aggr_recv_cb
, port
);
368 * If LACP is OFF, the port can be used to send data as soon
369 * as its link is up and verified to be compatible with the
372 * If LACP is active or passive, notify the LACP subsystem, which
373 * will enable sending on the port following the LACP protocol.
375 if (grp
->lg_lacp_mode
== AGGR_LACP_OFF
)
376 aggr_send_port_enable(port
);
378 aggr_lacp_port_attached(port
);
380 return (link_state_changed
);
384 aggr_grp_detach_port(aggr_grp_t
*grp
, aggr_port_t
*port
)
386 boolean_t link_state_changed
= B_FALSE
;
388 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
389 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
392 if (port
->lp_state
!= AGGR_PORT_STATE_ATTACHED
)
395 mac_rx_clear(port
->lp_mch
);
397 aggr_grp_multicst_port(port
, B_FALSE
);
399 if (grp
->lg_lacp_mode
== AGGR_LACP_OFF
)
400 aggr_send_port_disable(port
);
402 aggr_lacp_port_detached(port
);
404 port
->lp_state
= AGGR_PORT_STATE_STANDBY
;
406 grp
->lg_nattached_ports
--;
407 if (grp
->lg_nattached_ports
== 0) {
408 /* the last attached MAC port of the group is being detached */
410 grp
->lg_link_state
= LINK_STATE_DOWN
;
411 grp
->lg_link_duplex
= LINK_DUPLEX_UNKNOWN
;
412 link_state_changed
= B_TRUE
;
415 return (link_state_changed
);
419 * Update the MAC addresses of the constituent ports of the specified
420 * group. This function is invoked:
421 * - after creating a new aggregation group.
422 * - after adding new ports to an aggregation group.
423 * - after removing a port from a group when the MAC address of
424 * that port was used for the MAC address of the group.
425 * - after the MAC address of a port changed when the MAC address
426 * of that port was used for the MAC address of the group.
428 * Return true if the link state of the aggregation changed, for example
429 * as a result of a failure changing the MAC address of one of the
433 aggr_grp_update_ports_mac(aggr_grp_t
*grp
)
436 boolean_t link_state_changed
= B_FALSE
;
437 mac_perim_handle_t mph
;
439 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
441 for (cport
= grp
->lg_ports
; cport
!= NULL
;
442 cport
= cport
->lp_next
) {
443 mac_perim_enter_by_mh(cport
->lp_mh
, &mph
);
444 if (aggr_port_unicst(cport
) != 0) {
445 if (aggr_grp_detach_port(grp
, cport
))
446 link_state_changed
= B_TRUE
;
449 * If a port was detached because of a previous
450 * failure changing the MAC address, the port is
451 * reattached when it successfully changes the MAC
452 * address now, and this might cause the link state
453 * of the aggregation to change.
455 if (aggr_grp_attach_port(grp
, cport
))
456 link_state_changed
= B_TRUE
;
460 return (link_state_changed
);
464 * Invoked when the MAC address of a port has changed. If the port's
465 * MAC address was used for the group MAC address, set mac_addr_changedp
466 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
467 * notification. If the link state changes due to detach/attach of
468 * the constituent port, set link_state_changedp to B_TRUE to indicate
469 * to the caller that it should send a MAC_NOTE_LINK notification. In both
470 * cases, it is the responsibility of the caller to invoke notification
471 * functions after releasing the port lock.
474 aggr_grp_port_mac_changed(aggr_grp_t
*grp
, aggr_port_t
*port
,
475 boolean_t
*mac_addr_changedp
, boolean_t
*link_state_changedp
)
477 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
478 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
479 ASSERT(mac_addr_changedp
!= NULL
);
480 ASSERT(link_state_changedp
!= NULL
);
482 *mac_addr_changedp
= B_FALSE
;
483 *link_state_changedp
= B_FALSE
;
485 if (grp
->lg_addr_fixed
) {
487 * The group is using a fixed MAC address or an automatic
488 * MAC address has not been set.
493 if (grp
->lg_mac_addr_port
== port
) {
495 * The MAC address of the port was assigned to the group
496 * MAC address. Update the group MAC address.
498 bcopy(port
->lp_addr
, grp
->lg_addr
, ETHERADDRL
);
499 *mac_addr_changedp
= B_TRUE
;
502 * Update the actual port MAC address to the MAC address
505 if (aggr_port_unicst(port
) != 0) {
506 *link_state_changedp
= aggr_grp_detach_port(grp
, port
);
509 * If a port was detached because of a previous
510 * failure changing the MAC address, the port is
511 * reattached when it successfully changes the MAC
512 * address now, and this might cause the link state
513 * of the aggregation to change.
515 *link_state_changedp
= aggr_grp_attach_port(grp
, port
);
521 * Add a port to a link aggregation group.
524 aggr_grp_add_port(aggr_grp_t
*grp
, datalink_id_t port_linkid
, boolean_t force
,
527 aggr_port_t
*port
, **cport
;
528 mac_perim_handle_t mph
;
529 zoneid_t port_zoneid
= ALL_ZONES
;
532 /* The port must be in the same zone as the aggregation. */
533 if (zone_check_datalink(&port_zoneid
, port_linkid
) != 0)
534 port_zoneid
= GLOBAL_ZONEID
;
535 if (grp
->lg_zoneid
!= port_zoneid
)
539 * lg_mh could be NULL when the function is called during the creation
540 * of the aggregation.
542 ASSERT(grp
->lg_mh
== NULL
|| MAC_PERIM_HELD(grp
->lg_mh
));
544 /* create new port */
545 err
= aggr_port_create(grp
, port_linkid
, force
, &port
);
549 mac_perim_enter_by_mh(port
->lp_mh
, &mph
);
551 /* add port to list of group constituent ports */
552 cport
= &grp
->lg_ports
;
553 while (*cport
!= NULL
)
554 cport
= &((*cport
)->lp_next
);
558 * Back reference to the group it is member of. A port always
559 * holds a reference to its group to ensure that the back
560 * reference is always valid.
563 AGGR_GRP_REFHOLD(grp
);
566 aggr_lacp_init_port(port
);
576 * This is called in response to either our LACP state machine or a MAC
577 * notification that the link has gone down via aggr_send_port_disable(). At
578 * this point, we may need to update our default ring. To that end, we go
579 * through the set of ports (underlying datalinks in an aggregation) that are
580 * currently enabled to transmit data. If all our links have been disabled for
581 * transmit, then we don't do anything.
583 * Note, because we only have a single TX group, we don't have to worry about
584 * the rings moving between groups and the chance that mac will reassign it
585 * unless someone removes a port, at which point, we play it safe and call this
589 aggr_grp_update_default(aggr_grp_t
*grp
)
592 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
594 rw_enter(&grp
->lg_tx_lock
, RW_WRITER
);
596 if (grp
->lg_ntx_ports
== 0) {
597 rw_exit(&grp
->lg_tx_lock
);
601 port
= grp
->lg_tx_ports
[0];
602 ASSERT(port
->lp_tx_ring_cnt
> 0);
603 mac_hwring_set_default(grp
->lg_mh
, port
->lp_pseudo_tx_rings
[0]);
604 rw_exit(&grp
->lg_tx_lock
);
608 * Add a pseudo RX ring for the given HW ring handle.
611 aggr_add_pseudo_rx_ring(aggr_port_t
*port
,
612 aggr_pseudo_rx_group_t
*rx_grp
, mac_ring_handle_t hw_rh
)
614 aggr_pseudo_rx_ring_t
*ring
;
618 for (j
= 0; j
< MAX_RINGS_PER_GROUP
; j
++) {
619 ring
= rx_grp
->arg_rings
+ j
;
620 if (!(ring
->arr_flags
& MAC_PSEUDO_RING_INUSE
))
625 * No slot for this new RX ring.
627 if (j
== MAX_RINGS_PER_GROUP
)
630 ring
->arr_flags
|= MAC_PSEUDO_RING_INUSE
;
631 ring
->arr_hw_rh
= hw_rh
;
632 ring
->arr_port
= port
;
633 rx_grp
->arg_ring_cnt
++;
636 * The group is already registered, dynamically add a new ring to the
639 if ((err
= mac_group_add_ring(rx_grp
->arg_gh
, j
)) != 0) {
640 ring
->arr_flags
&= ~MAC_PSEUDO_RING_INUSE
;
641 ring
->arr_hw_rh
= NULL
;
642 ring
->arr_port
= NULL
;
643 rx_grp
->arg_ring_cnt
--;
645 mac_hwring_setup(hw_rh
, (mac_resource_handle_t
)ring
,
646 mac_find_ring(rx_grp
->arg_gh
, j
));
652 * Remove the pseudo RX ring of the given HW ring handle.
655 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t
*rx_grp
, mac_ring_handle_t hw_rh
)
657 aggr_pseudo_rx_ring_t
*ring
;
660 for (j
= 0; j
< MAX_RINGS_PER_GROUP
; j
++) {
661 ring
= rx_grp
->arg_rings
+ j
;
662 if (!(ring
->arr_flags
& MAC_PSEUDO_RING_INUSE
) ||
663 ring
->arr_hw_rh
!= hw_rh
) {
667 mac_group_rem_ring(rx_grp
->arg_gh
, ring
->arr_rh
);
669 ring
->arr_flags
&= ~MAC_PSEUDO_RING_INUSE
;
670 ring
->arr_hw_rh
= NULL
;
671 ring
->arr_port
= NULL
;
672 rx_grp
->arg_ring_cnt
--;
673 mac_hwring_teardown(hw_rh
);
679 * This function is called to create pseudo rings over the hardware rings of
680 * the underlying device. Note that there is a 1:1 mapping between the pseudo
681 * RX rings of the aggr and the hardware rings of the underlying port.
684 aggr_add_pseudo_rx_group(aggr_port_t
*port
, aggr_pseudo_rx_group_t
*rx_grp
)
686 aggr_grp_t
*grp
= port
->lp_grp
;
687 mac_ring_handle_t hw_rh
[MAX_RINGS_PER_GROUP
];
688 aggr_unicst_addr_t
*addr
, *a
;
689 mac_perim_handle_t pmph
;
690 int hw_rh_cnt
, i
= 0, j
;
693 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
694 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
697 * This function must be called after the aggr registers its mac
698 * and its RX group has been initialized.
700 ASSERT(rx_grp
->arg_gh
!= NULL
);
703 * Get the list of the underlying HW rings.
705 hw_rh_cnt
= mac_hwrings_get(port
->lp_mch
,
706 &port
->lp_hwgh
, hw_rh
, MAC_RING_TYPE_RX
);
708 if (port
->lp_hwgh
!= NULL
) {
710 * Quiesce the HW ring and the mac srs on the ring. Note
711 * that the HW ring will be restarted when the pseudo ring
712 * is started. At that time all the packets will be
713 * directly passed up to the pseudo RX ring and handled
714 * by mac srs created over the pseudo RX ring.
716 mac_rx_client_quiesce(port
->lp_mch
);
717 mac_srs_perm_quiesce(port
->lp_mch
, B_TRUE
);
721 * Add all the unicast addresses to the newly added port.
723 for (addr
= rx_grp
->arg_macaddr
; addr
!= NULL
; addr
= addr
->aua_next
) {
724 if ((err
= aggr_port_addmac(port
, addr
->aua_addr
)) != 0)
728 for (i
= 0; err
== 0 && i
< hw_rh_cnt
; i
++)
729 err
= aggr_add_pseudo_rx_ring(port
, rx_grp
, hw_rh
[i
]);
732 for (j
= 0; j
< i
; j
++)
733 aggr_rem_pseudo_rx_ring(rx_grp
, hw_rh
[j
]);
735 for (a
= rx_grp
->arg_macaddr
; a
!= addr
; a
= a
->aua_next
)
736 aggr_port_remmac(port
, a
->aua_addr
);
738 if (port
->lp_hwgh
!= NULL
) {
739 mac_srs_perm_quiesce(port
->lp_mch
, B_FALSE
);
740 mac_rx_client_restart(port
->lp_mch
);
741 port
->lp_hwgh
= NULL
;
744 port
->lp_rx_grp_added
= B_TRUE
;
747 mac_perim_exit(pmph
);
752 * This function is called by aggr to remove pseudo RX rings over the
753 * HW rings of the underlying port.
756 aggr_rem_pseudo_rx_group(aggr_port_t
*port
, aggr_pseudo_rx_group_t
*rx_grp
)
758 aggr_grp_t
*grp
= port
->lp_grp
;
759 mac_ring_handle_t hw_rh
[MAX_RINGS_PER_GROUP
];
760 aggr_unicst_addr_t
*addr
;
761 mac_group_handle_t hwgh
;
762 mac_perim_handle_t pmph
;
765 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
766 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
768 if (!port
->lp_rx_grp_added
)
771 ASSERT(rx_grp
->arg_gh
!= NULL
);
772 hw_rh_cnt
= mac_hwrings_get(port
->lp_mch
,
773 &hwgh
, hw_rh
, MAC_RING_TYPE_RX
);
776 * If hw_rh_cnt is 0, it means that the underlying port does not
777 * support RX rings. Directly return in this case.
779 for (i
= 0; i
< hw_rh_cnt
; i
++)
780 aggr_rem_pseudo_rx_ring(rx_grp
, hw_rh
[i
]);
782 for (addr
= rx_grp
->arg_macaddr
; addr
!= NULL
; addr
= addr
->aua_next
)
783 aggr_port_remmac(port
, addr
->aua_addr
);
785 if (port
->lp_hwgh
!= NULL
) {
786 port
->lp_hwgh
= NULL
;
789 * First clear the permanent-quiesced flag of the RX srs then
790 * restart the HW ring and the mac srs on the ring. Note that
791 * the HW ring and associated SRS will soon be removed when
792 * the port is removed from the aggr.
794 mac_srs_perm_quiesce(port
->lp_mch
, B_FALSE
);
795 mac_rx_client_restart(port
->lp_mch
);
798 port
->lp_rx_grp_added
= B_FALSE
;
800 mac_perim_exit(pmph
);
804 * Add a pseudo TX ring for the given HW ring handle.
807 aggr_add_pseudo_tx_ring(aggr_port_t
*port
,
808 aggr_pseudo_tx_group_t
*tx_grp
, mac_ring_handle_t hw_rh
,
809 mac_ring_handle_t
*pseudo_rh
)
811 aggr_pseudo_tx_ring_t
*ring
;
815 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
816 for (i
= 0; i
< MAX_RINGS_PER_GROUP
; i
++) {
817 ring
= tx_grp
->atg_rings
+ i
;
818 if (!(ring
->atr_flags
& MAC_PSEUDO_RING_INUSE
))
822 * No slot for this new TX ring.
824 if (i
== MAX_RINGS_PER_GROUP
)
827 * The following 4 statements need to be done before
828 * calling mac_group_add_ring(). Otherwise it will
829 * result in an assertion failure in mac_init_ring().
831 ring
->atr_flags
|= MAC_PSEUDO_RING_INUSE
;
832 ring
->atr_hw_rh
= hw_rh
;
833 ring
->atr_port
= port
;
834 tx_grp
->atg_ring_cnt
++;
837 * The TX side has no concept of ring groups unlike RX groups.
838 * There is just a single group which stores all the TX rings.
839 * This group will be used to store aggr's pseudo TX rings.
841 if ((err
= mac_group_add_ring(tx_grp
->atg_gh
, i
)) != 0) {
842 ring
->atr_flags
&= ~MAC_PSEUDO_RING_INUSE
;
843 ring
->atr_hw_rh
= NULL
;
844 ring
->atr_port
= NULL
;
845 tx_grp
->atg_ring_cnt
--;
847 *pseudo_rh
= mac_find_ring(tx_grp
->atg_gh
, i
);
849 mac_hwring_setup(hw_rh
, (mac_resource_handle_t
)ring
,
850 mac_find_ring(tx_grp
->atg_gh
, i
));
858 * Remove the pseudo TX ring of the given HW ring handle.
861 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t
*tx_grp
,
862 mac_ring_handle_t pseudo_hw_rh
)
864 aggr_pseudo_tx_ring_t
*ring
;
867 for (i
= 0; i
< MAX_RINGS_PER_GROUP
; i
++) {
868 ring
= tx_grp
->atg_rings
+ i
;
869 if (ring
->atr_rh
!= pseudo_hw_rh
)
872 ASSERT(ring
->atr_flags
& MAC_PSEUDO_RING_INUSE
);
873 mac_group_rem_ring(tx_grp
->atg_gh
, pseudo_hw_rh
);
874 ring
->atr_flags
&= ~MAC_PSEUDO_RING_INUSE
;
875 mac_hwring_teardown(ring
->atr_hw_rh
);
876 ring
->atr_hw_rh
= NULL
;
877 ring
->atr_port
= NULL
;
878 tx_grp
->atg_ring_cnt
--;
884 * This function is called to create pseudo rings over hardware rings of
885 * the underlying device. There is a 1:1 mapping between the pseudo TX
886 * rings of the aggr and the hardware rings of the underlying port.
889 aggr_add_pseudo_tx_group(aggr_port_t
*port
, aggr_pseudo_tx_group_t
*tx_grp
)
891 aggr_grp_t
*grp
= port
->lp_grp
;
892 mac_ring_handle_t hw_rh
[MAX_RINGS_PER_GROUP
], pseudo_rh
;
893 mac_perim_handle_t pmph
;
894 int hw_rh_cnt
, i
= 0, j
;
897 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
898 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
901 * Get the list of the underlying HW rings.
903 hw_rh_cnt
= mac_hwrings_get(port
->lp_mch
,
904 NULL
, hw_rh
, MAC_RING_TYPE_TX
);
907 * Even if the underlying NIC does not have TX rings, we
908 * still make a pseudo TX ring for that NIC with NULL as
912 port
->lp_tx_ring_cnt
= 1;
914 port
->lp_tx_ring_cnt
= hw_rh_cnt
;
916 port
->lp_tx_rings
= kmem_zalloc((sizeof (mac_ring_handle_t
*) *
917 port
->lp_tx_ring_cnt
), KM_SLEEP
);
918 port
->lp_pseudo_tx_rings
= kmem_zalloc((sizeof (mac_ring_handle_t
*) *
919 port
->lp_tx_ring_cnt
), KM_SLEEP
);
921 if (hw_rh_cnt
== 0) {
922 if ((err
= aggr_add_pseudo_tx_ring(port
, tx_grp
,
923 NULL
, &pseudo_rh
)) == 0) {
924 port
->lp_tx_rings
[0] = NULL
;
925 port
->lp_pseudo_tx_rings
[0] = pseudo_rh
;
928 for (i
= 0; err
== 0 && i
< hw_rh_cnt
; i
++) {
929 err
= aggr_add_pseudo_tx_ring(port
,
930 tx_grp
, hw_rh
[i
], &pseudo_rh
);
933 port
->lp_tx_rings
[i
] = hw_rh
[i
];
934 port
->lp_pseudo_tx_rings
[i
] = pseudo_rh
;
939 if (hw_rh_cnt
!= 0) {
940 for (j
= 0; j
< i
; j
++) {
941 aggr_rem_pseudo_tx_ring(tx_grp
,
942 port
->lp_pseudo_tx_rings
[j
]);
945 kmem_free(port
->lp_tx_rings
,
946 (sizeof (mac_ring_handle_t
*) * port
->lp_tx_ring_cnt
));
947 kmem_free(port
->lp_pseudo_tx_rings
,
948 (sizeof (mac_ring_handle_t
*) * port
->lp_tx_ring_cnt
));
949 port
->lp_tx_ring_cnt
= 0;
951 port
->lp_tx_grp_added
= B_TRUE
;
952 port
->lp_tx_notify_mh
= mac_client_tx_notify(port
->lp_mch
,
953 aggr_tx_ring_update
, port
);
955 mac_perim_exit(pmph
);
956 aggr_grp_update_default(grp
);
961 * This function is called by aggr to remove pseudo TX rings over the
962 * HW rings of the underlying port.
965 aggr_rem_pseudo_tx_group(aggr_port_t
*port
, aggr_pseudo_tx_group_t
*tx_grp
)
967 aggr_grp_t
*grp
= port
->lp_grp
;
968 mac_perim_handle_t pmph
;
971 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
972 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
974 if (!port
->lp_tx_grp_added
)
977 ASSERT(tx_grp
->atg_gh
!= NULL
);
979 for (i
= 0; i
< port
->lp_tx_ring_cnt
; i
++)
980 aggr_rem_pseudo_tx_ring(tx_grp
, port
->lp_pseudo_tx_rings
[i
]);
982 kmem_free(port
->lp_tx_rings
,
983 (sizeof (mac_ring_handle_t
*) * port
->lp_tx_ring_cnt
));
984 kmem_free(port
->lp_pseudo_tx_rings
,
985 (sizeof (mac_ring_handle_t
*) * port
->lp_tx_ring_cnt
));
987 port
->lp_tx_ring_cnt
= 0;
988 (void) mac_client_tx_notify(port
->lp_mch
, NULL
, port
->lp_tx_notify_mh
);
989 port
->lp_tx_grp_added
= B_FALSE
;
990 aggr_grp_update_default(grp
);
992 mac_perim_exit(pmph
);
996 aggr_pseudo_disable_intr(mac_intr_handle_t ih
)
998 aggr_pseudo_rx_ring_t
*rr_ring
= (aggr_pseudo_rx_ring_t
*)ih
;
999 return (mac_hwring_disable_intr(rr_ring
->arr_hw_rh
));
1003 aggr_pseudo_enable_intr(mac_intr_handle_t ih
)
1005 aggr_pseudo_rx_ring_t
*rr_ring
= (aggr_pseudo_rx_ring_t
*)ih
;
1006 return (mac_hwring_enable_intr(rr_ring
->arr_hw_rh
));
1010 aggr_pseudo_start_ring(mac_ring_driver_t arg
, uint64_t mr_gen
)
1012 aggr_pseudo_rx_ring_t
*rr_ring
= (aggr_pseudo_rx_ring_t
*)arg
;
1015 err
= mac_hwring_start(rr_ring
->arr_hw_rh
);
1017 rr_ring
->arr_gen
= mr_gen
;
1022 aggr_pseudo_stop_ring(mac_ring_driver_t arg
)
1024 aggr_pseudo_rx_ring_t
*rr_ring
= (aggr_pseudo_rx_ring_t
*)arg
;
1025 mac_hwring_stop(rr_ring
->arr_hw_rh
);
1029 * Add one or more ports to an existing link aggregation group.
1032 aggr_grp_add_ports(datalink_id_t linkid
, uint_t nports
, boolean_t force
,
1033 laioc_port_t
*ports
)
1035 int rc
, i
, nadded
= 0;
1036 aggr_grp_t
*grp
= NULL
;
1038 boolean_t link_state_changed
= B_FALSE
;
1039 mac_perim_handle_t mph
, pmph
;
1041 /* get group corresponding to linkid */
1042 rw_enter(&aggr_grp_lock
, RW_READER
);
1043 if (mod_hash_find(aggr_grp_hash
, GRP_HASH_KEY(linkid
),
1044 (mod_hash_val_t
*)&grp
) != 0) {
1045 rw_exit(&aggr_grp_lock
);
1048 AGGR_GRP_REFHOLD(grp
);
1051 * Hold the perimeter so that the aggregation won't be destroyed.
1053 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1054 rw_exit(&aggr_grp_lock
);
1056 /* add the specified ports to group */
1057 for (i
= 0; i
< nports
; i
++) {
1058 /* add port to group */
1059 if ((rc
= aggr_grp_add_port(grp
, ports
[i
].lp_linkid
,
1060 force
, &port
)) != 0) {
1063 ASSERT(port
!= NULL
);
1066 /* check capabilities */
1067 if (!aggr_grp_capab_check(grp
, port
) ||
1068 !aggr_grp_sdu_check(grp
, port
) ||
1069 !aggr_grp_margin_check(grp
, port
)) {
1075 * Create the pseudo ring for each HW ring of the underlying
1078 rc
= aggr_add_pseudo_tx_group(port
, &grp
->lg_tx_group
);
1081 rc
= aggr_add_pseudo_rx_group(port
, &grp
->lg_rx_group
);
1085 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1088 aggr_port_lacp_set_mode(grp
, port
);
1090 /* start port if group has already been started */
1091 if (grp
->lg_started
) {
1092 rc
= aggr_port_start(port
);
1094 mac_perim_exit(pmph
);
1099 * Turn on the promiscuous mode over the port when it
1100 * is requested to be turned on to receive the
1101 * non-primary address over a port, or the promiscuous
1102 * mode is enabled over the aggr.
1104 if (grp
->lg_promisc
|| port
->lp_prom_addr
!= NULL
) {
1105 rc
= aggr_port_promisc(port
, B_TRUE
);
1107 mac_perim_exit(pmph
);
1112 mac_perim_exit(pmph
);
1115 * Attach each port if necessary.
1117 if (aggr_port_notify_link(grp
, port
))
1118 link_state_changed
= B_TRUE
;
1121 * Initialize the callback functions for this port.
1123 aggr_port_init_callbacks(port
);
1126 /* update the MAC address of the constituent ports */
1127 if (aggr_grp_update_ports_mac(grp
))
1128 link_state_changed
= B_TRUE
;
1130 if (link_state_changed
)
1131 mac_link_update(grp
->lg_mh
, grp
->lg_link_state
);
1135 /* stop and remove ports that have been added */
1136 for (i
= 0; i
< nadded
; i
++) {
1137 port
= aggr_grp_port_lookup(grp
, ports
[i
].lp_linkid
);
1138 ASSERT(port
!= NULL
);
1139 if (grp
->lg_started
) {
1140 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1141 (void) aggr_port_promisc(port
, B_FALSE
);
1142 aggr_port_stop(port
);
1143 mac_perim_exit(pmph
);
1145 aggr_rem_pseudo_tx_group(port
, &grp
->lg_tx_group
);
1146 aggr_rem_pseudo_rx_group(port
, &grp
->lg_rx_group
);
1147 (void) aggr_grp_rem_port(grp
, port
, NULL
, NULL
);
1151 mac_perim_exit(mph
);
1152 AGGR_GRP_REFRELE(grp
);
1157 aggr_grp_modify_common(aggr_grp_t
*grp
, uint8_t update_mask
, uint32_t policy
,
1158 boolean_t mac_fixed
, const uchar_t
*mac_addr
, aggr_lacp_mode_t lacp_mode
,
1159 aggr_lacp_timer_t lacp_timer
)
1161 boolean_t mac_addr_changed
= B_FALSE
;
1162 boolean_t link_state_changed
= B_FALSE
;
1163 mac_perim_handle_t pmph
;
1165 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
1167 /* validate fixed address if specified */
1168 if ((update_mask
& AGGR_MODIFY_MAC
) && mac_fixed
&&
1169 ((bcmp(aggr_zero_mac
, mac_addr
, ETHERADDRL
) == 0) ||
1170 (mac_addr
[0] & 0x01))) {
1174 /* update policy if requested */
1175 if (update_mask
& AGGR_MODIFY_POLICY
)
1176 aggr_send_update_policy(grp
, policy
);
1178 /* update unicast MAC address if requested */
1179 if (update_mask
& AGGR_MODIFY_MAC
) {
1181 /* user-supplied MAC address */
1182 grp
->lg_mac_addr_port
= NULL
;
1183 if (bcmp(mac_addr
, grp
->lg_addr
, ETHERADDRL
) != 0) {
1184 bcopy(mac_addr
, grp
->lg_addr
, ETHERADDRL
);
1185 mac_addr_changed
= B_TRUE
;
1187 } else if (grp
->lg_addr_fixed
) {
1188 /* switch from user-supplied to automatic */
1189 aggr_port_t
*port
= grp
->lg_ports
;
1191 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1192 bcopy(port
->lp_addr
, grp
->lg_addr
, ETHERADDRL
);
1193 grp
->lg_mac_addr_port
= port
;
1194 mac_addr_changed
= B_TRUE
;
1195 mac_perim_exit(pmph
);
1197 grp
->lg_addr_fixed
= mac_fixed
;
1200 if (mac_addr_changed
)
1201 link_state_changed
= aggr_grp_update_ports_mac(grp
);
1203 if (update_mask
& AGGR_MODIFY_LACP_MODE
)
1204 aggr_lacp_update_mode(grp
, lacp_mode
);
1206 if (update_mask
& AGGR_MODIFY_LACP_TIMER
)
1207 aggr_lacp_update_timer(grp
, lacp_timer
);
1209 if (link_state_changed
)
1210 mac_link_update(grp
->lg_mh
, grp
->lg_link_state
);
1212 if (mac_addr_changed
)
1213 mac_unicst_update(grp
->lg_mh
, grp
->lg_addr
);
1219 * Update properties of an existing link aggregation group.
1222 aggr_grp_modify(datalink_id_t linkid
, uint8_t update_mask
, uint32_t policy
,
1223 boolean_t mac_fixed
, const uchar_t
*mac_addr
, aggr_lacp_mode_t lacp_mode
,
1224 aggr_lacp_timer_t lacp_timer
)
1226 aggr_grp_t
*grp
= NULL
;
1227 mac_perim_handle_t mph
;
1230 /* get group corresponding to linkid */
1231 rw_enter(&aggr_grp_lock
, RW_READER
);
1232 if (mod_hash_find(aggr_grp_hash
, GRP_HASH_KEY(linkid
),
1233 (mod_hash_val_t
*)&grp
) != 0) {
1234 rw_exit(&aggr_grp_lock
);
1237 AGGR_GRP_REFHOLD(grp
);
1240 * Hold the perimeter so that the aggregation won't be destroyed.
1242 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1243 rw_exit(&aggr_grp_lock
);
1245 err
= aggr_grp_modify_common(grp
, update_mask
, policy
, mac_fixed
,
1246 mac_addr
, lacp_mode
, lacp_timer
);
1248 mac_perim_exit(mph
);
1249 AGGR_GRP_REFRELE(grp
);
1254 * Create a new link aggregation group upon request from administrator.
1255 * Returns 0 on success, an errno on failure.
1258 aggr_grp_create(datalink_id_t linkid
, uint32_t key
, uint_t nports
,
1259 laioc_port_t
*ports
, uint32_t policy
, boolean_t mac_fixed
, boolean_t force
,
1260 uchar_t
*mac_addr
, aggr_lacp_mode_t lacp_mode
, aggr_lacp_timer_t lacp_timer
,
1263 aggr_grp_t
*grp
= NULL
;
1265 mac_register_t
*mac
;
1266 boolean_t link_state_changed
;
1267 mac_perim_handle_t mph
;
1272 /* need at least one port */
1276 rw_enter(&aggr_grp_lock
, RW_WRITER
);
1278 /* does a group with the same linkid already exist? */
1279 err
= mod_hash_find(aggr_grp_hash
, GRP_HASH_KEY(linkid
),
1280 (mod_hash_val_t
*)&grp
);
1282 rw_exit(&aggr_grp_lock
);
1286 grp
= kmem_cache_alloc(aggr_grp_cache
, KM_SLEEP
);
1289 grp
->lg_closing
= B_FALSE
;
1290 grp
->lg_force
= force
;
1291 grp
->lg_linkid
= linkid
;
1292 grp
->lg_zoneid
= crgetzoneid(credp
);
1293 grp
->lg_ifspeed
= 0;
1294 grp
->lg_link_state
= LINK_STATE_UNKNOWN
;
1295 grp
->lg_link_duplex
= LINK_DUPLEX_UNKNOWN
;
1296 grp
->lg_started
= B_FALSE
;
1297 grp
->lg_promisc
= B_FALSE
;
1298 grp
->lg_lacp_done
= B_FALSE
;
1299 grp
->lg_tx_notify_done
= B_FALSE
;
1300 grp
->lg_lacp_head
= grp
->lg_lacp_tail
= NULL
;
1301 grp
->lg_lacp_rx_thread
= thread_create(NULL
, 0,
1302 aggr_lacp_rx_thread
, grp
, 0, &p0
, TS_RUN
, minclsyspri
);
1303 grp
->lg_tx_notify_thread
= thread_create(NULL
, 0,
1304 aggr_tx_notify_thread
, grp
, 0, &p0
, TS_RUN
, minclsyspri
);
1305 grp
->lg_tx_blocked_rings
= kmem_zalloc((sizeof (mac_ring_handle_t
*) *
1306 MAX_RINGS_PER_GROUP
), KM_SLEEP
);
1307 grp
->lg_tx_blocked_cnt
= 0;
1308 bzero(&grp
->lg_rx_group
, sizeof (aggr_pseudo_rx_group_t
));
1309 bzero(&grp
->lg_tx_group
, sizeof (aggr_pseudo_tx_group_t
));
1310 aggr_lacp_init_grp(grp
);
1312 /* add MAC ports to group */
1313 grp
->lg_ports
= NULL
;
1315 grp
->lg_nattached_ports
= 0;
1316 grp
->lg_ntx_ports
= 0;
1319 * If key is not specified by the user, allocate the key.
1321 if ((key
== 0) && ((key
= (uint32_t)id_alloc(key_ids
)) == 0)) {
1327 for (i
= 0; i
< nports
; i
++) {
1328 err
= aggr_grp_add_port(grp
, ports
[i
].lp_linkid
, force
, NULL
);
1334 * If no explicit MAC address was specified by the administrator,
1335 * set it to the MAC address of the first port.
1337 grp
->lg_addr_fixed
= mac_fixed
;
1338 if (grp
->lg_addr_fixed
) {
1339 /* validate specified address */
1340 if (bcmp(aggr_zero_mac
, mac_addr
, ETHERADDRL
) == 0) {
1344 bcopy(mac_addr
, grp
->lg_addr
, ETHERADDRL
);
1346 bcopy(grp
->lg_ports
->lp_addr
, grp
->lg_addr
, ETHERADDRL
);
1347 grp
->lg_mac_addr_port
= grp
->lg_ports
;
1350 /* set the initial group capabilities */
1351 aggr_grp_capab_set(grp
);
1353 if ((mac
= mac_alloc(MAC_VERSION
)) == NULL
) {
1357 mac
->m_type_ident
= MAC_PLUGIN_IDENT_ETHER
;
1358 mac
->m_driver
= grp
;
1359 mac
->m_dip
= aggr_dip
;
1360 mac
->m_instance
= grp
->lg_key
> AGGR_MAX_KEY
? (uint_t
)-1 : grp
->lg_key
;
1361 mac
->m_src_addr
= grp
->lg_addr
;
1362 mac
->m_callbacks
= &aggr_m_callbacks
;
1364 mac
->m_max_sdu
= grp
->lg_max_sdu
= aggr_grp_max_sdu(grp
);
1365 mac
->m_margin
= aggr_grp_max_margin(grp
);
1366 mac
->m_v12n
= MAC_VIRT_LEVEL1
;
1367 err
= mac_register(mac
, &grp
->lg_mh
);
1372 err
= dls_devnet_create(grp
->lg_mh
, grp
->lg_linkid
, crgetzoneid(credp
));
1374 (void) mac_unregister(grp
->lg_mh
);
1379 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1382 * Update the MAC address of the constituent ports.
1383 * None of the port is attached at this time, the link state of the
1384 * aggregation will not change.
1386 link_state_changed
= aggr_grp_update_ports_mac(grp
);
1387 ASSERT(!link_state_changed
);
1389 /* update outbound load balancing policy */
1390 aggr_send_update_policy(grp
, policy
);
1393 aggr_lacp_set_mode(grp
, lacp_mode
, lacp_timer
);
1396 * Attach each port if necessary.
1398 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
1400 * Create the pseudo ring for each HW ring of the underlying
1401 * port. Note that this is done after the aggr registers the
1404 VERIFY(aggr_add_pseudo_tx_group(port
, &grp
->lg_tx_group
) == 0);
1405 VERIFY(aggr_add_pseudo_rx_group(port
, &grp
->lg_rx_group
) == 0);
1406 if (aggr_port_notify_link(grp
, port
))
1407 link_state_changed
= B_TRUE
;
1410 * Initialize the callback functions for this port.
1412 aggr_port_init_callbacks(port
);
1415 if (link_state_changed
)
1416 mac_link_update(grp
->lg_mh
, grp
->lg_link_state
);
1418 /* add new group to hash table */
1419 err
= mod_hash_insert(aggr_grp_hash
, GRP_HASH_KEY(linkid
),
1420 (mod_hash_val_t
)grp
);
1424 mac_perim_exit(mph
);
1425 rw_exit(&aggr_grp_lock
);
1430 grp
->lg_closing
= B_TRUE
;
1432 port
= grp
->lg_ports
;
1433 while (port
!= NULL
) {
1436 cport
= port
->lp_next
;
1437 aggr_port_delete(port
);
1442 * Inform the lacp_rx thread to exit.
1444 mutex_enter(&grp
->lg_lacp_lock
);
1445 grp
->lg_lacp_done
= B_TRUE
;
1446 cv_signal(&grp
->lg_lacp_cv
);
1447 while (grp
->lg_lacp_rx_thread
!= NULL
)
1448 cv_wait(&grp
->lg_lacp_cv
, &grp
->lg_lacp_lock
);
1449 mutex_exit(&grp
->lg_lacp_lock
);
1451 * Inform the tx_notify thread to exit.
1453 mutex_enter(&grp
->lg_tx_flowctl_lock
);
1454 if (grp
->lg_tx_notify_thread
!= NULL
) {
1455 tid
= grp
->lg_tx_notify_thread
->t_did
;
1456 grp
->lg_tx_notify_done
= B_TRUE
;
1457 cv_signal(&grp
->lg_tx_flowctl_cv
);
1459 mutex_exit(&grp
->lg_tx_flowctl_lock
);
1463 kmem_free(grp
->lg_tx_blocked_rings
,
1464 (sizeof (mac_ring_handle_t
*) * MAX_RINGS_PER_GROUP
));
1465 rw_exit(&aggr_grp_lock
);
1466 AGGR_GRP_REFRELE(grp
);
1471 * Return a pointer to the member of a group with specified linkid.
1473 static aggr_port_t
*
1474 aggr_grp_port_lookup(aggr_grp_t
*grp
, datalink_id_t linkid
)
1478 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
1480 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
1481 if (port
->lp_linkid
== linkid
)
1489 * Stop, detach and remove a port from a link aggregation group.
1492 aggr_grp_rem_port(aggr_grp_t
*grp
, aggr_port_t
*port
,
1493 boolean_t
*mac_addr_changedp
, boolean_t
*link_state_changedp
)
1496 aggr_port_t
**pport
;
1497 boolean_t mac_addr_changed
= B_FALSE
;
1498 boolean_t link_state_changed
= B_FALSE
;
1499 mac_perim_handle_t mph
;
1504 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
1505 ASSERT(grp
->lg_nports
> 1);
1506 ASSERT(!grp
->lg_closing
);
1509 for (pport
= &grp
->lg_ports
; *pport
!= port
;
1510 pport
= &(*pport
)->lp_next
) {
1511 if (*pport
== NULL
) {
1516 *pport
= port
->lp_next
;
1518 mac_perim_enter_by_mh(port
->lp_mh
, &mph
);
1521 * If the MAC address of the port being removed was assigned
1522 * to the group, update the group MAC address
1523 * using the MAC address of a different port.
1525 if (!grp
->lg_addr_fixed
&& grp
->lg_mac_addr_port
== port
) {
1527 * Set the MAC address of the group to the
1528 * MAC address of its first port.
1530 bcopy(grp
->lg_ports
->lp_addr
, grp
->lg_addr
, ETHERADDRL
);
1531 grp
->lg_mac_addr_port
= grp
->lg_ports
;
1532 mac_addr_changed
= B_TRUE
;
1535 link_state_changed
= aggr_grp_detach_port(grp
, port
);
1538 * Add the counter statistics of the ports while it was aggregated
1539 * to the group's residual statistics. This is done by obtaining
1540 * the current counter from the underlying MAC then subtracting the
1541 * value of the counter at the moment it was added to the
1544 for (i
= 0; i
< MAC_NSTAT
; i
++) {
1545 stat
= i
+ MAC_STAT_MIN
;
1546 if (!MAC_STAT_ISACOUNTER(stat
))
1548 val
= aggr_port_stat(port
, stat
);
1549 val
-= port
->lp_stat
[i
];
1550 grp
->lg_stat
[i
] += val
;
1552 for (i
= 0; i
< ETHER_NSTAT
; i
++) {
1553 stat
= i
+ MACTYPE_STAT_MIN
;
1554 if (!ETHER_STAT_ISACOUNTER(stat
))
1556 val
= aggr_port_stat(port
, stat
);
1557 val
-= port
->lp_ether_stat
[i
];
1558 grp
->lg_ether_stat
[i
] += val
;
1562 mac_perim_exit(mph
);
1564 aggr_rem_pseudo_tx_group(port
, &grp
->lg_tx_group
);
1565 aggr_port_delete(port
);
1568 * If the group MAC address has changed, update the MAC address of
1569 * the remaining constituent ports according to the new MAC
1570 * address of the group.
1572 if (mac_addr_changed
&& aggr_grp_update_ports_mac(grp
))
1573 link_state_changed
= B_TRUE
;
1576 if (mac_addr_changedp
!= NULL
)
1577 *mac_addr_changedp
= mac_addr_changed
;
1578 if (link_state_changedp
!= NULL
)
1579 *link_state_changedp
= link_state_changed
;
1585 * Remove one or more ports from an existing link aggregation group.
1588 aggr_grp_rem_ports(datalink_id_t linkid
, uint_t nports
, laioc_port_t
*ports
)
1591 aggr_grp_t
*grp
= NULL
;
1593 boolean_t mac_addr_update
= B_FALSE
, mac_addr_changed
;
1594 boolean_t link_state_update
= B_FALSE
, link_state_changed
;
1595 mac_perim_handle_t mph
, pmph
;
1597 /* get group corresponding to linkid */
1598 rw_enter(&aggr_grp_lock
, RW_READER
);
1599 if (mod_hash_find(aggr_grp_hash
, GRP_HASH_KEY(linkid
),
1600 (mod_hash_val_t
*)&grp
) != 0) {
1601 rw_exit(&aggr_grp_lock
);
1604 AGGR_GRP_REFHOLD(grp
);
1607 * Hold the perimeter so that the aggregation won't be destroyed.
1609 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1610 rw_exit(&aggr_grp_lock
);
1612 /* we need to keep at least one port per group */
1613 if (nports
>= grp
->lg_nports
) {
1618 /* first verify that all the groups are valid */
1619 for (i
= 0; i
< nports
; i
++) {
1620 if (aggr_grp_port_lookup(grp
, ports
[i
].lp_linkid
) == NULL
) {
1621 /* port not found */
1627 /* clear the promiscous mode for the specified ports */
1628 for (i
= 0; i
< nports
&& rc
== 0; i
++) {
1630 port
= aggr_grp_port_lookup(grp
, ports
[i
].lp_linkid
);
1631 ASSERT(port
!= NULL
);
1633 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1634 rc
= aggr_port_promisc(port
, B_FALSE
);
1635 mac_perim_exit(pmph
);
1638 for (i
= 0; i
< nports
; i
++) {
1639 port
= aggr_grp_port_lookup(grp
,
1640 ports
[i
].lp_linkid
);
1641 ASSERT(port
!= NULL
);
1644 * Turn the promiscuous mode back on if it is required
1645 * to receive the non-primary address over a port, or
1646 * the promiscous mode is enabled over the aggr.
1648 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1649 if (port
->lp_started
&& (grp
->lg_promisc
||
1650 port
->lp_prom_addr
!= NULL
)) {
1651 (void) aggr_port_promisc(port
, B_TRUE
);
1653 mac_perim_exit(pmph
);
1658 /* remove the specified ports from group */
1659 for (i
= 0; i
< nports
; i
++) {
1661 port
= aggr_grp_port_lookup(grp
, ports
[i
].lp_linkid
);
1662 ASSERT(port
!= NULL
);
1664 /* stop port if group has already been started */
1665 if (grp
->lg_started
) {
1666 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1667 aggr_port_stop(port
);
1668 mac_perim_exit(pmph
);
1672 * aggr_rem_pseudo_tx_group() is not called here. Instead
1673 * it is called from inside aggr_grp_rem_port() after the
1674 * port has been detached. The reason is that
1675 * aggr_rem_pseudo_tx_group() removes one ring at a time
1676 * and if there is still traffic going on, then there
1677 * is the possibility of aggr_find_tx_ring() returning a
1678 * removed ring for transmission. Once the port has been
1679 * detached, that port will not be used and
1680 * aggr_find_tx_ring() will not return any rings
1683 aggr_rem_pseudo_rx_group(port
, &grp
->lg_rx_group
);
1685 /* remove port from group */
1686 rc
= aggr_grp_rem_port(grp
, port
, &mac_addr_changed
,
1687 &link_state_changed
);
1689 mac_addr_update
= mac_addr_update
|| mac_addr_changed
;
1690 link_state_update
= link_state_update
|| link_state_changed
;
1694 if (mac_addr_update
)
1695 mac_unicst_update(grp
->lg_mh
, grp
->lg_addr
);
1696 if (link_state_update
)
1697 mac_link_update(grp
->lg_mh
, grp
->lg_link_state
);
1699 mac_perim_exit(mph
);
1700 AGGR_GRP_REFRELE(grp
);
1706 aggr_grp_delete(datalink_id_t linkid
, cred_t
*cred
)
1708 aggr_grp_t
*grp
= NULL
;
1709 aggr_port_t
*port
, *cport
;
1710 datalink_id_t tmpid
;
1712 mac_perim_handle_t mph
, pmph
;
1716 rw_enter(&aggr_grp_lock
, RW_WRITER
);
1718 if (mod_hash_find(aggr_grp_hash
, GRP_HASH_KEY(linkid
),
1719 (mod_hash_val_t
*)&grp
) != 0) {
1720 rw_exit(&aggr_grp_lock
);
1725 * Note that dls_devnet_destroy() must be called before lg_lock is
1726 * held. Otherwise, it will deadlock if another thread is in
1727 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1728 * dls_devnet_destroy() needs to delete.
1730 if ((err
= dls_devnet_destroy(grp
->lg_mh
, &tmpid
, B_TRUE
)) != 0) {
1731 rw_exit(&aggr_grp_lock
);
1734 ASSERT(linkid
== tmpid
);
1737 * Unregister from the MAC service module. Since this can
1738 * fail if a client hasn't closed the MAC port, we gracefully
1739 * fail the operation.
1741 if ((err
= mac_disable(grp
->lg_mh
)) != 0) {
1742 (void) dls_devnet_create(grp
->lg_mh
, linkid
, crgetzoneid(cred
));
1743 rw_exit(&aggr_grp_lock
);
1746 (void) mod_hash_remove(aggr_grp_hash
, GRP_HASH_KEY(linkid
), &val
);
1747 ASSERT(grp
== (aggr_grp_t
*)val
);
1749 ASSERT(aggr_grp_cnt
> 0);
1751 rw_exit(&aggr_grp_lock
);
1754 * Inform the lacp_rx thread to exit.
1756 mutex_enter(&grp
->lg_lacp_lock
);
1757 grp
->lg_lacp_done
= B_TRUE
;
1758 cv_signal(&grp
->lg_lacp_cv
);
1759 while (grp
->lg_lacp_rx_thread
!= NULL
)
1760 cv_wait(&grp
->lg_lacp_cv
, &grp
->lg_lacp_lock
);
1761 mutex_exit(&grp
->lg_lacp_lock
);
1763 * Inform the tx_notify_thread to exit.
1765 mutex_enter(&grp
->lg_tx_flowctl_lock
);
1766 if (grp
->lg_tx_notify_thread
!= NULL
) {
1767 tid
= grp
->lg_tx_notify_thread
->t_did
;
1768 grp
->lg_tx_notify_done
= B_TRUE
;
1769 cv_signal(&grp
->lg_tx_flowctl_cv
);
1771 mutex_exit(&grp
->lg_tx_flowctl_lock
);
1775 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1777 grp
->lg_closing
= B_TRUE
;
1778 /* detach and free MAC ports associated with group */
1779 port
= grp
->lg_ports
;
1780 while (port
!= NULL
) {
1781 cport
= port
->lp_next
;
1782 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1783 if (grp
->lg_started
)
1784 aggr_port_stop(port
);
1785 (void) aggr_grp_detach_port(grp
, port
);
1786 mac_perim_exit(pmph
);
1787 aggr_rem_pseudo_tx_group(port
, &grp
->lg_tx_group
);
1788 aggr_rem_pseudo_rx_group(port
, &grp
->lg_rx_group
);
1789 aggr_port_delete(port
);
1793 mac_perim_exit(mph
);
1795 kmem_free(grp
->lg_tx_blocked_rings
,
1796 (sizeof (mac_ring_handle_t
*) * MAX_RINGS_PER_GROUP
));
1798 * Wait for the port's lacp timer thread and its notification callback
1799 * to exit before calling mac_unregister() since both needs to access
1800 * the mac perimeter of the grp.
1802 aggr_grp_port_wait(grp
);
1804 VERIFY(mac_unregister(grp
->lg_mh
) == 0);
1807 AGGR_GRP_REFRELE(grp
);
1812 aggr_grp_free(aggr_grp_t
*grp
)
1814 ASSERT(grp
->lg_refs
== 0);
1815 ASSERT(grp
->lg_port_ref
== 0);
1816 if (grp
->lg_key
> AGGR_MAX_KEY
) {
1817 id_free(key_ids
, grp
->lg_key
);
1820 kmem_cache_free(aggr_grp_cache
, grp
);
1824 aggr_grp_info(datalink_id_t linkid
, void *fn_arg
,
1825 aggr_grp_info_new_grp_fn_t new_grp_fn
,
1826 aggr_grp_info_new_port_fn_t new_port_fn
, cred_t
*cred
)
1830 mac_perim_handle_t mph
, pmph
;
1834 * Make sure that the aggregation link is visible from the caller's
1837 if (!dls_devnet_islinkvisible(linkid
, crgetzoneid(cred
)))
1840 rw_enter(&aggr_grp_lock
, RW_READER
);
1842 if (mod_hash_find(aggr_grp_hash
, GRP_HASH_KEY(linkid
),
1843 (mod_hash_val_t
*)&grp
) != 0) {
1844 rw_exit(&aggr_grp_lock
);
1847 AGGR_GRP_REFHOLD(grp
);
1849 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1850 rw_exit(&aggr_grp_lock
);
1852 rc
= new_grp_fn(fn_arg
, grp
->lg_linkid
,
1853 (grp
->lg_key
> AGGR_MAX_KEY
) ? 0 : grp
->lg_key
, grp
->lg_addr
,
1854 grp
->lg_addr_fixed
, grp
->lg_force
, grp
->lg_tx_policy
,
1855 grp
->lg_nports
, grp
->lg_lacp_mode
, grp
->aggr
.PeriodicTimer
);
1860 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
1861 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
1862 rc
= new_port_fn(fn_arg
, port
->lp_linkid
, port
->lp_addr
,
1863 port
->lp_state
, &port
->lp_lacp
.ActorOperPortState
);
1864 mac_perim_exit(pmph
);
1871 mac_perim_exit(mph
);
1872 AGGR_GRP_REFRELE(grp
);
1878 aggr_m_ioctl(void *arg
, queue_t
*q
, mblk_t
*mp
)
1880 miocnak(q
, mp
, 0, ENOTSUP
);
1884 aggr_grp_stat(aggr_grp_t
*grp
, uint_t stat
, uint64_t *val
)
1889 /* We only aggregate counter statistics. */
1890 if (IS_MAC_STAT(stat
) && !MAC_STAT_ISACOUNTER(stat
) ||
1891 IS_MACTYPE_STAT(stat
) && !ETHER_STAT_ISACOUNTER(stat
)) {
1896 * Counter statistics for a group are computed by aggregating the
1897 * counters of the members MACs while they were aggregated, plus
1898 * the residual counter of the group itself, which is updated each
1899 * time a MAC is removed from the group.
1902 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
1903 /* actual port statistic */
1904 *val
+= aggr_port_stat(port
, stat
);
1906 * minus the port stat when it was added, plus any residual
1907 * amount for the group.
1909 if (IS_MAC_STAT(stat
)) {
1910 stat_index
= stat
- MAC_STAT_MIN
;
1911 *val
-= port
->lp_stat
[stat_index
];
1912 *val
+= grp
->lg_stat
[stat_index
];
1913 } else if (IS_MACTYPE_STAT(stat
)) {
1914 stat_index
= stat
- MACTYPE_STAT_MIN
;
1915 *val
-= port
->lp_ether_stat
[stat_index
];
1916 *val
+= grp
->lg_ether_stat
[stat_index
];
1923 aggr_rx_ring_stat(mac_ring_driver_t rdriver
, uint_t stat
, uint64_t *val
)
1925 aggr_pseudo_rx_ring_t
*rx_ring
= (aggr_pseudo_rx_ring_t
*)rdriver
;
1927 if (rx_ring
->arr_hw_rh
!= NULL
) {
1928 *val
= mac_pseudo_rx_ring_stat_get(rx_ring
->arr_hw_rh
, stat
);
1930 aggr_port_t
*port
= rx_ring
->arr_port
;
1932 *val
= mac_stat_get(port
->lp_mh
, stat
);
1939 aggr_tx_ring_stat(mac_ring_driver_t rdriver
, uint_t stat
, uint64_t *val
)
1941 aggr_pseudo_tx_ring_t
*tx_ring
= (aggr_pseudo_tx_ring_t
*)rdriver
;
1943 if (tx_ring
->atr_hw_rh
!= NULL
) {
1944 *val
= mac_pseudo_tx_ring_stat_get(tx_ring
->atr_hw_rh
, stat
);
1946 aggr_port_t
*port
= tx_ring
->atr_port
;
1948 *val
= mac_stat_get(port
->lp_mh
, stat
);
1954 aggr_m_stat(void *arg
, uint_t stat
, uint64_t *val
)
1956 aggr_grp_t
*grp
= arg
;
1957 mac_perim_handle_t mph
;
1960 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1963 case MAC_STAT_IFSPEED
:
1964 *val
= grp
->lg_ifspeed
;
1967 case ETHER_STAT_LINK_DUPLEX
:
1968 *val
= grp
->lg_link_duplex
;
1973 * For all other statistics, we return the aggregated stat
1974 * from the underlying ports. aggr_grp_stat() will set
1975 * rval appropriately if the statistic isn't a counter.
1977 rval
= aggr_grp_stat(grp
, stat
, val
);
1980 mac_perim_exit(mph
);
1985 aggr_m_start(void *arg
)
1987 aggr_grp_t
*grp
= arg
;
1989 mac_perim_handle_t mph
, pmph
;
1991 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
1994 * Attempts to start all configured members of the group.
1995 * Group members will be attached when their link-up notification
1998 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
1999 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
2000 if (aggr_port_start(port
) != 0) {
2001 mac_perim_exit(pmph
);
2006 * Turn on the promiscuous mode if it is required to receive
2007 * the non-primary address over a port, or the promiscous
2008 * mode is enabled over the aggr.
2010 if (grp
->lg_promisc
|| port
->lp_prom_addr
!= NULL
) {
2011 if (aggr_port_promisc(port
, B_TRUE
) != 0)
2012 aggr_port_stop(port
);
2014 mac_perim_exit(pmph
);
2017 grp
->lg_started
= B_TRUE
;
2019 mac_perim_exit(mph
);
2024 aggr_m_stop(void *arg
)
2026 aggr_grp_t
*grp
= arg
;
2028 mac_perim_handle_t mph
, pmph
;
2030 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
2032 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
2033 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
2035 /* reset port promiscuous mode */
2036 (void) aggr_port_promisc(port
, B_FALSE
);
2038 aggr_port_stop(port
);
2039 mac_perim_exit(pmph
);
2042 grp
->lg_started
= B_FALSE
;
2043 mac_perim_exit(mph
);
2047 aggr_m_promisc(void *arg
, boolean_t on
)
2049 aggr_grp_t
*grp
= arg
;
2051 boolean_t link_state_changed
= B_FALSE
;
2052 mac_perim_handle_t mph
, pmph
;
2054 AGGR_GRP_REFHOLD(grp
);
2055 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
2057 ASSERT(!grp
->lg_closing
);
2059 if (on
== grp
->lg_promisc
)
2062 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
2065 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
2066 AGGR_PORT_REFHOLD(port
);
2067 if (!on
&& (port
->lp_prom_addr
== NULL
))
2068 err
= aggr_port_promisc(port
, B_FALSE
);
2069 else if (on
&& port
->lp_started
)
2070 err
= aggr_port_promisc(port
, B_TRUE
);
2073 if (aggr_grp_detach_port(grp
, port
))
2074 link_state_changed
= B_TRUE
;
2077 * If a port was detached because of a previous
2078 * failure changing the promiscuity, the port
2079 * is reattached when it successfully changes
2080 * the promiscuity now, and this might cause
2081 * the link state of the aggregation to change.
2083 if (aggr_grp_attach_port(grp
, port
))
2084 link_state_changed
= B_TRUE
;
2086 mac_perim_exit(pmph
);
2087 AGGR_PORT_REFRELE(port
);
2090 grp
->lg_promisc
= on
;
2092 if (link_state_changed
)
2093 mac_link_update(grp
->lg_mh
, grp
->lg_link_state
);
2096 mac_perim_exit(mph
);
2097 AGGR_GRP_REFRELE(grp
);
2103 aggr_grp_port_rename(const char *new_name
, void *arg
)
2106 * aggr port's mac client name is the format of "aggr link name" plus
2107 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2109 int aggr_len
, link_len
, clnt_name_len
, i
;
2110 char *str_end
, *str_st
, *str_del
;
2111 char aggr_name
[MAXNAMELEN
];
2112 char link_name
[MAXNAMELEN
];
2114 aggr_grp_t
*aggr_grp
= arg
;
2115 aggr_port_t
*aggr_port
= aggr_grp
->lg_ports
;
2117 for (i
= 0; i
< aggr_grp
->lg_nports
; i
++) {
2118 clnt_name
= mac_client_name(aggr_port
->lp_mch
);
2119 clnt_name_len
= strlen(clnt_name
);
2121 str_end
= &(clnt_name
[clnt_name_len
]);
2122 str_del
= strchr(str_st
, AGGR_PORT_NAME_DELIMIT
);
2123 ASSERT(str_del
!= NULL
);
2124 aggr_len
= (intptr_t)((uintptr_t)str_del
- (uintptr_t)str_st
);
2125 link_len
= (intptr_t)((uintptr_t)str_end
- (uintptr_t)str_del
);
2126 bzero(aggr_name
, MAXNAMELEN
);
2127 bzero(link_name
, MAXNAMELEN
);
2128 bcopy(clnt_name
, aggr_name
, aggr_len
);
2129 bcopy(str_del
, link_name
, link_len
+ 1);
2130 bzero(clnt_name
, MAXNAMELEN
);
2131 (void) snprintf(clnt_name
, MAXNAMELEN
, "%s%s", new_name
,
2134 (void) mac_rename_primary(aggr_port
->lp_mh
, NULL
);
2135 aggr_port
= aggr_port
->lp_next
;
2140 * Initialize the capabilities that are advertised for the group
2141 * according to the capabilities of the constituent ports.
2144 aggr_m_capab_get(void *arg
, mac_capab_t cap
, void *cap_data
)
2146 aggr_grp_t
*grp
= arg
;
2149 case MAC_CAPAB_HCKSUM
: {
2150 uint32_t *hcksum_txflags
= cap_data
;
2151 *hcksum_txflags
= grp
->lg_hcksum_txflags
;
2154 case MAC_CAPAB_LSO
: {
2155 mac_capab_lso_t
*cap_lso
= cap_data
;
2158 *cap_lso
= grp
->lg_cap_lso
;
2164 case MAC_CAPAB_NO_NATIVEVLAN
:
2165 return (!grp
->lg_vlan
);
2166 case MAC_CAPAB_NO_ZCOPY
:
2167 return (!grp
->lg_zcopy
);
2168 case MAC_CAPAB_RINGS
: {
2169 mac_capab_rings_t
*cap_rings
= cap_data
;
2171 if (cap_rings
->mr_type
== MAC_RING_TYPE_RX
) {
2172 cap_rings
->mr_group_type
= MAC_GROUP_TYPE_STATIC
;
2173 cap_rings
->mr_rnum
= grp
->lg_rx_group
.arg_ring_cnt
;
2176 * An aggregation advertises only one (pseudo) RX
2177 * group, which virtualizes the main/primary group of
2178 * the underlying devices.
2180 cap_rings
->mr_gnum
= 1;
2181 cap_rings
->mr_gaddring
= NULL
;
2182 cap_rings
->mr_gremring
= NULL
;
2184 cap_rings
->mr_group_type
= MAC_GROUP_TYPE_STATIC
;
2185 cap_rings
->mr_rnum
= grp
->lg_tx_group
.atg_ring_cnt
;
2186 cap_rings
->mr_gnum
= 0;
2188 cap_rings
->mr_rget
= aggr_fill_ring
;
2189 cap_rings
->mr_gget
= aggr_fill_group
;
2192 case MAC_CAPAB_AGGR
:
2194 mac_capab_aggr_t
*aggr_cap
;
2196 if (cap_data
!= NULL
) {
2197 aggr_cap
= cap_data
;
2198 aggr_cap
->mca_rename_fn
= aggr_grp_port_rename
;
2199 aggr_cap
->mca_unicst
= aggr_m_unicst
;
2200 aggr_cap
->mca_find_tx_ring_fn
= aggr_find_tx_ring
;
2201 aggr_cap
->mca_arg
= arg
;
2212 * Callback funtion for MAC layer to register groups.
2215 aggr_fill_group(void *arg
, mac_ring_type_t rtype
, const int index
,
2216 mac_group_info_t
*infop
, mac_group_handle_t gh
)
2218 aggr_grp_t
*grp
= arg
;
2219 aggr_pseudo_rx_group_t
*rx_group
;
2220 aggr_pseudo_tx_group_t
*tx_group
;
2223 if (rtype
== MAC_RING_TYPE_RX
) {
2224 rx_group
= &grp
->lg_rx_group
;
2225 rx_group
->arg_gh
= gh
;
2226 rx_group
->arg_grp
= grp
;
2228 infop
->mgi_driver
= (mac_group_driver_t
)rx_group
;
2229 infop
->mgi_start
= NULL
;
2230 infop
->mgi_stop
= NULL
;
2231 infop
->mgi_addmac
= aggr_addmac
;
2232 infop
->mgi_remmac
= aggr_remmac
;
2233 infop
->mgi_count
= rx_group
->arg_ring_cnt
;
2235 tx_group
= &grp
->lg_tx_group
;
2236 tx_group
->atg_gh
= gh
;
2241 * Callback funtion for MAC layer to register all rings.
2244 aggr_fill_ring(void *arg
, mac_ring_type_t rtype
, const int rg_index
,
2245 const int index
, mac_ring_info_t
*infop
, mac_ring_handle_t rh
)
2247 aggr_grp_t
*grp
= arg
;
2250 case MAC_RING_TYPE_RX
: {
2251 aggr_pseudo_rx_group_t
*rx_group
= &grp
->lg_rx_group
;
2252 aggr_pseudo_rx_ring_t
*rx_ring
;
2253 mac_intr_t aggr_mac_intr
;
2255 ASSERT(rg_index
== 0);
2257 ASSERT((index
>= 0) && (index
< rx_group
->arg_ring_cnt
));
2258 rx_ring
= rx_group
->arg_rings
+ index
;
2259 rx_ring
->arr_rh
= rh
;
2262 * Entrypoint to enable interrupt (disable poll) and
2263 * disable interrupt (enable poll).
2265 aggr_mac_intr
.mi_handle
= (mac_intr_handle_t
)rx_ring
;
2266 aggr_mac_intr
.mi_enable
= aggr_pseudo_enable_intr
;
2267 aggr_mac_intr
.mi_disable
= aggr_pseudo_disable_intr
;
2268 aggr_mac_intr
.mi_ddi_handle
= NULL
;
2270 infop
->mri_driver
= (mac_ring_driver_t
)rx_ring
;
2271 infop
->mri_start
= aggr_pseudo_start_ring
;
2272 infop
->mri_stop
= aggr_pseudo_stop_ring
;
2274 infop
->mri_intr
= aggr_mac_intr
;
2275 infop
->mri_poll
= aggr_rx_poll
;
2277 infop
->mri_stat
= aggr_rx_ring_stat
;
2280 case MAC_RING_TYPE_TX
: {
2281 aggr_pseudo_tx_group_t
*tx_group
= &grp
->lg_tx_group
;
2282 aggr_pseudo_tx_ring_t
*tx_ring
;
2284 ASSERT(rg_index
== -1);
2285 ASSERT(index
< tx_group
->atg_ring_cnt
);
2287 tx_ring
= &tx_group
->atg_rings
[index
];
2288 tx_ring
->atr_rh
= rh
;
2290 infop
->mri_driver
= (mac_ring_driver_t
)tx_ring
;
2291 infop
->mri_start
= NULL
;
2292 infop
->mri_stop
= NULL
;
2293 infop
->mri_tx
= aggr_ring_tx
;
2294 infop
->mri_stat
= aggr_tx_ring_stat
;
2296 * Use the hw TX ring handle to find if the ring needs
2297 * serialization or not. For NICs that do not expose
2298 * Tx rings, atr_hw_rh will be NULL.
2300 if (tx_ring
->atr_hw_rh
!= NULL
) {
2302 mac_hwring_getinfo(tx_ring
->atr_hw_rh
);
2312 aggr_rx_poll(void *arg
, int bytes_to_pickup
)
2314 aggr_pseudo_rx_ring_t
*rr_ring
= arg
;
2315 aggr_port_t
*port
= rr_ring
->arr_port
;
2316 aggr_grp_t
*grp
= port
->lp_grp
;
2317 mblk_t
*mp_chain
, *mp
, **mpp
;
2319 mp_chain
= mac_hwring_poll(rr_ring
->arr_hw_rh
, bytes_to_pickup
);
2321 if (grp
->lg_lacp_mode
== AGGR_LACP_OFF
)
2325 while ((mp
= *mpp
) != NULL
) {
2326 if (MBLKL(mp
) >= sizeof (struct ether_header
)) {
2327 struct ether_header
*ehp
;
2329 ehp
= (struct ether_header
*)mp
->b_rptr
;
2330 if (ntohs(ehp
->ether_type
) == ETHERTYPE_SLOW
) {
2333 aggr_recv_lacp(port
,
2334 (mac_resource_handle_t
)rr_ring
, mp
);
2339 if (!port
->lp_collector_enabled
) {
2351 aggr_addmac(void *arg
, const uint8_t *mac_addr
)
2353 aggr_pseudo_rx_group_t
*rx_group
= (aggr_pseudo_rx_group_t
*)arg
;
2354 aggr_unicst_addr_t
*addr
, **pprev
;
2355 aggr_grp_t
*grp
= rx_group
->arg_grp
;
2356 aggr_port_t
*port
, *p
;
2357 mac_perim_handle_t mph
;
2360 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
2362 if (bcmp(mac_addr
, grp
->lg_addr
, ETHERADDRL
) == 0) {
2363 mac_perim_exit(mph
);
2368 * Insert this mac address into the list of mac addresses owned by
2369 * the aggregation pseudo group.
2371 pprev
= &rx_group
->arg_macaddr
;
2372 while ((addr
= *pprev
) != NULL
) {
2373 if (bcmp(mac_addr
, addr
->aua_addr
, ETHERADDRL
) == 0) {
2374 mac_perim_exit(mph
);
2377 pprev
= &addr
->aua_next
;
2379 addr
= kmem_alloc(sizeof (aggr_unicst_addr_t
), KM_SLEEP
);
2380 bcopy(mac_addr
, addr
->aua_addr
, ETHERADDRL
);
2381 addr
->aua_next
= NULL
;
2384 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
)
2385 if ((err
= aggr_port_addmac(port
, mac_addr
)) != 0)
2389 for (p
= grp
->lg_ports
; p
!= port
; p
= p
->lp_next
)
2390 aggr_port_remmac(p
, mac_addr
);
2393 kmem_free(addr
, sizeof (aggr_unicst_addr_t
));
2396 mac_perim_exit(mph
);
2401 aggr_remmac(void *arg
, const uint8_t *mac_addr
)
2403 aggr_pseudo_rx_group_t
*rx_group
= (aggr_pseudo_rx_group_t
*)arg
;
2404 aggr_unicst_addr_t
*addr
, **pprev
;
2405 aggr_grp_t
*grp
= rx_group
->arg_grp
;
2407 mac_perim_handle_t mph
;
2410 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
2412 if (bcmp(mac_addr
, grp
->lg_addr
, ETHERADDRL
) == 0) {
2413 mac_perim_exit(mph
);
2418 * Insert this mac address into the list of mac addresses owned by
2419 * the aggregation pseudo group.
2421 pprev
= &rx_group
->arg_macaddr
;
2422 while ((addr
= *pprev
) != NULL
) {
2423 if (bcmp(mac_addr
, addr
->aua_addr
, ETHERADDRL
) != 0) {
2424 pprev
= &addr
->aua_next
;
2430 mac_perim_exit(mph
);
2434 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
)
2435 aggr_port_remmac(port
, mac_addr
);
2437 *pprev
= addr
->aua_next
;
2438 kmem_free(addr
, sizeof (aggr_unicst_addr_t
));
2440 mac_perim_exit(mph
);
2445 * Add or remove the multicast addresses that are defined for the group
2446 * to or from the specified port.
2448 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2449 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2450 * called when the port is either stopped or detached.
2453 aggr_grp_multicst_port(aggr_port_t
*port
, boolean_t add
)
2455 aggr_grp_t
*grp
= port
->lp_grp
;
2457 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
2458 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
2460 if (!port
->lp_started
|| port
->lp_state
!= AGGR_PORT_STATE_ATTACHED
)
2463 mac_multicast_refresh(grp
->lg_mh
, aggr_port_multicst
, port
, add
);
2467 aggr_m_multicst(void *arg
, boolean_t add
, const uint8_t *addrp
)
2469 aggr_grp_t
*grp
= arg
;
2470 aggr_port_t
*port
= NULL
, *errport
= NULL
;
2471 mac_perim_handle_t mph
;
2474 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
2475 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
2476 if (port
->lp_state
!= AGGR_PORT_STATE_ATTACHED
||
2477 !port
->lp_started
) {
2480 err
= aggr_port_multicst(port
, add
, addrp
);
2488 * At least one port caused error return and this error is returned to
2489 * mac, eventually a NAK would be sent upwards.
2490 * Some ports have this multicast address listed now, and some don't.
2491 * Treat this error as a whole aggr failure not individual port failure.
2492 * Therefore remove this multicast address from other ports.
2494 if ((err
!= 0) && add
) {
2495 for (port
= grp
->lg_ports
; port
!= errport
;
2496 port
= port
->lp_next
) {
2497 if (port
->lp_state
!= AGGR_PORT_STATE_ATTACHED
||
2498 !port
->lp_started
) {
2501 (void) aggr_port_multicst(port
, B_FALSE
, addrp
);
2504 mac_perim_exit(mph
);
2509 aggr_m_unicst(void *arg
, const uint8_t *macaddr
)
2511 aggr_grp_t
*grp
= arg
;
2512 mac_perim_handle_t mph
;
2515 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
2516 err
= aggr_grp_modify_common(grp
, AGGR_MODIFY_MAC
, 0, B_TRUE
, macaddr
,
2518 mac_perim_exit(mph
);
2523 * Initialize the capabilities that are advertised for the group
2524 * according to the capabilities of the constituent ports.
2527 aggr_grp_capab_set(aggr_grp_t
*grp
)
2531 mac_capab_lso_t cap_lso
;
2533 ASSERT(grp
->lg_mh
== NULL
);
2534 ASSERT(grp
->lg_ports
!= NULL
);
2536 grp
->lg_hcksum_txflags
= (uint32_t)-1;
2537 grp
->lg_zcopy
= B_TRUE
;
2538 grp
->lg_vlan
= B_TRUE
;
2540 grp
->lg_lso
= B_TRUE
;
2541 grp
->lg_cap_lso
.lso_flags
= (t_uscalar_t
)-1;
2542 grp
->lg_cap_lso
.lso_basic_tcp_ipv4
.lso_max
= (t_uscalar_t
)-1;
2544 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
2545 if (!mac_capab_get(port
->lp_mh
, MAC_CAPAB_HCKSUM
, &cksum
))
2547 grp
->lg_hcksum_txflags
&= cksum
;
2550 !mac_capab_get(port
->lp_mh
, MAC_CAPAB_NO_NATIVEVLAN
, NULL
);
2553 !mac_capab_get(port
->lp_mh
, MAC_CAPAB_NO_ZCOPY
, NULL
);
2556 mac_capab_get(port
->lp_mh
, MAC_CAPAB_LSO
, &cap_lso
);
2558 grp
->lg_cap_lso
.lso_flags
&= cap_lso
.lso_flags
;
2559 if (grp
->lg_cap_lso
.lso_basic_tcp_ipv4
.lso_max
>
2560 cap_lso
.lso_basic_tcp_ipv4
.lso_max
)
2561 grp
->lg_cap_lso
.lso_basic_tcp_ipv4
.lso_max
=
2562 cap_lso
.lso_basic_tcp_ipv4
.lso_max
;
2568 * Checks whether the capabilities of the port being added are compatible
2569 * with the current capabilities of the aggregation.
2572 aggr_grp_capab_check(aggr_grp_t
*grp
, aggr_port_t
*port
)
2574 uint32_t hcksum_txflags
;
2576 ASSERT(grp
->lg_ports
!= NULL
);
2578 if (((!mac_capab_get(port
->lp_mh
, MAC_CAPAB_NO_NATIVEVLAN
, NULL
)) &
2579 grp
->lg_vlan
) != grp
->lg_vlan
) {
2583 if (((!mac_capab_get(port
->lp_mh
, MAC_CAPAB_NO_ZCOPY
, NULL
)) &
2584 grp
->lg_zcopy
) != grp
->lg_zcopy
) {
2588 if (!mac_capab_get(port
->lp_mh
, MAC_CAPAB_HCKSUM
, &hcksum_txflags
)) {
2589 if (grp
->lg_hcksum_txflags
!= 0)
2591 } else if ((hcksum_txflags
& grp
->lg_hcksum_txflags
) !=
2592 grp
->lg_hcksum_txflags
) {
2597 mac_capab_lso_t cap_lso
;
2599 if (mac_capab_get(port
->lp_mh
, MAC_CAPAB_LSO
, &cap_lso
)) {
2600 if ((grp
->lg_cap_lso
.lso_flags
& cap_lso
.lso_flags
) !=
2601 grp
->lg_cap_lso
.lso_flags
)
2603 if (grp
->lg_cap_lso
.lso_basic_tcp_ipv4
.lso_max
>
2604 cap_lso
.lso_basic_tcp_ipv4
.lso_max
)
2615 * Returns the maximum SDU according to the SDU of the constituent ports.
2618 aggr_grp_max_sdu(aggr_grp_t
*grp
)
2620 uint_t max_sdu
= (uint_t
)-1;
2623 ASSERT(grp
->lg_ports
!= NULL
);
2625 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
2626 uint_t port_sdu_max
;
2628 mac_sdu_get(port
->lp_mh
, NULL
, &port_sdu_max
);
2629 if (max_sdu
> port_sdu_max
)
2630 max_sdu
= port_sdu_max
;
2637 * Checks if the maximum SDU of the specified port is compatible
2638 * with the maximum SDU of the specified aggregation group, returns
2639 * B_TRUE if it is, B_FALSE otherwise.
2642 aggr_grp_sdu_check(aggr_grp_t
*grp
, aggr_port_t
*port
)
2644 uint_t port_sdu_max
;
2646 mac_sdu_get(port
->lp_mh
, NULL
, &port_sdu_max
);
2647 return (port_sdu_max
>= grp
->lg_max_sdu
);
2651 * Returns the maximum margin according to the margin of the constituent ports.
2654 aggr_grp_max_margin(aggr_grp_t
*grp
)
2656 uint32_t margin
= UINT32_MAX
;
2659 ASSERT(grp
->lg_mh
== NULL
);
2660 ASSERT(grp
->lg_ports
!= NULL
);
2662 for (port
= grp
->lg_ports
; port
!= NULL
; port
= port
->lp_next
) {
2663 if (margin
> port
->lp_margin
)
2664 margin
= port
->lp_margin
;
2667 grp
->lg_margin
= margin
;
2672 * Checks if the maximum margin of the specified port is compatible
2673 * with the maximum margin of the specified aggregation group, returns
2674 * B_TRUE if it is, B_FALSE otherwise.
2677 aggr_grp_margin_check(aggr_grp_t
*grp
, aggr_port_t
*port
)
2679 if (port
->lp_margin
>= grp
->lg_margin
)
2683 * See whether the current margin value is allowed to be changed to
2686 if (!mac_margin_update(grp
->lg_mh
, port
->lp_margin
))
2689 grp
->lg_margin
= port
->lp_margin
;
2694 * Set MTU on individual ports of an aggregation group
2697 aggr_set_port_sdu(aggr_grp_t
*grp
, aggr_port_t
*port
, uint32_t sdu
,
2700 boolean_t removed
= B_FALSE
;
2701 mac_perim_handle_t mph
;
2703 int err
, rv
, retry
= 0;
2705 if (port
->lp_mah
!= NULL
) {
2706 (void) mac_unicast_remove(port
->lp_mch
, port
->lp_mah
);
2707 port
->lp_mah
= NULL
;
2710 err
= mac_set_mtu(port
->lp_mh
, sdu
, old_mtu
);
2712 if (removed
&& (rv
= mac_unicast_add(port
->lp_mch
, NULL
,
2713 MAC_UNICAST_PRIMARY
| MAC_UNICAST_DISABLE_TX_VID_CHECK
,
2714 &port
->lp_mah
, 0, &diag
)) != 0) {
2716 * following is a workaround for a bug in 'bge' driver.
2717 * See CR 6794654 for more information and this work around
2718 * will be removed once the CR is fixed.
2720 if (rv
== EIO
&& retry
++ < 3) {
2725 * if mac_unicast_add() failed while setting the MTU,
2726 * detach the port from the group.
2728 mac_perim_enter_by_mh(port
->lp_mh
, &mph
);
2729 (void) aggr_grp_detach_port(grp
, port
);
2730 mac_perim_exit(mph
);
2731 cmn_err(CE_WARN
, "Unable to restart the port %s while "
2732 "setting MTU. Detaching the port from the aggregation.",
2733 mac_client_name(port
->lp_mch
));
2739 aggr_sdu_update(aggr_grp_t
*grp
, uint32_t sdu
)
2745 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
2748 * If the MTU being set is equal to aggr group's maximum
2749 * allowable value, then there is nothing to change
2751 if (sdu
== grp
->lg_max_sdu
)
2754 /* 0 is aggr group's min sdu */
2758 mtu
= kmem_alloc(sizeof (uint32_t) * grp
->lg_nports
, KM_SLEEP
);
2759 for (port
= grp
->lg_ports
, i
= 0; port
!= NULL
&& err
== 0;
2760 port
= port
->lp_next
, i
++) {
2761 err
= aggr_set_port_sdu(grp
, port
, sdu
, mtu
+ i
);
2764 /* recover from error: reset the mtus of the ports */
2767 for (tmp
= grp
->lg_ports
, i
= 0; tmp
!= port
;
2768 tmp
= tmp
->lp_next
, i
++) {
2769 (void) aggr_set_port_sdu(grp
, tmp
, *(mtu
+ i
), NULL
);
2773 grp
->lg_max_sdu
= aggr_grp_max_sdu(grp
);
2774 rv
= mac_maxsdu_update(grp
->lg_mh
, grp
->lg_max_sdu
);
2777 kmem_free(mtu
, sizeof (uint32_t) * grp
->lg_nports
);
2782 * Callback functions for set/get of properties
2786 aggr_m_setprop(void *m_driver
, const char *pr_name
, mac_prop_id_t pr_num
,
2787 uint_t pr_valsize
, const void *pr_val
)
2790 aggr_grp_t
*grp
= m_driver
;
2793 case MAC_PROP_MTU
: {
2796 if (pr_valsize
< sizeof (mtu
)) {
2800 bcopy(pr_val
, &mtu
, sizeof (mtu
));
2801 err
= aggr_sdu_update(grp
, mtu
);
/*
 * One boundary of an MTU range, as used by aggr_mtu_range_intersection():
 * bval is the boundary value and btype is +1 for a range minimum or -1
 * for a range maximum.
 */
typedef struct rboundary {
	uint32_t	bval;
	int		btype;
} rboundary_t;
2816 * This function finds the intersection of mtu ranges stored in arrays -
2817 * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
2818 * Individual arrays are assumed to contain non-overlapping ranges.
2820 * A range has two boundaries - min and max. We scan all arrays and store
2821 * each boundary as a separate element in a temporary array. We also store
2822 * the boundary types, min or max, as +1 or -1 respectively in the temporary
2823 * array. Then we sort the temporary array in ascending order. We scan the
2824 * sorted array from lower to higher values and keep a cumulative sum of
2825 * boundary types. Element in the temporary array for which the sum reaches
2826 * mcount is a min boundary of a range in the result and next element will be
2829 * Example for mcount = 3,
2831 * ----|_________|-------|_______|----|__|------ mrange[0]
2833 * -------|________|--|____________|-----|___|-- mrange[1]
2835 * --------|________________|-------|____|------ mrange[2]
2839 * 1 23 2 1 2 3 2 1 01 2 V 0 <- the sum
2840 * ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
2844 * --------|_____|-------|__|------------|------ intersecting ranges
2847 aggr_mtu_range_intersection(mac_propval_range_t
**mrange
, int mcount
,
2848 mac_propval_uint32_range_t
**prval
, int *prmaxcnt
, int *prcount
)
2850 mac_propval_uint32_range_t
*rval
, *ur
;
2851 int rmaxcnt
, rcount
;
2853 rboundary_t
*ta
; /* temporary array */
2855 boolean_t range_started
= B_FALSE
;
2858 sz_range32
= sizeof (mac_propval_uint32_range_t
);
2860 for (i
= 0, rmaxcnt
= 0; i
< mcount
; i
++)
2861 rmaxcnt
+= mrange
[i
]->mpr_count
;
2863 /* Allocate enough space to store the results */
2864 rval
= kmem_alloc(rmaxcnt
* sz_range32
, KM_SLEEP
);
2866 /* Number of boundaries are twice as many as ranges */
2867 ta
= kmem_alloc(2 * rmaxcnt
* sizeof (rboundary_t
), KM_SLEEP
);
2869 for (i
= 0, m
= 0; i
< mcount
; i
++) {
2870 ur
= &(mrange
[i
]->mpr_range_uint32
[0]);
2871 for (j
= 0; j
< mrange
[i
]->mpr_count
; j
++) {
2872 ta
[m
].bval
= ur
[j
].mpur_min
;
2874 ta
[m
].bval
= ur
[j
].mpur_max
;
2880 * Sort the temporary array in ascending order of bval;
2881 * if boundary values are same then sort on btype.
2883 for (i
= 0; i
< m
-1; i
++) {
2884 for (j
= i
+1; j
< m
; j
++) {
2885 if ((ta
[i
].bval
> ta
[j
].bval
) ||
2886 ((ta
[i
].bval
== ta
[j
].bval
) &&
2887 (ta
[i
].btype
< ta
[j
].btype
))) {
2895 /* Walk through temporary array to find all ranges in the results */
2896 for (i
= 0, sum
= 0, rcount
= 0; i
< m
; i
++) {
2898 if (sum
== mcount
) {
2899 rval
[rcount
].mpur_min
= ta
[i
].bval
;
2900 range_started
= B_TRUE
;
2901 } else if (sum
< mcount
&& range_started
) {
2902 rval
[rcount
++].mpur_max
= ta
[i
].bval
;
2903 range_started
= B_FALSE
;
2908 *prmaxcnt
= rmaxcnt
;
2911 kmem_free(ta
, 2 * rmaxcnt
* sizeof (rboundary_t
));
2915 * Returns the mtu ranges which could be supported by aggr group.
2916 * prmaxcnt returns the size of the buffer prval, prcount returns
2917 * the number of valid entries in prval. Caller is responsible
2918 * for freeing up prval.
2921 aggr_grp_possible_mtu_range(aggr_grp_t
*grp
, mac_propval_uint32_range_t
**prval
,
2922 int *prmaxcnt
, int *prcount
)
2924 mac_propval_range_t
**vals
;
2926 mac_perim_handle_t mph
;
2929 size_t sz_propval
, sz_range32
;
2932 sz_propval
= sizeof (mac_propval_range_t
);
2933 sz_range32
= sizeof (mac_propval_uint32_range_t
);
2935 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
2937 vals
= kmem_zalloc(sizeof (mac_propval_range_t
*) * grp
->lg_nports
,
2940 for (port
= grp
->lg_ports
, i
= 0; port
!= NULL
;
2941 port
= port
->lp_next
, i
++) {
2944 vals
[i
] = kmem_alloc(size
, KM_SLEEP
);
2945 vals
[i
]->mpr_count
= 1;
2947 mac_perim_enter_by_mh(port
->lp_mh
, &mph
);
2949 err
= mac_prop_info(port
->lp_mh
, MAC_PROP_MTU
, NULL
,
2950 NULL
, 0, vals
[i
], NULL
);
2951 if (err
== ENOSPC
) {
2953 * Not enough space to hold all ranges.
2954 * Allocate extra space as indicated and retry.
2956 numr
= vals
[i
]->mpr_count
;
2957 kmem_free(vals
[i
], sz_propval
);
2958 size
= sz_propval
+ (numr
- 1) * sz_range32
;
2959 vals
[i
] = kmem_alloc(size
, KM_SLEEP
);
2960 vals
[i
]->mpr_count
= numr
;
2961 err
= mac_prop_info(port
->lp_mh
, MAC_PROP_MTU
, NULL
,
2962 NULL
, 0, vals
[i
], NULL
);
2963 ASSERT(err
!= ENOSPC
);
2965 mac_perim_exit(mph
);
2967 kmem_free(vals
[i
], size
);
2974 * if any of the underlying ports does not support changing MTU then
2975 * just return ENOTSUP
2982 aggr_mtu_range_intersection(vals
, grp
->lg_nports
, prval
, prmaxcnt
,
2986 for (i
= 0; i
< grp
->lg_nports
; i
++) {
2987 if (vals
[i
] != NULL
) {
2988 numr
= vals
[i
]->mpr_count
;
2989 size
= sz_propval
+ (numr
- 1) * sz_range32
;
2990 kmem_free(vals
[i
], size
);
2994 kmem_free(vals
, sizeof (mac_propval_range_t
*) * grp
->lg_nports
);
2999 aggr_m_propinfo(void *m_driver
, const char *pr_name
, mac_prop_id_t pr_num
,
3000 mac_prop_info_handle_t prh
)
3002 aggr_grp_t
*grp
= m_driver
;
3003 mac_propval_uint32_range_t
*rval
= NULL
;
3004 int i
, rcount
, rmaxcnt
;
3007 _NOTE(ARGUNUSED(pr_name
));
3012 err
= aggr_grp_possible_mtu_range(grp
, &rval
, &rmaxcnt
,
3015 ASSERT(rval
== NULL
);
3018 for (i
= 0; i
< rcount
; i
++) {
3019 mac_prop_info_set_range_uint32(prh
,
3020 rval
[i
].mpur_min
, rval
[i
].mpur_max
);
3022 kmem_free(rval
, sizeof (mac_propval_uint32_range_t
) * rmaxcnt
);