4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports.
29 * Implements the functions needed to manage the MAC ports that are
30 * part of Link Aggregation groups.
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
36 #include <sys/cmn_err.h>
37 #include <sys/id_space.h>
39 #include <sys/ksynch.h>
41 #include <sys/stream.h>
42 #include <sys/modctl.h>
44 #include <sys/sunddi.h>
45 #include <sys/atomic.h>
51 #include <sys/aggr_impl.h>
/*
 * File-scope state shared by the routines in this file.
 *
 * aggr_port_cache - kmem cache handle; assigned from kmem_cache_create()
 *                   and used to allocate and free aggr_port_t objects.
 * aggr_portids    - id space from which port ids are allocated
 *                   (id_alloc) and to which they are returned (id_free).
 *
 * aggr_port_notify_cb() is declared here because it is passed to
 * mac_notify_add() before its definition appears.
 */
53 static kmem_cache_t
*aggr_port_cache
;
54 static id_space_t
*aggr_portids
;
56 static void aggr_port_notify_cb(void *, mac_notify_type_t
);
/*
 * kmem cache constructor for aggr_port_t objects; it is the constructor
 * handed to kmem_cache_create() for aggr_port_cache.  Zero-fills the
 * buffer.  The arg and kmflag parameters are not referenced in the lines
 * visible here.
 */
60 aggr_port_constructor(void *buf
, void *arg
, int kmflag
)
62 bzero(buf
, sizeof (aggr_port_t
));
/*
 * kmem cache destructor for aggr_port_t objects; it is the destructor
 * handed to kmem_cache_create() for aggr_port_cache.  The visible body
 * only sanity-checks, via ASSERT, that the port no longer carries a MAC
 * notify handle (lp_mnh), a promiscuous handle (lp_mphp) or a HW group
 * handle (lp_hwgh), and that lp_rx_grp_added and lp_tx_grp_added are both
 * clear.  The arg parameter is not referenced in the visible lines.
 */
68 aggr_port_destructor(void *buf
, void *arg
)
70 aggr_port_t
*port
= buf
;
72 ASSERT(port
->lp_mnh
== NULL
);
73 ASSERT(port
->lp_mphp
== NULL
);
74 ASSERT(!port
->lp_rx_grp_added
&& !port
->lp_tx_grp_added
);
75 ASSERT(port
->lp_hwgh
== NULL
);
/*
 * Setup of the file-scope allocators: creates the "aggr_port_cache" kmem
 * cache of aggr_port_t sized buffers, wired to aggr_port_constructor() and
 * aggr_port_destructor(), and then creates the "aggr_portids" id space
 * with bounds 1 and UINT16_MAX, asserting that the id space was created.
 *
 * NOTE(review): the function header line for this body is not visible in
 * this excerpt -- presumably the module initialisation routine; confirm.
 */
81 aggr_port_cache
= kmem_cache_create("aggr_port_cache",
82 sizeof (aggr_port_t
), 0, aggr_port_constructor
,
83 aggr_port_destructor
, NULL
, NULL
, NULL
, 0);
86 * Allocate a id space to manage port identification. The range of
87 * the arena will be from 1 to UINT16_MAX, because the LACP protocol
88 * specifies 16-bit unique identification.
90 aggr_portids
= id_space_create("aggr_portids", 1, UINT16_MAX
);
91 ASSERT(aggr_portids
!= NULL
);
/*
 * Teardown of the file-scope allocators: destroys the port kmem cache and
 * then the port id space.
 *
 * NOTE(review): the function header line for this body is not visible in
 * this excerpt -- presumably the module teardown routine; confirm.
 */
98 * This function is called only after all groups have been
99 * freed. This ensures that there are no remaining allocated
100 * ports when this function is invoked.
102 kmem_cache_destroy(aggr_port_cache
);
103 id_space_destroy(aggr_portids
);
/*
 * Registers aggr_port_notify_cb() as the MAC notification callback for the
 * port (with the port itself as the callback argument), storing the
 * returned handle in lp_mnh, and then takes a hold with
 * aggr_grp_port_hold().
 *
 * That hold is dropped with aggr_grp_port_rele() in one of two places:
 * aggr_port_delete(), when mac_notify_remove() returns 0, or
 * aggr_port_notify_cb(), when it finds lp_closing set.
 */
108 aggr_port_init_callbacks(aggr_port_t
*port
)
110 /* add the port's receive callback */
111 port
->lp_mnh
= mac_notify_add(port
->lp_mh
, aggr_port_notify_cb
, port
);
113 * Hold a reference of the grp and the port and this reference will
114 * be released when the thread exits.
116 * The reference on the port is used for aggr_port_delete() to
117 * continue without waiting for the thread to exit; the reference
118 * on the grp is used for aggr_grp_delete() to wait for the thread
119 * to exit before calling mac_unregister().
121 * Note that these references will be released either in
122 * aggr_port_delete() when mac_notify_remove() succeeds, or in
123 * the aggr_port_notify_cb() callback when the port is deleted
124 * (lp_closing is set).
126 aggr_grp_port_hold(port
);
/*
 * Opens the MAC identified by linkid and builds an aggr_port_t for it on
 * behalf of aggregation grp.
 *
 * Steps visible in this excerpt, in order:
 *   1. mac_open_by_linkid() opens the underlying MAC.
 *   2. Both mi_media and mi_nativemedia are compared against DL_ETHER.
 *   3. mac_no_notification() is consulted; when it reports
 *      DL_NOTE_LINK_UP or DL_NOTE_LINK_DOWN, no_link_update is set.  Per
 *      the existing comment such a MAC may only be aggregated when force
 *      is set.
 *   4. The link names of the aggregation and of the port are fetched with
 *      dls_mgmt_get_linkinfo() and joined as "<aggr>-<port>" to name the
 *      MAC client, which is opened with MAC_OPEN_FLAGS_IS_AGGR_PORT and
 *      MAC_OPEN_FLAGS_EXCLUSIVE.
 *   5. A port id is taken from aggr_portids (a result of 0 is treated as
 *      a failure).
 *   6. mac_margin_add() pins the current margin; on failure the port id is
 *      returned to the id space.
 *   7. mac_unicast_add() adds the primary unicast address; on failure the
 *      margin request and the port id are both released.
 *   8. The port is allocated from aggr_port_cache with KM_SLEEP and
 *      initialised: state AGGR_PORT_STATE_STANDBY, link state and duplex
 *      unknown, ifspeed 0, not started, Tx and promiscuous mode off, and
 *      the original primary MAC address saved in lp_addr.
 *   9. Current MAC and Ethernet statistics are read with aggr_port_stat()
 *      as a baseline.
 *
 * NOTE(review): the remaining parameters, several local declarations, the
 * error exits and the return statements are not visible in this excerpt.
 * The trailing mac_client_close() is presumably part of the failure path;
 * confirm against the full file.
 */
131 aggr_port_create(aggr_grp_t
*grp
, const datalink_id_t linkid
, boolean_t force
,
136 mac_client_handle_t mch
= NULL
;
140 boolean_t no_link_update
= B_FALSE
;
141 const mac_info_t
*mip
;
144 char client_name
[MAXNAMELEN
];
145 char aggr_name
[MAXNAMELEN
];
146 char port_name
[MAXNAMELEN
];
148 mac_unicast_handle_t mah
;
152 if ((err
= mac_open_by_linkid(linkid
, &mh
)) != 0)
156 if (mip
->mi_media
!= DL_ETHER
|| mip
->mi_nativemedia
!= DL_ETHER
) {
162 * If the underlying MAC does not support link update notification, it
163 * can only be aggregated if `force' is set. This is because aggr
164 * depends on link notifications to attach ports whose link is up.
166 note
= mac_no_notification(mh
);
167 if ((note
& (DL_NOTE_LINK_UP
| DL_NOTE_LINK_DOWN
)) != 0) {
168 no_link_update
= B_TRUE
;
171 * We borrow this error code to indicate that link
172 * notification is not supported.
179 if (((err
= dls_mgmt_get_linkinfo(grp
->lg_linkid
,
180 aggr_name
, NULL
, NULL
, NULL
)) != 0) ||
181 ((err
= dls_mgmt_get_linkinfo(linkid
, port_name
,
182 NULL
, NULL
, NULL
)) != 0)) {
186 (void) snprintf(client_name
, MAXNAMELEN
, "%s-%s", aggr_name
, port_name
);
187 if ((err
= mac_client_open(mh
, &mch
, client_name
,
188 MAC_OPEN_FLAGS_IS_AGGR_PORT
| MAC_OPEN_FLAGS_EXCLUSIVE
)) != 0) {
192 if ((portid
= (uint16_t)id_alloc(aggr_portids
)) == 0) {
198 * As the underlying mac's current margin size is used to determine
199 * the margin size of the aggregation itself, request the underlying
200 * mac not to change to a smaller size.
202 if ((err
= mac_margin_add(mh
, &margin
, B_TRUE
)) != 0) {
203 id_free(aggr_portids
, portid
);
207 if ((err
= mac_unicast_add(mch
, NULL
, MAC_UNICAST_PRIMARY
|
208 MAC_UNICAST_DISABLE_TX_VID_CHECK
, &mah
, 0, &diag
)) != 0) {
209 VERIFY(mac_margin_remove(mh
, margin
) == 0);
210 id_free(aggr_portids
, portid
);
214 port
= kmem_cache_alloc(aggr_port_cache
, KM_SLEEP
);
217 port
->lp_next
= NULL
;
221 port
->lp_linkid
= linkid
;
222 port
->lp_closing
= B_FALSE
;
225 /* get the port's original MAC address */
226 mac_unicast_primary_get(port
->lp_mh
, port
->lp_addr
);
228 /* initialize state */
229 port
->lp_state
= AGGR_PORT_STATE_STANDBY
;
230 port
->lp_link_state
= LINK_STATE_UNKNOWN
;
231 port
->lp_ifspeed
= 0;
232 port
->lp_link_duplex
= LINK_DUPLEX_UNKNOWN
;
233 port
->lp_started
= B_FALSE
;
234 port
->lp_tx_enabled
= B_FALSE
;
235 port
->lp_promisc_on
= B_FALSE
;
236 port
->lp_no_link_update
= no_link_update
;
237 port
->lp_portid
= portid
;
238 port
->lp_margin
= margin
;
239 port
->lp_prom_addr
= NULL
;
242 * Save the current statistics of the port. They will be used
243 * later by aggr_m_stats() when aggregating the statistics of
244 * the constituent ports.
246 for (i
= 0; i
< MAC_NSTAT
; i
++) {
248 aggr_port_stat(port
, i
+ MAC_STAT_MIN
);
250 for (i
= 0; i
< ETHER_NSTAT
; i
++) {
251 port
->lp_ether_stat
[i
] =
252 aggr_port_stat(port
, i
+ MACTYPE_STAT_MIN
);
255 /* LACP related state */
256 port
->lp_collector_enabled
= B_FALSE
;
263 mac_client_close(mch
, MAC_CLOSE_FLAGS_EXCLUSIVE
);
/*
 * Tears down a port.  The caller must already have removed any promiscuous
 * handle and turned promiscuous mode off (both asserted).
 *
 * Steps visible in this excerpt, in order:
 *   1. lp_closing is set so that aggr_port_notify_cb() can recognise a
 *      port that is going away.
 *   2. The margin request is removed and the Rx callback is cleared.
 *   3. If a notify handle exists, mac_notify_remove() is called with
 *      B_FALSE (per the existing comment, so as not to wait on a callback
 *      that is already in progress); only when it returns 0 is the hold
 *      dropped here with aggr_grp_port_rele().
 *   4. The LACP timer thread is told to exit by setting LACP_THREAD_EXIT
 *      under lacp_timer_lock and broadcasting lacp_timer_cv; the thread is
 *      not waited for.
 *   5. The saved primary MAC address (lp_addr) is restored, the unicast
 *      handle is removed if set, and the MAC client and MAC handles are
 *      closed.
 *   6. A port reference is dropped with AGGR_PORT_REFRELE().
 */
269 aggr_port_delete(aggr_port_t
*port
)
271 aggr_lacp_port_t
*pl
= &port
->lp_lacp
;
273 ASSERT(port
->lp_mphp
== NULL
);
274 ASSERT(!port
->lp_promisc_on
);
276 port
->lp_closing
= B_TRUE
;
278 VERIFY(mac_margin_remove(port
->lp_mh
, port
->lp_margin
) == 0);
279 mac_rx_clear(port
->lp_mch
);
281 * If the notification callback is already in process and waiting for
282 * the aggr grp's mac perimeter, don't wait (otherwise there would be
283 * deadlock). Otherwise, if mac_notify_remove() succeeds, we can
284 * release the reference held when mac_notify_add() is called.
286 if ((port
->lp_mnh
!= NULL
) &&
287 (mac_notify_remove(port
->lp_mnh
, B_FALSE
) == 0)) {
288 aggr_grp_port_rele(port
);
293 * Inform the the port lacp timer thread to exit. Note that waiting
294 * for the thread to exit may cause deadlock since that thread may
295 * need to enter into the mac perimeter which we are currently in.
296 * It is fine to continue without waiting though since that thread
297 * is holding a reference of the port.
299 mutex_enter(&pl
->lacp_timer_lock
);
300 pl
->lacp_timer_bits
|= LACP_THREAD_EXIT
;
301 cv_broadcast(&pl
->lacp_timer_cv
);
302 mutex_exit(&pl
->lacp_timer_lock
);
305 * Restore the port MAC address. Note it is called after the
306 * port's notification callback being removed. This prevent
307 * port's MAC_NOTE_UNICST notify callback function being called.
309 (void) mac_unicast_primary_set(port
->lp_mh
, port
->lp_addr
);
310 if (port
->lp_mah
!= NULL
)
311 (void) mac_unicast_remove(port
->lp_mch
, port
->lp_mah
);
312 mac_client_close(port
->lp_mch
, MAC_CLOSE_FLAGS_EXCLUSIVE
);
313 mac_close(port
->lp_mh
);
314 AGGR_PORT_REFRELE(port
);
/*
 * Final release of a port whose reference count is zero (asserted).
 * Drops the group reference when lp_grp is set, returns the port id to
 * aggr_portids, destroys the LACP timer mutex and condition variable, and
 * hands the structure back to aggr_port_cache.
 */
318 aggr_port_free(aggr_port_t
*port
)
320 ASSERT(port
->lp_refs
== 0);
321 if (port
->lp_grp
!= NULL
)
322 AGGR_GRP_REFRELE(port
->lp_grp
);
324 id_free(aggr_portids
, port
->lp_portid
);
326 mutex_destroy(&port
->lp_lacp
.lacp_timer_lock
);
327 cv_destroy(&port
->lp_lacp
.lacp_timer_cv
);
328 kmem_cache_free(aggr_port_cache
, port
);
332 * Invoked upon receiving a MAC_NOTE_LINK notification for
333 * one of the constituent ports.
/*
 * Re-evaluates one port after a link notification.  The group MAC
 * perimeter must be held (asserted); the port MAC perimeter is entered
 * here.
 *
 * The current link state, duplex and speed are compared with the values
 * cached in the port, accumulating do_attach and do_detach:
 *   - link state: when the new state is LINK_STATE_UP, attach is requested
 *     if the cached state was not up; detach is requested if the cached
 *     state was up.  Ports with lp_no_link_update set are always treated
 *     as up.
 *   - duplex: when the new duplex is LINK_DUPLEX_FULL, attach is requested
 *     if the cached duplex was not full; detach is requested if the cached
 *     duplex was full.
 *   - speed: for a port in AGGR_PORT_STATE_ATTACHED, detach is requested
 *     when the new speed differs from grp->lg_ifspeed; attach is requested
 *     when the new speed equals grp->lg_ifspeed.
 * After each comparison the cached value is refreshed.
 *
 * Returns link_state_changed, which takes the result of
 * aggr_grp_attach_port() or aggr_grp_detach_port() when one of them runs.
 * NOTE(review): the variable is initialised to B_TRUE, so the visible code
 * returns B_TRUE when neither call runs -- confirm this is intended.
 * NOTE(review): the else keywords, the test that guards the attach call
 * and the perimeter exit are not visible in this excerpt.
 */
336 aggr_port_notify_link(aggr_grp_t
*grp
, aggr_port_t
*port
)
338 boolean_t do_attach
= B_FALSE
;
339 boolean_t do_detach
= B_FALSE
;
340 boolean_t link_state_changed
= B_TRUE
;
342 link_state_t link_state
;
343 link_duplex_t link_duplex
;
344 mac_perim_handle_t mph
;
346 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
347 mac_perim_enter_by_mh(port
->lp_mh
, &mph
);
350 * link state change? For links that do not support link state
351 * notification, always assume the link is up.
353 link_state
= port
->lp_no_link_update
? LINK_STATE_UP
:
354 mac_link_get(port
->lp_mh
);
355 if (port
->lp_link_state
!= link_state
) {
356 if (link_state
== LINK_STATE_UP
)
357 do_attach
= (port
->lp_link_state
!= LINK_STATE_UP
);
359 do_detach
= (port
->lp_link_state
== LINK_STATE_UP
);
361 port
->lp_link_state
= link_state
;
363 /* link duplex change? */
364 link_duplex
= aggr_port_stat(port
, ETHER_STAT_LINK_DUPLEX
);
365 if (port
->lp_link_duplex
!= link_duplex
) {
366 if (link_duplex
== LINK_DUPLEX_FULL
)
367 do_attach
|= (port
->lp_link_duplex
!= LINK_DUPLEX_FULL
);
369 do_detach
|= (port
->lp_link_duplex
== LINK_DUPLEX_FULL
);
371 port
->lp_link_duplex
= link_duplex
;
373 /* link speed changes? */
374 ifspeed
= aggr_port_stat(port
, MAC_STAT_IFSPEED
);
375 if (port
->lp_ifspeed
!= ifspeed
) {
376 if (port
->lp_state
== AGGR_PORT_STATE_ATTACHED
)
377 do_detach
|= (ifspeed
!= grp
->lg_ifspeed
);
379 do_attach
|= (ifspeed
== grp
->lg_ifspeed
);
381 port
->lp_ifspeed
= ifspeed
;
384 /* attempt to attach the port to the aggregation */
385 link_state_changed
= aggr_grp_attach_port(grp
, port
);
386 } else if (do_detach
) {
387 /* detach the port from the aggregation */
388 link_state_changed
= aggr_grp_detach_port(grp
, port
);
392 return (link_state_changed
);
396 * Invoked upon receiving a MAC_NOTE_UNICST notification for one of the constituent ports.
/*
 * Handles a unicast address notification for one port.  The group MAC
 * perimeter must be held and both result pointers must be non-NULL (all
 * asserted); the port MAC perimeter is entered here.
 *
 * The current primary address of the port is read.  If it equals the
 * group address (grp->lg_addr), the notification is, per the existing
 * comment, the result of setting the port to the group address and
 * nothing is done.  Otherwise the new address is saved in lp_addr and
 * aggr_grp_port_mac_changed() is called; if that reports a MAC address
 * change and aggr_grp_update_ports_mac() returns true, the link state is
 * flagged as changed as well.
 *
 * Outputs: *mac_addr_changedp and *link_state_changedp receive the two
 * flags, both of which start out as B_FALSE.
 *
 * NOTE(review): the perimeter exits and the early-exit statement for the
 * equal-address case are not visible in this excerpt.
 */
400 aggr_port_notify_unicst(aggr_grp_t
*grp
, aggr_port_t
*port
,
401 boolean_t
*mac_addr_changedp
, boolean_t
*link_state_changedp
)
403 boolean_t mac_addr_changed
= B_FALSE
;
404 boolean_t link_state_changed
= B_FALSE
;
405 uint8_t mac_addr
[ETHERADDRL
];
406 mac_perim_handle_t mph
;
408 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
409 ASSERT(mac_addr_changedp
!= NULL
);
410 ASSERT(link_state_changedp
!= NULL
);
411 mac_perim_enter_by_mh(port
->lp_mh
, &mph
);
414 * If it is called when setting the MAC address to the
415 * aggregation group MAC address, do nothing.
417 mac_unicast_primary_get(port
->lp_mh
, mac_addr
);
418 if (bcmp(mac_addr
, grp
->lg_addr
, ETHERADDRL
) == 0) {
423 /* save the new port MAC address */
424 bcopy(mac_addr
, port
->lp_addr
, ETHERADDRL
);
426 aggr_grp_port_mac_changed(grp
, port
, &mac_addr_changed
,
427 &link_state_changed
);
432 * If this port was used to determine the MAC address of
433 * the group, update the MAC address of the constituent
436 if (mac_addr_changed
&& aggr_grp_update_ports_mac(grp
))
437 link_state_changed
= B_TRUE
;
440 *mac_addr_changedp
= mac_addr_changed
;
441 *link_state_changedp
= link_state_changed
;
445 * Notification callback invoked by the MAC service module for
446 * a particular MAC port.
/*
 * MAC notification callback registered by aggr_port_init_callbacks(); arg
 * is the aggr_port_t and type is the notification being delivered.
 *
 * The group MAC perimeter is entered first.  If the port is closing
 * (lp_closing set), the hold taken at registration time is released with
 * aggr_grp_port_rele().  Otherwise the visible handling is:
 *   - mac_tx_update() on the group MAC;
 *   - aggr_port_notify_link(), followed by mac_link_update() on the group
 *     MAC when it returns true;
 *   - MAC_NOTE_UNICST: aggr_port_notify_unicst(), followed by
 *     mac_unicst_update() when the MAC address changed and by
 *     mac_link_update() when the link state changed.
 *
 * NOTE(review): the switch statement, the case labels of the first two
 * branches (presumably MAC_NOTE_TX and MAC_NOTE_LINK) and the perimeter
 * exits are not visible in this excerpt.
 */
449 aggr_port_notify_cb(void *arg
, mac_notify_type_t type
)
451 aggr_port_t
*port
= arg
;
452 aggr_grp_t
*grp
= port
->lp_grp
;
453 boolean_t mac_addr_changed
, link_state_changed
;
454 mac_perim_handle_t mph
;
456 mac_perim_enter_by_mh(grp
->lg_mh
, &mph
);
457 if (port
->lp_closing
) {
461 * Release the reference so it is safe for aggr to call
462 * mac_unregister() now.
464 aggr_grp_port_rele(port
);
470 mac_tx_update(grp
->lg_mh
);
473 if (aggr_port_notify_link(grp
, port
))
474 mac_link_update(grp
->lg_mh
, grp
->lg_link_state
);
476 case MAC_NOTE_UNICST
:
477 aggr_port_notify_unicst(grp
, port
, &mac_addr_changed
,
478 &link_state_changed
);
479 if (mac_addr_changed
)
480 mac_unicst_update(grp
->lg_mh
, grp
->lg_addr
);
481 if (link_state_changed
)
482 mac_link_update(grp
->lg_mh
, grp
->lg_link_state
);
/*
 * Starts a port.  The port MAC perimeter must be held (asserted).
 * lp_started is tested first; the statement guarded by that test is not
 * visible in this excerpt -- presumably an early return for a port that is
 * already started.  The port is then marked started and
 * aggr_grp_multicst_port() is called with B_TRUE.
 */
492 aggr_port_start(aggr_port_t
*port
)
494 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
496 if (port
->lp_started
)
499 port
->lp_started
= B_TRUE
;
500 aggr_grp_multicst_port(port
, B_TRUE
);
/*
 * Stops a port.  The port MAC perimeter must be held (asserted).
 * lp_started is tested first; the statement guarded by that test is not
 * visible in this excerpt -- presumably an early return for a port that is
 * not started.  aggr_grp_multicst_port() is then called with B_FALSE and
 * the port is marked as not started.
 */
505 aggr_port_stop(aggr_port_t
*port
)
507 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
509 if (!port
->lp_started
)
512 aggr_grp_multicst_port(port
, B_FALSE
);
514 /* update the port state */
515 port
->lp_started
= B_FALSE
;
/*
 * Switches promiscuous mode on or off for a port.  The port MAC perimeter
 * must be held (asserted).  When the requested mode already equals
 * lp_promisc_on there is nothing to change.
 *
 * Enabling: the Rx callback is cleared and aggr_recv_cb is installed as a
 * MAC_CLIENT_PROMISC_ALL callback with MAC_PROMISC_FLAGS_NO_TX_LOOP; the
 * resulting handle is stored in lp_mphp and the result in rc.  The
 * mac_rx_set() that follows presumably restores the plain Rx callback when
 * that call fails -- the test on rc is not visible in this excerpt.
 *
 * Disabling: the promiscuous handle is removed and cleared, and
 * aggr_recv_cb is reinstalled as the Rx callback with mac_rx_set().
 *
 * lp_promisc_on then records the new mode.
 */
519 aggr_port_promisc(aggr_port_t
*port
, boolean_t on
)
523 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
525 if (on
== port
->lp_promisc_on
)
526 /* already in desired promiscous mode */
530 mac_rx_clear(port
->lp_mch
);
531 rc
= mac_promisc_add(port
->lp_mch
, MAC_CLIENT_PROMISC_ALL
,
532 aggr_recv_cb
, port
, &port
->lp_mphp
,
533 MAC_PROMISC_FLAGS_NO_TX_LOOP
);
535 mac_rx_set(port
->lp_mch
, aggr_recv_cb
, port
);
539 mac_promisc_remove(port
->lp_mphp
);
540 port
->lp_mphp
= NULL
;
541 mac_rx_set(port
->lp_mch
, aggr_recv_cb
, port
);
544 port
->lp_promisc_on
= on
;
550 * Set the MAC address of a port.
/*
 * Sets the primary MAC address of the port to the address of its group
 * (grp->lg_addr).  Both the group MAC perimeter and the port MAC perimeter
 * must be held (asserted).  Returns the result of
 * mac_unicast_primary_set().
 */
553 aggr_port_unicst(aggr_port_t
*port
)
555 aggr_grp_t
*grp
= port
->lp_grp
;
557 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
558 ASSERT(MAC_PERIM_HELD(port
->lp_mh
));
560 return (mac_unicast_primary_set(port
->lp_mh
, grp
->lg_addr
));
564 * Add or remove a multicast address to/from a port.
/*
 * Adds or removes multicast address addrp on the MAC client of a port.
 * arg is the aggr_port_t, passed as void *.  The add path returns the
 * result of mac_multicast_add(); the remove path calls
 * mac_multicast_remove() without using a result.
 *
 * NOTE(review): the test on the add parameter and the return statement of
 * the remove path are not visible in this excerpt.
 */
567 aggr_port_multicst(void *arg
, boolean_t add
, const uint8_t *addrp
)
569 aggr_port_t
*port
= arg
;
572 return (mac_multicast_add(port
->lp_mch
, addrp
));
574 mac_multicast_remove(port
->lp_mch
, addrp
);
/*
 * Returns the current value of statistic stat for the port, as reported by
 * mac_stat_get() on the MAC handle of the port (lp_mh).
 */
580 aggr_port_stat(aggr_port_t
*port
, uint_t stat
)
582 return (mac_stat_get(port
->lp_mh
, stat
));
586 * Add a non-primary unicast address to the underlying port. If the port
587 * supports a HW Rx group, try to add the address into the HW Rx group of
588 * the port first. If that fails, or if the port does not support a HW Rx
589 * group, enable the port's promiscuous mode.
/*
 * Adds non-primary unicast address mac_addr to a port.  The group MAC
 * perimeter must be held (asserted); the port MAC perimeter is entered
 * here and exited on every path visible in this excerpt.
 *
 * Steps visible in this excerpt, in order:
 *   1. If the port has a HW group handle (lp_hwgh) and
 *      mac_hwgroup_addmac() returns 0, the address is handled by the HW
 *      group and the perimeter is exited.
 *   2. Otherwise, if the port is started, promiscuous mode is enabled via
 *      aggr_port_promisc(); a non-zero result is kept in err and the
 *      perimeter is exited.
 *   3. The lp_prom_addr list is walked to its end, asserting that the
 *      address is not already on it, and a new aggr_unicst_addr_t holding
 *      a copy of the address is allocated with KM_SLEEP.
 *
 * NOTE(review): the statement that links the new node into the list
 * through pprev, and the return statements, are not visible in this
 * excerpt; confirm they are present in the full file.
 */
592 aggr_port_addmac(aggr_port_t
*port
, const uint8_t *mac_addr
)
594 aggr_unicst_addr_t
*addr
, **pprev
;
595 mac_perim_handle_t pmph
;
598 ASSERT(MAC_PERIM_HELD(port
->lp_grp
->lg_mh
));
599 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
602 * If the underlying port support HW Rx group, add the mac to its
605 if ((port
->lp_hwgh
!= NULL
) &&
606 ((mac_hwgroup_addmac(port
->lp_hwgh
, mac_addr
)) == 0)) {
607 mac_perim_exit(pmph
);
612 * If that fails, or if the port does not support HW Rx group, enable
613 * the port's promiscous mode. (Note that we turn on the promiscous
614 * mode only if the port is already started.
616 if (port
->lp_started
&&
617 ((err
= aggr_port_promisc(port
, B_TRUE
)) != 0)) {
618 mac_perim_exit(pmph
);
623 * Walk through the unicast addresses that requires promiscous mode
624 * enabled on this port, and add this address to the end of the list.
626 pprev
= &port
->lp_prom_addr
;
627 while ((addr
= *pprev
) != NULL
) {
628 ASSERT(bcmp(mac_addr
, addr
->aua_addr
, ETHERADDRL
) != 0);
629 pprev
= &addr
->aua_next
;
631 addr
= kmem_alloc(sizeof (aggr_unicst_addr_t
), KM_SLEEP
);
632 bcopy(mac_addr
, addr
->aua_addr
, ETHERADDRL
);
633 addr
->aua_next
= NULL
;
635 mac_perim_exit(pmph
);
640 * Remove a non-primary unicast address from the underlying port. This address
641 * must have been added by aggr_port_addmac(). As a result, we probably need to
642 * remove the address from the port's HW Rx group, or to disable the port's promiscuous mode.
/*
 * Removes non-primary unicast address mac_addr from a port.  The group MAC
 * perimeter must be held (asserted); the port MAC perimeter is entered
 * here and exited at the end.
 *
 * The lp_prom_addr list is searched for the address.  Two outcomes are
 * visible in this excerpt:
 *   - the matching node is unlinked and freed, and promiscuous mode is
 *     turned off when the list has become empty and the group itself is
 *     not promiscuous (grp->lg_promisc clear);
 *   - the port is asserted to have a HW group handle and the address is
 *     removed from it with mac_hwgroup_remmac(), ignoring the result.
 *
 * NOTE(review): the statement that leaves the search loop on a match and
 * the test that selects between the two outcomes are not visible in this
 * excerpt; presumably the first applies when the address was found on the
 * list and the second when it was not.
 */
646 aggr_port_remmac(aggr_port_t
*port
, const uint8_t *mac_addr
)
648 aggr_grp_t
*grp
= port
->lp_grp
;
649 aggr_unicst_addr_t
*addr
, **pprev
;
650 mac_perim_handle_t pmph
;
652 ASSERT(MAC_PERIM_HELD(grp
->lg_mh
));
653 mac_perim_enter_by_mh(port
->lp_mh
, &pmph
);
656 * See whether this address is in the list of addresses that requires
657 * the port being promiscous mode.
659 pprev
= &port
->lp_prom_addr
;
660 while ((addr
= *pprev
) != NULL
) {
661 if (bcmp(mac_addr
, addr
->aua_addr
, ETHERADDRL
) == 0)
663 pprev
= &addr
->aua_next
;
667 * This unicast address put the port into the promiscous mode,
668 * delete this address from the lp_prom_addr list. If this is
669 * the last address in that list, disable the promiscous mode
670 * if the aggregation is not in promiscous mode.
672 *pprev
= addr
->aua_next
;
673 kmem_free(addr
, sizeof (aggr_unicst_addr_t
));
674 if (port
->lp_prom_addr
== NULL
&& !grp
->lg_promisc
)
675 (void) aggr_port_promisc(port
, B_FALSE
);
677 ASSERT(port
->lp_hwgh
!= NULL
);
678 (void) mac_hwgroup_remmac(port
->lp_hwgh
, mac_addr
);
680 mac_perim_exit(pmph
);