Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / aggr / aggr_lacp.c
bloba0d566d12bf381e7c48e6a9d44294613c6e610ba
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
 */
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/callb.h>
32 #include <sys/conf.h>
33 #include <sys/cmn_err.h>
34 #include <sys/disp.h>
35 #include <sys/list.h>
36 #include <sys/ksynch.h>
37 #include <sys/kmem.h>
38 #include <sys/stream.h>
39 #include <sys/modctl.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/atomic.h>
43 #include <sys/stat.h>
44 #include <sys/byteorder.h>
45 #include <sys/strsun.h>
46 #include <sys/isa_defs.h>
47 #include <sys/sdt.h>
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
52 static struct ether_addr etherzeroaddr = {
53 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
57 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
59 static struct ether_addr slow_multicast_addr = {
60 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf() argument list, e.g.
 * AGGR_LACP_DBG(("port %d\n", id));  On DEBUG kernels the message is
 * printed when aggr_lacp_debug is non-zero; otherwise the macro expands
 * to nothing.  Both forms are wrapped in do { } while (0) so that the
 * macro followed by a semicolon is exactly one statement and can be used
 * safely as the body of an unbraced if/else.
 */
#ifdef DEBUG
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)						\
	do {								\
		if (aggr_lacp_debug)					\
			(void) printf x;				\
	} while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */
/* Number of nanoseconds in one second (gethrtime() units). */
#define	NSECS_PER_SEC	1000000000LL
73 /* used by lacp_misconfig_walker() */
74 typedef struct lacp_misconfig_check_state_s {
75 aggr_port_t *cs_portp;
76 boolean_t cs_found;
77 } lacp_misconfig_check_state_t;
79 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
80 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
81 static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
83 static uint16_t lacp_port_priority = 0x1000;
84 static uint16_t lacp_system_priority = 0x1000;
87 * Maintains a list of all ports in ATTACHED state. This information
88 * is used to detect misconfiguration.
90 typedef struct lacp_sel_ports {
91 datalink_id_t sp_grp_linkid;
92 datalink_id_t sp_linkid;
93 /* Note: sp_partner_system must be 2-byte aligned */
94 struct ether_addr sp_partner_system;
95 uint32_t sp_partner_key;
96 struct lacp_sel_ports *sp_next;
97 } lacp_sel_ports_t;
99 static lacp_sel_ports_t *sel_ports = NULL;
100 static kmutex_t lacp_sel_lock;
102 static void periodic_timer_pop(void *);
103 static void periodic_timer_pop_handler(aggr_port_t *);
104 static void lacp_xmit_sm(aggr_port_t *);
105 static void lacp_periodic_sm(aggr_port_t *);
106 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
107 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
108 static void lacp_on(aggr_port_t *);
109 static void lacp_off(aggr_port_t *);
110 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
111 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
112 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
113 static void start_wait_while_timer(aggr_port_t *);
114 static void stop_wait_while_timer(aggr_port_t *);
115 static void lacp_reset_port(aggr_port_t *);
116 static void stop_current_while_timer(aggr_port_t *);
117 static void current_while_timer_pop(void *);
118 static void current_while_timer_pop_handler(aggr_port_t *);
119 static void update_default_selected(aggr_port_t *);
120 static boolean_t update_selected(aggr_port_t *, lacp_t *);
121 static boolean_t lacp_sel_ports_add(aggr_port_t *);
122 static void lacp_sel_ports_del(aggr_port_t *);
123 static void wait_while_timer_pop(void *);
124 static void wait_while_timer_pop_handler(aggr_port_t *);
126 void
127 aggr_lacp_init(void)
129 mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
132 void
133 aggr_lacp_fini(void)
135 mutex_destroy(&lacp_sel_lock);
139 * The following functions are used for handling LACP timers.
141 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
142 * handler routine, otherwise it may cause deadlock with the untimeout() call
143 * which is usually called with the mac perimeter held. Instead, a
144 * lacp_timer_lock mutex is introduced, which protects a bitwise flag
145 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
146 * routines and is checked by a dedicated thread, that executes the real
147 * timeout operation.
149 static void
150 aggr_port_timer_thread(void *arg)
152 aggr_port_t *port = arg;
153 aggr_lacp_port_t *pl = &port->lp_lacp;
154 aggr_grp_t *grp = port->lp_grp;
155 uint32_t lacp_timer_bits;
156 mac_perim_handle_t mph;
157 callb_cpr_t cprinfo;
159 CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
160 "aggr_port_timer_thread");
162 mutex_enter(&pl->lacp_timer_lock);
164 for (;;) {
166 if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
167 CALLB_CPR_SAFE_BEGIN(&cprinfo);
168 cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
169 CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
170 continue;
172 pl->lacp_timer_bits = 0;
174 if (lacp_timer_bits & LACP_THREAD_EXIT)
175 break;
177 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
178 pl->periodic_timer.id = 0;
179 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
180 pl->wait_while_timer.id = 0;
181 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
182 pl->current_while_timer.id = 0;
184 mutex_exit(&pl->lacp_timer_lock);
186 mac_perim_enter_by_mh(grp->lg_mh, &mph);
187 if (port->lp_closing) {
188 mac_perim_exit(mph);
189 mutex_enter(&pl->lacp_timer_lock);
190 break;
193 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
194 periodic_timer_pop_handler(port);
195 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
196 wait_while_timer_pop_handler(port);
197 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
198 current_while_timer_pop_handler(port);
199 mac_perim_exit(mph);
201 mutex_enter(&pl->lacp_timer_lock);
202 if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
203 break;
206 pl->lacp_timer_bits = 0;
207 pl->lacp_timer_thread = NULL;
208 cv_broadcast(&pl->lacp_timer_cv);
210 /* CALLB_CPR_EXIT drops the lock */
211 CALLB_CPR_EXIT(&cprinfo);
214 * Release the reference of the grp so aggr_grp_delete() can call
215 * mac_unregister() safely.
217 aggr_grp_port_rele(port);
218 thread_exit();
222 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
223 * could not be performed due to a memory allocation error, B_TRUE otherwise.
225 static boolean_t
226 lacp_port_select(aggr_port_t *portp)
228 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
230 if (!lacp_sel_ports_add(portp))
231 return (B_FALSE);
232 portp->lp_lacp.sm.selected = AGGR_SELECTED;
233 return (B_TRUE);
237 * Set the port LACP state to UNSELECTED.
239 static void
240 lacp_port_unselect(aggr_port_t *portp)
242 aggr_grp_t *grp = portp->lp_grp;
244 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
246 lacp_sel_ports_del(portp);
247 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
251 * Initialize group specific LACP state and parameters.
253 void
254 aggr_lacp_init_grp(aggr_grp_t *aggrp)
256 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
257 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
258 aggrp->aggr.CollectorMaxDelay = 10;
259 aggrp->lg_lacp_mode = AGGR_LACP_OFF;
260 aggrp->aggr.ready = B_FALSE;
264 * Complete LACP info initialization at port creation time.
266 void
267 aggr_lacp_init_port(aggr_port_t *portp)
269 aggr_grp_t *aggrp = portp->lp_grp;
270 aggr_lacp_port_t *pl = &portp->lp_lacp;
272 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
273 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
275 /* actor port # */
276 pl->ActorPortNumber = portp->lp_portid;
277 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
278 "ActorPortNumber = 0x%x\n", portp->lp_linkid,
279 pl->ActorPortNumber));
281 pl->ActorPortPriority = (uint16_t)lacp_port_priority;
282 pl->ActorPortAggrId = 0; /* aggregator id - not used */
283 pl->NTT = B_FALSE; /* need to transmit */
285 pl->ActorAdminPortKey = aggrp->lg_key;
286 pl->ActorOperPortKey = pl->ActorAdminPortKey;
287 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
288 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
289 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
291 /* Actor admin. port state */
292 pl->ActorAdminPortState.bit.activity = B_FALSE;
293 pl->ActorAdminPortState.bit.timeout = B_TRUE;
294 pl->ActorAdminPortState.bit.aggregation = B_TRUE;
295 pl->ActorAdminPortState.bit.sync = B_FALSE;
296 pl->ActorAdminPortState.bit.collecting = B_FALSE;
297 pl->ActorAdminPortState.bit.distributing = B_FALSE;
298 pl->ActorAdminPortState.bit.defaulted = B_FALSE;
299 pl->ActorAdminPortState.bit.expired = B_FALSE;
300 pl->ActorOperPortState = pl->ActorAdminPortState;
303 * Partner Administrative Information
304 * (All initialized to zero except for the following)
305 * Fast Timeouts.
307 pl->PartnerAdminPortState.bit.timeout =
308 pl->PartnerOperPortState.bit.timeout = B_TRUE;
310 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
313 * State machine information.
315 pl->sm.lacp_on = B_FALSE; /* LACP Off default */
316 pl->sm.begin = B_TRUE; /* Prevents transmissions */
317 pl->sm.lacp_enabled = B_FALSE;
318 pl->sm.port_enabled = B_FALSE; /* Link Down */
319 pl->sm.actor_churn = B_FALSE;
320 pl->sm.partner_churn = B_FALSE;
321 pl->sm.ready_n = B_FALSE;
322 pl->sm.port_moved = B_FALSE;
324 lacp_port_unselect(portp);
326 pl->sm.periodic_state = LACP_NO_PERIODIC;
327 pl->sm.receive_state = LACP_INITIALIZE;
328 pl->sm.mux_state = LACP_DETACHED;
329 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
332 * Timer information.
334 pl->current_while_timer.id = 0;
335 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
337 pl->periodic_timer.id = 0;
338 pl->periodic_timer.val = FAST_PERIODIC_TIME;
340 pl->wait_while_timer.id = 0;
341 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
343 pl->lacp_timer_bits = 0;
345 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
346 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
348 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
349 portp, 0, &p0, TS_RUN, minclsyspri);
352 * Hold a reference of the grp and the port and this reference will
353 * be release when the thread exits.
355 * The reference on the port is used for aggr_port_delete() to
356 * continue without waiting for the thread to exit; the reference
357 * on the grp is used for aggr_grp_delete() to wait for the thread
358 * to exit before calling mac_unregister().
360 aggr_grp_port_hold(portp);
364 * Port initialization when we need to
365 * turn LACP on/off, etc. Not everything is
366 * reset like in the above routine.
367 * Do NOT modify things like link status.
369 static void
370 lacp_reset_port(aggr_port_t *portp)
372 aggr_lacp_port_t *pl = &portp->lp_lacp;
374 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
376 pl->NTT = B_FALSE; /* need to transmit */
378 /* reset operational port state */
379 pl->ActorOperPortState.bit.timeout =
380 pl->ActorAdminPortState.bit.timeout;
382 pl->ActorOperPortState.bit.sync = B_FALSE;
383 pl->ActorOperPortState.bit.collecting = B_FALSE;
384 pl->ActorOperPortState.bit.distributing = B_FALSE;
385 pl->ActorOperPortState.bit.defaulted = B_TRUE;
386 pl->ActorOperPortState.bit.expired = B_FALSE;
388 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */
389 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
392 * State machine information.
394 pl->sm.begin = B_TRUE; /* Prevents transmissions */
395 pl->sm.actor_churn = B_FALSE;
396 pl->sm.partner_churn = B_FALSE;
397 pl->sm.ready_n = B_FALSE;
399 lacp_port_unselect(portp);
401 pl->sm.periodic_state = LACP_NO_PERIODIC;
402 pl->sm.receive_state = LACP_INITIALIZE;
403 pl->sm.mux_state = LACP_DETACHED;
404 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
407 * Timer information.
409 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
410 pl->periodic_timer.val = FAST_PERIODIC_TIME;
413 static void
414 aggr_lacp_mcast_on(aggr_port_t *port)
416 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
417 ASSERT(MAC_PERIM_HELD(port->lp_mh));
419 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
420 return;
422 (void) aggr_port_multicst(port, B_TRUE,
423 (uchar_t *)&slow_multicast_addr);
426 static void
427 aggr_lacp_mcast_off(aggr_port_t *port)
429 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
430 ASSERT(MAC_PERIM_HELD(port->lp_mh));
432 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
433 return;
435 (void) aggr_port_multicst(port, B_FALSE,
436 (uchar_t *)&slow_multicast_addr);
439 static void
440 start_periodic_timer(aggr_port_t *portp)
442 aggr_lacp_port_t *pl = &portp->lp_lacp;
444 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
446 mutex_enter(&pl->lacp_timer_lock);
447 if (pl->periodic_timer.id == 0) {
448 pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
449 drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
451 mutex_exit(&pl->lacp_timer_lock);
454 static void
455 stop_periodic_timer(aggr_port_t *portp)
457 aggr_lacp_port_t *pl = &portp->lp_lacp;
458 timeout_id_t id;
460 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
462 mutex_enter(&pl->lacp_timer_lock);
463 if ((id = pl->periodic_timer.id) != 0) {
464 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
465 pl->periodic_timer.id = 0;
467 mutex_exit(&pl->lacp_timer_lock);
469 if (id != 0)
470 (void) untimeout(id);
474 * When the timer pops, we arrive here to
475 * clear out LACPDU count as well as transmit an
476 * LACPDU. We then set the periodic state and let
477 * the periodic state machine restart the timer.
479 static void
480 periodic_timer_pop(void *data)
482 aggr_port_t *portp = data;
483 aggr_lacp_port_t *pl = &portp->lp_lacp;
485 mutex_enter(&pl->lacp_timer_lock);
486 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
487 cv_broadcast(&pl->lacp_timer_cv);
488 mutex_exit(&pl->lacp_timer_lock);
492 * When the timer pops, we arrive here to
493 * clear out LACPDU count as well as transmit an
494 * LACPDU. We then set the periodic state and let
495 * the periodic state machine restart the timer.
497 static void
498 periodic_timer_pop_handler(aggr_port_t *portp)
500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
502 portp->lp_lacp_stats.LACPDUsTx = 0;
504 /* current timestamp */
505 portp->lp_lacp.time = gethrtime();
506 portp->lp_lacp.NTT = B_TRUE;
507 lacp_xmit_sm(portp);
510 * Set Periodic State machine state based on the
511 * value of the Partner Operation Port State timeout
512 * bit.
514 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
515 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
516 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
517 } else {
518 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
519 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
522 lacp_periodic_sm(portp);
526 * Invoked from:
527 * - startup upon aggregation
528 * - when the periodic timer pops
529 * - when the periodic timer value is changed
530 * - when the port is attached or detached
531 * - when LACP mode is changed.
533 static void
534 lacp_periodic_sm(aggr_port_t *portp)
536 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
537 aggr_lacp_port_t *pl = &portp->lp_lacp;
539 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
541 /* LACP_OFF state not in specification so check here. */
542 if (!pl->sm.lacp_on) {
543 /* Stop timer whether it is running or not */
544 stop_periodic_timer(portp);
545 pl->sm.periodic_state = LACP_NO_PERIODIC;
546 pl->NTT = B_FALSE;
547 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
548 "%s--->%s\n", portp->lp_linkid,
549 lacp_periodic_str[oldstate],
550 lacp_periodic_str[pl->sm.periodic_state]));
551 return;
554 if (pl->sm.begin || !pl->sm.lacp_enabled ||
555 !pl->sm.port_enabled ||
556 !pl->ActorOperPortState.bit.activity &&
557 !pl->PartnerOperPortState.bit.activity) {
559 /* Stop timer whether it is running or not */
560 stop_periodic_timer(portp);
561 pl->sm.periodic_state = LACP_NO_PERIODIC;
562 pl->NTT = B_FALSE;
563 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
564 portp->lp_linkid, lacp_periodic_str[oldstate],
565 lacp_periodic_str[pl->sm.periodic_state]));
566 return;
570 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
571 * has been received. Then after we timeout, then it is
572 * possible to go to SLOW_PERIODIC_TIME.
574 if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
575 pl->periodic_timer.val = FAST_PERIODIC_TIME;
576 pl->sm.periodic_state = LACP_FAST_PERIODIC;
577 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
578 pl->PartnerOperPortState.bit.timeout) {
580 * If we receive a bit indicating we are going to
581 * fast periodic from slow periodic, stop the timer
582 * and let the periodic_timer_pop routine deal
583 * with reseting the periodic state and transmitting
584 * a LACPDU.
586 stop_periodic_timer(portp);
587 periodic_timer_pop_handler(portp);
590 /* Rearm timer with value provided by partner */
591 start_periodic_timer(portp);
595 * This routine transmits an LACPDU if lacp_enabled
596 * is TRUE and if NTT is set.
598 static void
599 lacp_xmit_sm(aggr_port_t *portp)
601 aggr_lacp_port_t *pl = &portp->lp_lacp;
602 size_t len;
603 mblk_t *mp;
604 hrtime_t now, elapsed;
606 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
608 /* LACP_OFF state not in specification so check here. */
609 if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
610 return;
613 * Do nothing if LACP has been turned off or if the
614 * periodic state machine is not enabled.
616 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
617 !pl->sm.lacp_enabled || pl->sm.begin) {
618 pl->NTT = B_FALSE;
619 return;
623 * If we have sent 5 Slow packets in the last second, avoid
624 * sending any more here. No more than three LACPDUs may be transmitted
625 * in any Fast_Periodic_Time interval.
627 if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
629 * Grab the current time value and see if
630 * more than 1 second has passed. If so,
631 * reset the timestamp and clear the count.
633 now = gethrtime();
634 elapsed = now - pl->time;
635 if (elapsed > NSECS_PER_SEC) {
636 portp->lp_lacp_stats.LACPDUsTx = 0;
637 pl->time = now;
638 } else {
639 return;
643 len = sizeof (lacp_t) + sizeof (struct ether_header);
644 mp = allocb(len, BPRI_MED);
645 if (mp == NULL)
646 return;
648 mp->b_wptr = mp->b_rptr + len;
649 bzero(mp->b_rptr, len);
651 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
652 fill_lacp_pdu(portp,
653 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
655 /* Send the packet over the first TX ring */
656 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
657 if (mp != NULL)
658 freemsg(mp);
660 pl->NTT = B_FALSE;
661 portp->lp_lacp_stats.LACPDUsTx++;
665 * Initialize the ethernet header of a LACP packet sent from the specified
666 * port.
668 static void
669 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
671 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
672 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
673 ETHERADDRL);
674 ether->ether_type = htons(ETHERTYPE_SLOW);
677 static void
678 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
680 aggr_lacp_port_t *pl = &portp->lp_lacp;
681 aggr_grp_t *aggrp = portp->lp_grp;
682 mac_perim_handle_t pmph;
684 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
685 mac_perim_enter_by_mh(portp->lp_mh, &pmph);
687 lacp->subtype = LACP_SUBTYPE;
688 lacp->version = LACP_VERSION;
691 * Actor Information
693 lacp->actor_info.tlv_type = ACTOR_TLV;
694 lacp->actor_info.information_len = sizeof (link_info_t);
695 lacp->actor_info.system_priority =
696 htons(aggrp->aggr.ActorSystemPriority);
697 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
698 ETHERADDRL);
699 lacp->actor_info.key = htons(pl->ActorOperPortKey);
700 lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
701 lacp->actor_info.port = htons(pl->ActorPortNumber);
702 lacp->actor_info.state.state = pl->ActorOperPortState.state;
705 * Partner Information
707 lacp->partner_info.tlv_type = PARTNER_TLV;
708 lacp->partner_info.information_len = sizeof (link_info_t);
709 lacp->partner_info.system_priority =
710 htons(pl->PartnerOperSysPriority);
711 lacp->partner_info.system_id = pl->PartnerOperSystem;
712 lacp->partner_info.key = htons(pl->PartnerOperKey);
713 lacp->partner_info.port_priority =
714 htons(pl->PartnerOperPortPriority);
715 lacp->partner_info.port = htons(pl->PartnerOperPortNum);
716 lacp->partner_info.state.state = pl->PartnerOperPortState.state;
718 /* Collector Information */
719 lacp->tlv_collector = COLLECTOR_TLV;
720 lacp->collector_len = 0x10;
721 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
723 /* Termination Information */
724 lacp->tlv_terminator = TERMINATOR_TLV;
725 lacp->terminator_len = 0x0;
727 mac_perim_exit(pmph);
731 * lacp_mux_sm - LACP mux state machine
732 * This state machine is invoked from:
733 * - startup upon aggregation
734 * - from the Selection logic
735 * - when the wait_while_timer pops
736 * - when the aggregation MAC address is changed
737 * - when receiving DL_NOTE_LINK_UP/DOWN
738 * - when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
739 * - when LACP mode is changed.
740 * - when a DL_NOTE_SPEED is received
742 static void
743 lacp_mux_sm(aggr_port_t *portp)
745 aggr_grp_t *aggrp = portp->lp_grp;
746 boolean_t NTT_updated = B_FALSE;
747 aggr_lacp_port_t *pl = &portp->lp_lacp;
748 lacp_mux_state_t oldstate = pl->sm.mux_state;
750 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
752 /* LACP_OFF state not in specification so check here. */
753 if (!pl->sm.lacp_on) {
754 pl->sm.mux_state = LACP_DETACHED;
755 pl->ActorOperPortState.bit.sync = B_FALSE;
757 if (pl->ActorOperPortState.bit.collecting ||
758 pl->ActorOperPortState.bit.distributing) {
759 AGGR_LACP_DBG(("trunk link: (%d): "
760 "Collector_Distributor Disabled.\n",
761 portp->lp_linkid));
764 pl->ActorOperPortState.bit.collecting =
765 pl->ActorOperPortState.bit.distributing = B_FALSE;
766 return;
769 if (pl->sm.begin || !pl->sm.lacp_enabled)
770 pl->sm.mux_state = LACP_DETACHED;
772 again:
773 /* determine next state, or return if state unchanged */
774 switch (pl->sm.mux_state) {
775 case LACP_DETACHED:
776 if (pl->sm.begin) {
777 break;
780 if ((pl->sm.selected == AGGR_SELECTED) ||
781 (pl->sm.selected == AGGR_STANDBY)) {
782 pl->sm.mux_state = LACP_WAITING;
783 break;
785 return;
787 case LACP_WAITING:
788 if (pl->sm.selected == AGGR_UNSELECTED) {
789 pl->sm.mux_state = LACP_DETACHED;
790 break;
793 if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
794 pl->sm.mux_state = LACP_ATTACHED;
795 break;
797 return;
799 case LACP_ATTACHED:
800 if ((pl->sm.selected == AGGR_UNSELECTED) ||
801 (pl->sm.selected == AGGR_STANDBY)) {
802 pl->sm.mux_state = LACP_DETACHED;
803 break;
806 if ((pl->sm.selected == AGGR_SELECTED) &&
807 pl->PartnerOperPortState.bit.sync) {
808 pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
809 break;
811 return;
813 case LACP_COLLECTING_DISTRIBUTING:
814 if ((pl->sm.selected == AGGR_UNSELECTED) ||
815 (pl->sm.selected == AGGR_STANDBY) ||
816 !pl->PartnerOperPortState.bit.sync) {
817 pl->sm.mux_state = LACP_ATTACHED;
818 break;
820 return;
823 AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
824 portp->lp_linkid, lacp_mux_str[oldstate],
825 lacp_mux_str[pl->sm.mux_state]));
827 /* perform actions on entering a new state */
828 switch (pl->sm.mux_state) {
829 case LACP_DETACHED:
830 if (pl->ActorOperPortState.bit.collecting ||
831 pl->ActorOperPortState.bit.distributing) {
832 AGGR_LACP_DBG(("trunk link: (%d): "
833 "Collector_Distributor Disabled.\n",
834 portp->lp_linkid));
837 pl->ActorOperPortState.bit.sync =
838 pl->ActorOperPortState.bit.collecting = B_FALSE;
840 /* Turn OFF Collector_Distributor */
841 aggr_set_coll_dist(portp, B_FALSE);
843 pl->ActorOperPortState.bit.distributing = B_FALSE;
844 NTT_updated = B_TRUE;
845 break;
847 case LACP_WAITING:
848 start_wait_while_timer(portp);
849 break;
851 case LACP_ATTACHED:
852 if (pl->ActorOperPortState.bit.collecting ||
853 pl->ActorOperPortState.bit.distributing) {
854 AGGR_LACP_DBG(("trunk link: (%d): "
855 "Collector_Distributor Disabled.\n",
856 portp->lp_linkid));
859 pl->ActorOperPortState.bit.sync = B_TRUE;
860 pl->ActorOperPortState.bit.collecting = B_FALSE;
862 /* Turn OFF Collector_Distributor */
863 aggr_set_coll_dist(portp, B_FALSE);
865 pl->ActorOperPortState.bit.distributing = B_FALSE;
866 NTT_updated = B_TRUE;
867 if (pl->PartnerOperPortState.bit.sync) {
869 * We had already received an updated sync from
870 * the partner. Attempt to transition to
871 * collecting/distributing now.
873 goto again;
875 break;
877 case LACP_COLLECTING_DISTRIBUTING:
878 if (!pl->ActorOperPortState.bit.collecting &&
879 !pl->ActorOperPortState.bit.distributing) {
880 AGGR_LACP_DBG(("trunk link: (%d): "
881 "Collector_Distributor Enabled.\n",
882 portp->lp_linkid));
884 pl->ActorOperPortState.bit.distributing = B_TRUE;
886 /* Turn Collector_Distributor back ON */
887 aggr_set_coll_dist(portp, B_TRUE);
889 pl->ActorOperPortState.bit.collecting = B_TRUE;
890 NTT_updated = B_TRUE;
891 break;
895 * If we updated the state of the NTT variable, then
896 * initiate a LACPDU transmission.
898 if (NTT_updated) {
899 pl->NTT = B_TRUE;
900 lacp_xmit_sm(portp);
902 } /* lacp_mux_sm */
905 static int
906 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
908 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr;
910 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
912 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
913 portp->lp_linkid));
915 /* LACP_OFF state not in specification so check here. */
916 if (!portp->lp_lacp.sm.lacp_on)
917 return (-1);
919 if (MBLKL(mp) < sizeof (marker_pdu_t))
920 return (-1);
922 if (markerp->version != MARKER_VERSION) {
923 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
924 "version = %d does not match s/w version %d\n",
925 portp->lp_linkid, markerp->version, MARKER_VERSION));
926 return (-1);
929 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
930 /* We do not yet send out MARKER info PDUs */
931 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
932 " MARKER TLV = %d - We don't send out info type!\n",
933 portp->lp_linkid, markerp->tlv_marker));
934 return (-1);
937 if (markerp->tlv_marker != MARKER_INFO_TLV) {
938 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
939 " MARKER TLV = %d \n", portp->lp_linkid,
940 markerp->tlv_marker));
941 return (-1);
944 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
945 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
946 " MARKER length = %d \n", portp->lp_linkid,
947 markerp->marker_len));
948 return (-1);
951 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
952 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
953 " MARKER Port %d not equal to Partner port %d\n",
954 portp->lp_linkid, markerp->requestor_port,
955 portp->lp_lacp.PartnerOperPortNum));
956 return (-1);
959 if (ether_cmp(&markerp->system_id,
960 &portp->lp_lacp.PartnerOperSystem) != 0) {
961 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
962 " MARKER MAC not equal to Partner MAC\n",
963 portp->lp_linkid));
964 return (-1);
968 * Turn into Marker Response PDU
969 * and return mblk to sending system
971 markerp->tlv_marker = MARKER_RESPONSE_TLV;
973 /* reuse the space that was used by received ethernet header */
974 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
975 mp->b_rptr -= sizeof (struct ether_header);
976 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
977 return (0);
981 * Update the LACP mode (off, active, or passive) of the specified group.
983 void
984 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
986 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
987 aggr_port_t *port;
989 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
990 ASSERT(!grp->lg_closing);
992 if (mode == old_mode)
993 return;
995 grp->lg_lacp_mode = mode;
997 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
998 port->lp_lacp.ActorAdminPortState.bit.activity =
999 port->lp_lacp.ActorOperPortState.bit.activity =
1000 (mode == AGGR_LACP_ACTIVE);
1002 if (old_mode == AGGR_LACP_OFF) {
1003 /* OFF -> {PASSIVE,ACTIVE} */
1004 /* turn OFF Collector_Distributor */
1005 aggr_set_coll_dist(port, B_FALSE);
1006 lacp_on(port);
1007 } else if (mode == AGGR_LACP_OFF) {
1008 /* {PASSIVE,ACTIVE} -> OFF */
1009 lacp_off(port);
1010 /* Turn ON Collector_Distributor */
1011 aggr_set_coll_dist(port, B_TRUE);
1012 } else {
1013 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1014 port->lp_lacp.sm.begin = B_TRUE;
1015 lacp_mux_sm(port);
1016 lacp_periodic_sm(port);
1018 /* kick off state machines */
1019 lacp_receive_sm(port, NULL);
1020 lacp_mux_sm(port);
1027 * Update the LACP timer (short or long) of the specified group.
1029 void
1030 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1032 aggr_port_t *port;
1034 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1036 if (timer == grp->aggr.PeriodicTimer)
1037 return;
1039 grp->aggr.PeriodicTimer = timer;
1041 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1042 port->lp_lacp.ActorAdminPortState.bit.timeout =
1043 port->lp_lacp.ActorOperPortState.bit.timeout =
1044 (timer == AGGR_LACP_TIMER_SHORT);
1048 void
1049 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1051 aggr_lacp_mode_t mode;
1052 aggr_lacp_timer_t timer;
1054 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1056 mode = grp->lg_lacp_mode;
1057 timer = grp->aggr.PeriodicTimer;
1059 port->lp_lacp.ActorAdminPortState.bit.activity =
1060 port->lp_lacp.ActorOperPortState.bit.activity =
1061 (mode == AGGR_LACP_ACTIVE);
1063 port->lp_lacp.ActorAdminPortState.bit.timeout =
1064 port->lp_lacp.ActorOperPortState.bit.timeout =
1065 (timer == AGGR_LACP_TIMER_SHORT);
1067 if (mode == AGGR_LACP_OFF) {
1068 /* Turn ON Collector_Distributor */
1069 aggr_set_coll_dist(port, B_TRUE);
1070 } else { /* LACP_ACTIVE/PASSIVE */
1071 lacp_on(port);
1076 * Sets the initial LACP mode (off, active, passive) and LACP timer
1077 * (short, long) of the specified group.
1079 void
1080 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1081 aggr_lacp_timer_t timer)
1083 aggr_port_t *port;
1085 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1087 grp->lg_lacp_mode = mode;
1088 grp->aggr.PeriodicTimer = timer;
1090 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1091 aggr_port_lacp_set_mode(grp, port);
1095 * Verify that the Partner MAC and Key recorded by the specified
1096 * port are not found in other ports that are not part of our
1097 * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1098 * otherwise.
1100 static boolean_t
1101 lacp_misconfig_check(aggr_port_t *portp)
1103 aggr_grp_t *grp = portp->lp_grp;
1104 lacp_sel_ports_t *cport;
1106 mutex_enter(&lacp_sel_lock);
1108 for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1110 /* skip entries of the group of the port being checked */
1111 if (cport->sp_grp_linkid == grp->lg_linkid)
1112 continue;
1114 if ((ether_cmp(&cport->sp_partner_system,
1115 &grp->aggr.PartnerSystem) == 0) &&
1116 (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1117 char mac_str[ETHERADDRL*3];
1118 struct ether_addr *mac = &cport->sp_partner_system;
1121 * The Partner port information is already in use
1122 * by ports in another aggregation so disable this
1123 * port.
1126 (void) snprintf(mac_str, sizeof (mac_str),
1127 "%x:%x:%x:%x:%x:%x",
1128 mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1129 mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1130 mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1132 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1134 cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1135 "MAC %s and key %d in use on aggregation %d "
1136 "port %d\n", grp->lg_linkid, portp->lp_linkid,
1137 mac_str, portp->lp_lacp.PartnerOperKey,
1138 cport->sp_grp_linkid, cport->sp_linkid);
1139 break;
1143 mutex_exit(&lacp_sel_lock);
1144 return (cport != NULL);
1148 * Remove the specified port from the list of selected ports.
1150 static void
1151 lacp_sel_ports_del(aggr_port_t *portp)
1153 lacp_sel_ports_t *cport, **prev = NULL;
1155 mutex_enter(&lacp_sel_lock);
1157 prev = &sel_ports;
1158 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1159 cport = cport->sp_next) {
1160 if (portp->lp_linkid == cport->sp_linkid)
1161 break;
1164 if (cport == NULL) {
1165 mutex_exit(&lacp_sel_lock);
1166 return;
1169 *prev = cport->sp_next;
1170 kmem_free(cport, sizeof (*cport));
1172 mutex_exit(&lacp_sel_lock);
1176 * Add the specified port to the list of selected ports. Returns B_FALSE
1177 * if the operation could not be performed due to an memory allocation
1178 * error.
1180 static boolean_t
1181 lacp_sel_ports_add(aggr_port_t *portp)
1183 lacp_sel_ports_t *new_port;
1184 lacp_sel_ports_t *cport, **last;
1186 mutex_enter(&lacp_sel_lock);
1188 /* check if port is already in the list */
1189 last = &sel_ports;
1190 for (cport = sel_ports; cport != NULL;
1191 last = &cport->sp_next, cport = cport->sp_next) {
1192 if (portp->lp_linkid == cport->sp_linkid) {
1193 ASSERT(cport->sp_partner_key ==
1194 portp->lp_lacp.PartnerOperKey);
1195 ASSERT(ether_cmp(&cport->sp_partner_system,
1196 &portp->lp_lacp.PartnerOperSystem) == 0);
1198 mutex_exit(&lacp_sel_lock);
1199 return (B_TRUE);
1203 /* create and initialize new entry */
1204 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1205 if (new_port == NULL) {
1206 mutex_exit(&lacp_sel_lock);
1207 return (B_FALSE);
1210 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1211 bcopy(&portp->lp_lacp.PartnerOperSystem,
1212 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1213 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1214 new_port->sp_linkid = portp->lp_linkid;
1216 *last = new_port;
1218 mutex_exit(&lacp_sel_lock);
1219 return (B_TRUE);
1223 * lacp_selection_logic - LACP selection logic
1224 * Sets the selected variable on a per port basis
1225 * and sets Ready when all waiting ports are ready
1226 * to go online.
1228 * parameters:
1229 * - portp - instance this applies to.
1231 * invoked:
1232 * - when initialization is needed
1233 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1234 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1235 * - every time the wait_while_timer pops
1236 * - everytime we turn LACP on/off
1238 static void
1239 lacp_selection_logic(aggr_port_t *portp)
1241 aggr_port_t *tpp;
1242 aggr_grp_t *aggrp = portp->lp_grp;
1243 int ports_waiting;
1244 boolean_t reset_mac = B_FALSE;
1245 aggr_lacp_port_t *pl = &portp->lp_lacp;
1247 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1249 /* LACP_OFF state not in specification so check here. */
1250 if (!pl->sm.lacp_on) {
1251 lacp_port_unselect(portp);
1252 aggrp->aggr.ready = B_FALSE;
1253 lacp_mux_sm(portp);
1254 return;
1257 if (pl->sm.begin || !pl->sm.lacp_enabled ||
1258 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1260 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1261 "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1262 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1263 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1264 portp->lp_state));
1266 lacp_port_unselect(portp);
1267 aggrp->aggr.ready = B_FALSE;
1268 lacp_mux_sm(portp);
1269 return;
1273 * If LACP is not enabled then selected is never set.
1275 if (!pl->sm.lacp_enabled) {
1276 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1277 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1279 lacp_port_unselect(portp);
1280 lacp_mux_sm(portp);
1281 return;
1285 * Check if the Partner MAC or Key are zero. If so, we have
1286 * not received any LACP info or it has expired and the
1287 * receive machine is in the LACP_DEFAULTED state.
1289 if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1290 (pl->PartnerOperKey == 0)) {
1292 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1293 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1294 &etherzeroaddr) != 0 &&
1295 (tpp->lp_lacp.PartnerOperKey != 0))
1296 break;
1300 * If all ports have no key or aggregation address,
1301 * then clear the negotiated Partner MAC and key.
1303 if (tpp == NULL) {
1304 /* Clear the aggregation Partner MAC and key */
1305 aggrp->aggr.PartnerSystem = etherzeroaddr;
1306 aggrp->aggr.PartnerOperAggrKey = 0;
1309 return;
1313 * Insure that at least one port in the aggregation
1314 * matches the Partner aggregation MAC and key. If not,
1315 * then clear the aggregation MAC and key. Later we will
1316 * set the Partner aggregation MAC and key to that of the
1317 * current port's Partner MAC and key.
1319 if (ether_cmp(&pl->PartnerOperSystem,
1320 &aggrp->aggr.PartnerSystem) != 0 ||
1321 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1323 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1324 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1325 &aggrp->aggr.PartnerSystem) == 0 &&
1326 (tpp->lp_lacp.PartnerOperKey ==
1327 aggrp->aggr.PartnerOperAggrKey)) {
1328 /* Set aggregation Partner MAC and key */
1329 aggrp->aggr.PartnerSystem =
1330 pl->PartnerOperSystem;
1331 aggrp->aggr.PartnerOperAggrKey =
1332 pl->PartnerOperKey;
1333 break;
1337 if (tpp == NULL) {
1338 /* Clear the aggregation Partner MAC and key */
1339 aggrp->aggr.PartnerSystem = etherzeroaddr;
1340 aggrp->aggr.PartnerOperAggrKey = 0;
1341 reset_mac = B_TRUE;
1346 * If our Actor MAC is found in the Partner MAC
1347 * on this port then we have a loopback misconfiguration.
1349 if (ether_cmp(&pl->PartnerOperSystem,
1350 (struct ether_addr *)&aggrp->lg_addr) == 0) {
1351 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1352 portp->lp_linkid);
1354 lacp_port_unselect(portp);
1355 lacp_mux_sm(portp);
1356 return;
1360 * If our Partner MAC and Key are found on any other
1361 * ports that are not in our aggregation, we have
1362 * a misconfiguration.
1364 if (lacp_misconfig_check(portp)) {
1365 lacp_mux_sm(portp);
1366 return;
1370 * If the Aggregation Partner MAC and Key have not been
1371 * set, then this is either the first port or the aggregation
1372 * MAC and key have been reset. In either case we must set
1373 * the values of the Partner MAC and key.
1375 if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1376 (aggrp->aggr.PartnerOperAggrKey == 0)) {
1377 /* Set aggregation Partner MAC and key */
1378 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1379 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1382 * If we reset Partner aggregation MAC, then restart
1383 * selection_logic on ports that match new MAC address.
1385 if (reset_mac) {
1386 for (tpp = aggrp->lg_ports; tpp; tpp =
1387 tpp->lp_next) {
1388 if (tpp == portp)
1389 continue;
1390 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1391 &aggrp->aggr.PartnerSystem) == 0 &&
1392 (tpp->lp_lacp.PartnerOperKey ==
1393 aggrp->aggr.PartnerOperAggrKey))
1394 lacp_selection_logic(tpp);
1397 } else if (ether_cmp(&pl->PartnerOperSystem,
1398 &aggrp->aggr.PartnerSystem) != 0 ||
1399 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1401 * The Partner port information does not match
1402 * that of the other ports in the aggregation
1403 * so disable this port.
1405 lacp_port_unselect(portp);
1407 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1408 "or key (%d) incompatible with Aggregation Partner "
1409 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1410 aggrp->aggr.PartnerOperAggrKey);
1412 lacp_mux_sm(portp);
1413 return;
1416 /* If we get to here, automatically set selected */
1417 if (pl->sm.selected != AGGR_SELECTED) {
1418 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1419 "selected %d-->%d\n", portp->lp_linkid,
1420 pl->sm.selected, AGGR_SELECTED));
1421 if (!lacp_port_select(portp))
1422 return;
1423 lacp_mux_sm(portp);
1427 * From this point onward we have selected the port
1428 * and are simply checking if the Ready flag should
1429 * be set.
1433 * If at least two ports are waiting to aggregate
1434 * and ready_n is set on all ports waiting to aggregate
1435 * then set READY for the aggregation.
1438 ports_waiting = 0;
1440 if (!aggrp->aggr.ready) {
1442 * If all ports in the aggregation have received compatible
1443 * partner information and they match up correctly with the
1444 * switch, there is no need to wait for all the
1445 * wait_while_timers to pop.
1447 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1448 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1449 tpp->lp_lacp.sm.begin) &&
1450 !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1451 /* Add up ports uninitialized or waiting */
1452 ports_waiting++;
1453 if (!tpp->lp_lacp.sm.ready_n) {
1454 DTRACE_PROBE1(port___not__ready,
1455 aggr_port_t *, tpp);
1456 return;
1462 if (aggrp->aggr.ready) {
1463 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1464 "aggr.ready already set\n", portp->lp_linkid));
1465 lacp_mux_sm(portp);
1466 } else {
1467 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1468 portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1469 aggrp->aggr.ready = B_TRUE;
1471 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1472 lacp_mux_sm(tpp);
1478 * wait_while_timer_pop - When the timer pops, we arrive here to
1479 * set ready_n and trigger the selection logic.
1481 static void
1482 wait_while_timer_pop(void *data)
1484 aggr_port_t *portp = data;
1485 aggr_lacp_port_t *pl = &portp->lp_lacp;
1487 mutex_enter(&pl->lacp_timer_lock);
1488 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1489 cv_broadcast(&pl->lacp_timer_cv);
1490 mutex_exit(&pl->lacp_timer_lock);
1494 * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1495 * set ready_n and trigger the selection logic.
1497 static void
1498 wait_while_timer_pop_handler(aggr_port_t *portp)
1500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1502 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1503 portp->lp_linkid));
1504 portp->lp_lacp.sm.ready_n = B_TRUE;
1506 lacp_selection_logic(portp);
1509 static void
1510 start_wait_while_timer(aggr_port_t *portp)
1512 aggr_lacp_port_t *pl = &portp->lp_lacp;
1514 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1516 mutex_enter(&pl->lacp_timer_lock);
1517 if (pl->wait_while_timer.id == 0) {
1518 pl->wait_while_timer.id =
1519 timeout(wait_while_timer_pop, portp,
1520 drv_usectohz(1000000 *
1521 portp->lp_lacp.wait_while_timer.val));
1523 mutex_exit(&pl->lacp_timer_lock);
1527 static void
1528 stop_wait_while_timer(aggr_port_t *portp)
1530 aggr_lacp_port_t *pl = &portp->lp_lacp;
1531 timeout_id_t id;
1533 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1535 mutex_enter(&pl->lacp_timer_lock);
1536 if ((id = pl->wait_while_timer.id) != 0) {
1537 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1538 pl->wait_while_timer.id = 0;
1540 mutex_exit(&pl->lacp_timer_lock);
1542 if (id != 0)
1543 (void) untimeout(id);
1547 * Invoked when a port has been attached to a group.
1548 * Complete the processing that couldn't be finished from lacp_on()
1549 * because the port was not started. We know that the link is full
1550 * duplex and ON, otherwise it wouldn't be attached.
1552 void
1553 aggr_lacp_port_attached(aggr_port_t *portp)
1555 aggr_grp_t *grp = portp->lp_grp;
1556 aggr_lacp_port_t *pl = &portp->lp_lacp;
1558 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1559 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1560 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1562 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1563 portp->lp_linkid));
1565 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */
1567 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1568 return;
1570 pl->sm.lacp_enabled = B_TRUE;
1571 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1572 pl->sm.begin = B_TRUE;
1574 lacp_receive_sm(portp, NULL);
1575 lacp_mux_sm(portp);
1577 /* Enable Multicast Slow Protocol address */
1578 aggr_lacp_mcast_on(portp);
1580 /* periodic_sm is started up from the receive machine */
1581 lacp_selection_logic(portp);
1585 * Invoked when a port has been detached from a group. Turn off
1586 * LACP processing if it was enabled.
1588 void
1589 aggr_lacp_port_detached(aggr_port_t *portp)
1591 aggr_grp_t *grp = portp->lp_grp;
1593 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1594 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1596 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1597 portp->lp_linkid));
1599 portp->lp_lacp.sm.port_enabled = B_FALSE;
1601 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1602 return;
1604 portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1605 lacp_selection_logic(portp);
1606 lacp_mux_sm(portp);
1607 lacp_periodic_sm(portp);
1610 * Disable Slow Protocol Timers.
1612 stop_periodic_timer(portp);
1613 stop_current_while_timer(portp);
1614 stop_wait_while_timer(portp);
1616 /* Disable Multicast Slow Protocol address */
1617 aggr_lacp_mcast_off(portp);
1618 aggr_set_coll_dist(portp, B_FALSE);
1622 * Enable Slow Protocol LACP and Marker PDUs.
1624 static void
1625 lacp_on(aggr_port_t *portp)
1627 aggr_lacp_port_t *pl = &portp->lp_lacp;
1628 mac_perim_handle_t mph;
1630 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1632 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1635 * Reset the state machines and Partner operational
1636 * information. Careful to not reset things like
1637 * our link state.
1639 lacp_reset_port(portp);
1640 pl->sm.lacp_on = B_TRUE;
1642 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1644 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1645 pl->sm.port_enabled = B_TRUE;
1646 pl->sm.lacp_enabled = B_TRUE;
1647 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1650 lacp_receive_sm(portp, NULL);
1651 lacp_mux_sm(portp);
1653 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1654 /* Enable Multicast Slow Protocol address */
1655 aggr_lacp_mcast_on(portp);
1657 /* periodic_sm is started up from the receive machine */
1658 lacp_selection_logic(portp);
1660 done:
1661 mac_perim_exit(mph);
1662 } /* lacp_on */
1664 /* Disable Slow Protocol LACP and Marker PDUs */
1665 static void
1666 lacp_off(aggr_port_t *portp)
1668 aggr_lacp_port_t *pl = &portp->lp_lacp;
1669 mac_perim_handle_t mph;
1671 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1672 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1674 pl->sm.lacp_on = B_FALSE;
1676 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1678 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1680 * Disable Slow Protocol Timers.
1682 stop_periodic_timer(portp);
1683 stop_current_while_timer(portp);
1684 stop_wait_while_timer(portp);
1686 /* Disable Multicast Slow Protocol address */
1687 aggr_lacp_mcast_off(portp);
1689 pl->sm.port_enabled = B_FALSE;
1690 pl->sm.lacp_enabled = B_FALSE;
1691 pl->ActorOperPortState.bit.aggregation = B_FALSE;
1694 lacp_mux_sm(portp);
1695 lacp_periodic_sm(portp);
1696 lacp_selection_logic(portp);
1698 /* Turn OFF Collector_Distributor */
1699 aggr_set_coll_dist(portp, B_FALSE);
1701 lacp_reset_port(portp);
1702 mac_perim_exit(mph);
1706 static boolean_t
1707 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1710 * 43.4.12 - "a Receive machine shall not validate
1711 * the Version Number, TLV_type, or Reserved fields in received
1712 * LACPDUs."
1713 * ... "a Receive machine may validate the Actor_Information_Length,
1714 * Partner_Information_Length, Collector_Information_Length,
1715 * or Terminator_Length fields."
1717 if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1718 (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1719 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1720 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1721 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1722 " Terminator Length = %d \n", portp->lp_linkid,
1723 lacp->terminator_len));
1724 return (B_FALSE);
1727 return (B_TRUE);
1731 static void
1732 start_current_while_timer(aggr_port_t *portp, uint_t time)
1734 aggr_lacp_port_t *pl = &portp->lp_lacp;
1736 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1738 mutex_enter(&pl->lacp_timer_lock);
1739 if (pl->current_while_timer.id == 0) {
1740 if (time > 0)
1741 pl->current_while_timer.val = time;
1742 else if (pl->ActorOperPortState.bit.timeout)
1743 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1744 else
1745 pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1747 pl->current_while_timer.id =
1748 timeout(current_while_timer_pop, portp,
1749 drv_usectohz((clock_t)1000000 *
1750 (clock_t)portp->lp_lacp.current_while_timer.val));
1752 mutex_exit(&pl->lacp_timer_lock);
1756 static void
1757 stop_current_while_timer(aggr_port_t *portp)
1759 aggr_lacp_port_t *pl = &portp->lp_lacp;
1760 timeout_id_t id;
1762 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1764 mutex_enter(&pl->lacp_timer_lock);
1765 if ((id = pl->current_while_timer.id) != 0) {
1766 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1767 pl->current_while_timer.id = 0;
1769 mutex_exit(&pl->lacp_timer_lock);
1771 if (id != 0)
1772 (void) untimeout(id);
1775 static void
1776 current_while_timer_pop(void *data)
1778 aggr_port_t *portp = (aggr_port_t *)data;
1779 aggr_lacp_port_t *pl = &portp->lp_lacp;
1781 mutex_enter(&pl->lacp_timer_lock);
1782 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1783 cv_broadcast(&pl->lacp_timer_cv);
1784 mutex_exit(&pl->lacp_timer_lock);
1787 static void
1788 current_while_timer_pop_handler(aggr_port_t *portp)
1790 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1792 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1793 "pop id=%p\n", portp->lp_linkid,
1794 portp->lp_lacp.current_while_timer.id));
1796 lacp_receive_sm(portp, NULL);
1800 * record_Default - Simply copies over administrative values
1801 * to the partner operational values, and sets our state to indicate we
1802 * are using defaulted values.
1804 static void
1805 record_Default(aggr_port_t *portp)
1807 aggr_lacp_port_t *pl = &portp->lp_lacp;
1809 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1811 pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1812 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1813 pl->PartnerOperSystem = pl->PartnerAdminSystem;
1814 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1815 pl->PartnerOperKey = pl->PartnerAdminKey;
1816 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1818 pl->ActorOperPortState.bit.defaulted = B_TRUE;
1822 /* Returns B_TRUE on sync value changing */
1823 static boolean_t
1824 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1826 aggr_grp_t *aggrp = portp->lp_grp;
1827 aggr_lacp_port_t *pl = &portp->lp_lacp;
1828 uint8_t save_sync;
1830 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1833 * Partner Information
1835 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1836 pl->PartnerOperPortPriority =
1837 ntohs(lacp->actor_info.port_priority);
1838 pl->PartnerOperSystem = lacp->actor_info.system_id;
1839 pl->PartnerOperSysPriority =
1840 htons(lacp->actor_info.system_priority);
1841 pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1843 /* All state info except for Synchronization */
1844 save_sync = pl->PartnerOperPortState.bit.sync;
1845 pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1847 /* Defaulted set to FALSE */
1848 pl->ActorOperPortState.bit.defaulted = B_FALSE;
1851 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1852 * Partner_System_Priority, Partner_Key, and
1853 * Partner_State.Aggregation) are compared to the
1854 * corresponding operations paramters values for
1855 * the Actor. If these are equal, or if this is
1856 * an individual link, we are synchronized.
1858 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1859 (ntohs(lacp->partner_info.port_priority) ==
1860 pl->ActorPortPriority) &&
1861 (ether_cmp(&lacp->partner_info.system_id,
1862 (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1863 (ntohs(lacp->partner_info.system_priority) ==
1864 aggrp->aggr.ActorSystemPriority) &&
1865 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1866 (lacp->partner_info.state.bit.aggregation ==
1867 pl->ActorOperPortState.bit.aggregation)) ||
1868 (!lacp->actor_info.state.bit.aggregation)) {
1870 pl->PartnerOperPortState.bit.sync =
1871 lacp->actor_info.state.bit.sync;
1872 } else {
1873 pl->PartnerOperPortState.bit.sync = B_FALSE;
1876 if (save_sync != pl->PartnerOperPortState.bit.sync) {
1877 AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1878 "%d -->%d\n", portp->lp_linkid, save_sync,
1879 pl->PartnerOperPortState.bit.sync));
1880 return (B_TRUE);
1881 } else {
1882 return (B_FALSE);
1888 * update_selected - If any of the Partner parameters has
1889 * changed from a previous value, then
1890 * unselect the link from the aggregator.
1892 static boolean_t
1893 update_selected(aggr_port_t *portp, lacp_t *lacp)
1895 aggr_lacp_port_t *pl = &portp->lp_lacp;
1897 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1899 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1900 (pl->PartnerOperPortPriority !=
1901 ntohs(lacp->actor_info.port_priority)) ||
1902 (ether_cmp(&pl->PartnerOperSystem,
1903 &lacp->actor_info.system_id) != 0) ||
1904 (pl->PartnerOperSysPriority !=
1905 ntohs(lacp->actor_info.system_priority)) ||
1906 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1907 (pl->PartnerOperPortState.bit.aggregation !=
1908 lacp->actor_info.state.bit.aggregation)) {
1909 AGGR_LACP_DBG(("update_selected:(%d): "
1910 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1911 AGGR_UNSELECTED));
1913 lacp_port_unselect(portp);
1914 return (B_TRUE);
1915 } else {
1916 return (B_FALSE);
1922 * update_default_selected - If any of the operational Partner parameters
1923 * is different than that of the administrative values
1924 * then unselect the link from the aggregator.
1926 static void
1927 update_default_selected(aggr_port_t *portp)
1929 aggr_lacp_port_t *pl = &portp->lp_lacp;
1931 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1933 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1934 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1935 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1936 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1937 (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1938 (pl->PartnerOperPortState.bit.aggregation !=
1939 pl->PartnerAdminPortState.bit.aggregation)) {
1941 AGGR_LACP_DBG(("update_default_selected:(%d): "
1942 "selected %d-->%d\n", portp->lp_linkid,
1943 pl->sm.selected, AGGR_UNSELECTED));
1945 lacp_port_unselect(portp);
1951 * update_NTT - If any of the Partner values in the received LACPDU
1952 * are different than that of the Actor operational
1953 * values then set NTT to true.
1955 static void
1956 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1958 aggr_grp_t *aggrp = portp->lp_grp;
1959 aggr_lacp_port_t *pl = &portp->lp_lacp;
1961 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1963 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1964 (pl->ActorPortPriority !=
1965 ntohs(lacp->partner_info.port_priority)) ||
1966 (ether_cmp(&aggrp->lg_addr,
1967 &lacp->partner_info.system_id) != 0) ||
1968 (aggrp->aggr.ActorSystemPriority !=
1969 ntohs(lacp->partner_info.system_priority)) ||
1970 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1971 (pl->ActorOperPortState.bit.activity !=
1972 lacp->partner_info.state.bit.activity) ||
1973 (pl->ActorOperPortState.bit.timeout !=
1974 lacp->partner_info.state.bit.timeout) ||
1975 (pl->ActorOperPortState.bit.sync !=
1976 lacp->partner_info.state.bit.sync) ||
1977 (pl->ActorOperPortState.bit.aggregation !=
1978 lacp->partner_info.state.bit.aggregation)) {
1980 AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n",
1981 portp->lp_linkid, pl->NTT, B_TRUE));
1983 pl->NTT = B_TRUE;
/*
 * lacp_receive_sm - LACP receive state machine
 *
 * parameters:
 *      - portp - instance this applies to.
 *      - lacp  - pointer in the case of a received LACPDU.
 *                This value is NULL if there is no LACPDU.
 *
 * invoked:
 *    - when initialization is needed
 *    - upon reception of an LACPDU. This is the common case.
 *    - every time the current_while_timer pops
 *
 * The function first works out the next receive state from the port's
 * flags, the current state and whether an LACPDU was supplied, and then
 * performs the actions of that state. The LACP_INITIALIZE and
 * LACP_PORT_DISABLED states re-enter the function recursively to carry
 * on with the following state.
 *
 * The caller must hold the group's MAC perimeter.
 */
static void
lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
{
	boolean_t sync_updated, selected_updated, save_activity;
	aggr_lacp_port_t *pl = &portp->lp_lacp;
	lacp_receive_state_t oldstate = pl->sm.receive_state;

	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));

	/* LACP_OFF state not in specification so check here. */
	if (!pl->sm.lacp_on)
		return;

	/*
	 * Figure the next state. A zero current_while_timer.id is what
	 * distinguishes "timer no longer armed" in the last two branches.
	 */
	if (pl->sm.begin || pl->sm.port_moved) {
		pl->sm.receive_state = LACP_INITIALIZE;
	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
		pl->sm.receive_state = LACP_PORT_DISABLED;
	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
		pl->sm.receive_state =
		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
		    LACP_DISABLED : LACP_PORT_DISABLED;
	} else if (lacp != NULL) {
		/* an LACPDU moves EXPIRED/DEFAULTED back to CURRENT */
		if ((pl->sm.receive_state == LACP_EXPIRED) ||
		    (pl->sm.receive_state == LACP_DEFAULTED)) {
			pl->sm.receive_state = LACP_CURRENT;
		}
	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
	    (pl->current_while_timer.id == 0)) {
		pl->sm.receive_state = LACP_EXPIRED;
	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
	    (pl->current_while_timer.id == 0)) {
		pl->sm.receive_state = LACP_DEFAULTED;
	}

	/*
	 * Trace the transition, except for the common case of an LACPDU
	 * received while staying in LACP_CURRENT.
	 */
	if (!((lacp && (oldstate == LACP_CURRENT) &&
	    (pl->sm.receive_state == LACP_CURRENT)))) {
		AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
		    portp->lp_linkid, lacp_receive_str[oldstate],
		    lacp_receive_str[pl->sm.receive_state]));
	}

	switch (pl->sm.receive_state) {
	case LACP_INITIALIZE:
		lacp_port_unselect(portp);
		record_Default(portp);
		pl->ActorOperPortState.bit.expired = B_FALSE;
		pl->sm.port_moved = B_FALSE;
		pl->sm.receive_state = LACP_PORT_DISABLED;
		pl->sm.begin = B_FALSE;
		/* begin/port_moved are clear now: continue from the new state */
		lacp_receive_sm(portp, NULL);
		break;

	case LACP_PORT_DISABLED:
		pl->PartnerOperPortState.bit.sync = B_FALSE;
		/*
		 * Stop current_while_timer in case
		 * we got here from link down
		 */
		stop_current_while_timer(portp);

		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
			pl->sm.receive_state = LACP_DISABLED;
			lacp_receive_sm(portp, lacp);
			/* We goto LACP_DISABLED state */
			break;
		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
			pl->sm.receive_state = LACP_EXPIRED;
			/*
			 * FALL THROUGH TO LACP_EXPIRED CASE:
			 * We have no way of knowing if we get into
			 * lacp_receive_sm() from a current_while_timer
			 * expiring as it has never been kicked off yet!
			 */
		} else {
			/* We stay in LACP_PORT_DISABLED state */
			break;
		}
		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
		/* FALLTHROUGH */

	case LACP_EXPIRED:
		/*
		 * Arrives here from LACP_PORT_DISABLED state as well as
		 * current_while_timer expiring.
		 */
		pl->PartnerOperPortState.bit.sync = B_FALSE;
		pl->PartnerOperPortState.bit.timeout = B_TRUE;

		pl->ActorOperPortState.bit.expired = B_TRUE;
		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
		lacp_periodic_sm(portp);
		break;

	case LACP_DISABLED:
		/*
		 * This is the normal state for recv_sm when LACP_OFF
		 * is set or the NIC is in half duplex mode.
		 */
		lacp_port_unselect(portp);
		record_Default(portp);
		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
		pl->ActorOperPortState.bit.expired = B_FALSE;
		break;

	case LACP_DEFAULTED:
		/*
		 * Current_while_timer expired a second time.
		 */
		update_default_selected(portp);
		record_Default(portp);	/* overwrite Partner Oper val */
		pl->ActorOperPortState.bit.expired = B_FALSE;
		pl->PartnerOperPortState.bit.sync = B_TRUE;

		lacp_selection_logic(portp);
		lacp_mux_sm(portp);
		break;

	case LACP_CURRENT:
		/*
		 * Reception of LACPDU
		 */

		if (!lacp) /* no LACPDU so current_while_timer popped */
			break;

		AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
		    portp->lp_linkid));

		/*
		 * Validate Actor_Information_Length,
		 * Partner_Information_Length, Collector_Information_Length,
		 * and Terminator_Length fields.
		 */
		if (!valid_lacp_pdu(portp, lacp)) {
			AGGR_LACP_DBG(("lacp_receive_sm (%d): "
			    "Invalid LACPDU received\n",
			    portp->lp_linkid));
			break;
		}

		/*
		 * update_selected() and update_NTT() compare against the
		 * values currently recorded, so they must run before
		 * record_PDU() overwrites the Partner information.
		 */
		save_activity = pl->PartnerOperPortState.bit.activity;
		selected_updated = update_selected(portp, lacp);
		update_NTT(portp, lacp);
		sync_updated = record_PDU(portp, lacp);

		pl->ActorOperPortState.bit.expired = B_FALSE;

		if (selected_updated) {
			lacp_selection_logic(portp);
			lacp_mux_sm(portp);
		} else if (sync_updated) {
			lacp_mux_sm(portp);
		}

		/*
		 * If the periodic timer value bit has been modified
		 * or the partner activity bit has been changed then
		 * we need to respectively:
		 *  - restart the timer with the proper timeout value.
		 *  - possibly enable/disable transmission of LACPDUs.
		 */
		if ((pl->PartnerOperPortState.bit.timeout &&
		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
		    (!pl->PartnerOperPortState.bit.timeout &&
		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
		    (pl->PartnerOperPortState.bit.activity !=
		    save_activity)) {
			lacp_periodic_sm(portp);
		}

		/* restart the current_while timer around the transmit */
		stop_current_while_timer(portp);
		/* Check if we need to transmit an LACPDU */
		if (pl->NTT)
			lacp_xmit_sm(portp);
		start_current_while_timer(portp, 0);

		break;
	}
}
2181 static void
2182 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2184 mac_perim_handle_t mph;
2186 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2187 portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2189 mac_perim_enter_by_mh(portp->lp_mh, &mph);
2190 if (!enable) {
2192 * Turn OFF Collector_Distributor.
2194 portp->lp_collector_enabled = B_FALSE;
2195 aggr_send_port_disable(portp);
2196 goto done;
2200 * Turn ON Collector_Distributor.
2203 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2204 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2205 /* Port is compatible and can be aggregated */
2206 portp->lp_collector_enabled = B_TRUE;
2207 aggr_send_port_enable(portp);
2210 done:
2211 mac_perim_exit(mph);
2215 * Because the LACP packet processing needs to enter the aggr's mac perimeter
2216 * and that would potentially cause a deadlock with the thread in which the
2217 * grp/port is deleted, we defer the packet process to a worker thread. Here
2218 * we only enqueue the received Marker or LACPDU for later processing.
2220 void
2221 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2223 aggr_grp_t *grp = portp->lp_grp;
2224 lacp_t *lacp;
2226 dmp->b_rptr += sizeof (struct ether_header);
2228 if (MBLKL(dmp) < sizeof (lacp_t)) {
2229 freemsg(dmp);
2230 return;
2233 lacp = (lacp_t *)dmp->b_rptr;
2234 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2235 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2236 "Unknown Slow Protocol type %d\n",
2237 portp->lp_linkid, lacp->subtype));
2238 freemsg(dmp);
2239 return;
2242 mutex_enter(&grp->lg_lacp_lock);
2245 * If the lg_lacp_done is set, this aggregation is in the process of
2246 * being deleted, return directly.
2248 if (grp->lg_lacp_done) {
2249 mutex_exit(&grp->lg_lacp_lock);
2250 freemsg(dmp);
2251 return;
2254 if (grp->lg_lacp_tail == NULL) {
2255 grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2256 } else {
2257 grp->lg_lacp_tail->b_next = dmp;
2258 grp->lg_lacp_tail = dmp;
2262 * Hold a reference of the port so that the port won't be freed when it
2263 * is removed from the aggr. The b_prev field is borrowed to save the
2264 * port information.
2266 AGGR_PORT_REFHOLD(portp);
2267 dmp->b_prev = (mblk_t *)portp;
2268 cv_broadcast(&grp->lg_lacp_cv);
2269 mutex_exit(&grp->lg_lacp_lock);
2272 static void
2273 aggr_lacp_rx(mblk_t *dmp)
2275 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2276 mac_perim_handle_t mph;
2277 lacp_t *lacp;
2279 dmp->b_prev = NULL;
2281 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2282 if (portp->lp_closing)
2283 goto done;
2285 lacp = (lacp_t *)dmp->b_rptr;
2286 switch (lacp->subtype) {
2287 case LACP_SUBTYPE:
2288 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2289 portp->lp_linkid));
2291 if (!portp->lp_lacp.sm.lacp_on) {
2292 break;
2294 lacp_receive_sm(portp, lacp);
2295 break;
2297 case MARKER_SUBTYPE:
2298 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2299 portp->lp_linkid));
2301 if (receive_marker_pdu(portp, dmp) != 0)
2302 break;
2304 /* Send the packet over the first TX ring */
2305 dmp = mac_hwring_send_priv(portp->lp_mch,
2306 portp->lp_tx_rings[0], dmp);
2307 if (dmp != NULL)
2308 freemsg(dmp);
2309 mac_perim_exit(mph);
2310 AGGR_PORT_REFRELE(portp);
2311 return;
2314 done:
2315 mac_perim_exit(mph);
2316 AGGR_PORT_REFRELE(portp);
2317 freemsg(dmp);
2320 void
2321 aggr_lacp_rx_thread(void *arg)
2323 callb_cpr_t cprinfo;
2324 aggr_grp_t *grp = (aggr_grp_t *)arg;
2325 aggr_port_t *port;
2326 mblk_t *mp, *nextmp;
2328 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2329 "aggr_lacp_rx_thread");
2331 mutex_enter(&grp->lg_lacp_lock);
2334 * Quit the thread if the grp is deleted.
2336 while (!grp->lg_lacp_done) {
2337 if ((mp = grp->lg_lacp_head) == NULL) {
2338 CALLB_CPR_SAFE_BEGIN(&cprinfo);
2339 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2340 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2341 continue;
2344 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2345 mutex_exit(&grp->lg_lacp_lock);
2347 while (mp != NULL) {
2348 nextmp = mp->b_next;
2349 mp->b_next = NULL;
2350 aggr_lacp_rx(mp);
2351 mp = nextmp;
2353 mutex_enter(&grp->lg_lacp_lock);
2357 * The grp is being destroyed, simply free all of the LACP messages
2358 * left in the queue which did not have the chance to be processed.
2359 * We cannot use freemsgchain() here since we need to clear the
2360 * b_prev field.
2362 for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2363 port = (aggr_port_t *)mp->b_prev;
2364 AGGR_PORT_REFRELE(port);
2365 nextmp = mp->b_next;
2366 mp->b_next = NULL;
2367 mp->b_prev = NULL;
2368 freemsg(mp);
2371 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2372 grp->lg_lacp_rx_thread = NULL;
2373 cv_broadcast(&grp->lg_lacp_cv);
2374 CALLB_CPR_EXIT(&cprinfo);
2375 thread_exit();