1 /* $NetBSD: ieee8023ad_lacp.c,v 1.8 2007/08/26 22:59:09 dyoung Exp $ */
4 * Copyright (c)2005 YAMAMOTO Takashi,
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: ieee8023ad_lacp.c,v 1.8 2007/08/26 22:59:09 dyoung Exp $");
32 #include <sys/param.h>
33 #include <sys/callout.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/kernel.h> /* hz */
40 #include <net/if_dl.h>
41 #include <net/if_ether.h>
42 #include <net/if_media.h>
44 #include <net/agr/if_agrvar_impl.h>
45 #include <net/agr/if_agrsubr.h>
46 #include <net/agr/ieee8023_slowprotocols.h>
47 #include <net/agr/ieee8023_tlv.h>
48 #include <net/agr/ieee8023ad.h>
49 #include <net/agr/ieee8023ad_lacp.h>
50 #include <net/agr/ieee8023ad_lacp_impl.h>
51 #include <net/agr/ieee8023ad_impl.h>
52 #include <net/agr/ieee8023ad_lacp_sm.h>
53 #include <net/agr/ieee8023ad_lacp_debug.h>
55 static void lacp_fill_actorinfo(struct agr_port
*, struct lacp_peerinfo
*);
57 static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator
*);
58 static void lacp_suppress_distributing(struct lacp_softc
*,
59 struct lacp_aggregator
*);
60 static void lacp_transit_expire(void *);
61 static void lacp_select_active_aggregator(struct lacp_softc
*);
62 static uint16_t lacp_compose_key(struct lacp_port
*);
65 * actor system priority and port priority.
66 * XXX should be configurable.
69 #define LACP_SYSTEM_PRIO 0x8000
70 #define LACP_PORT_PRIO 0x8000
72 static const struct tlv_template lacp_info_tlv_template
[] = {
73 { LACP_TYPE_ACTORINFO
,
74 sizeof(struct tlvhdr
) + sizeof(struct lacp_peerinfo
) },
75 { LACP_TYPE_PARTNERINFO
,
76 sizeof(struct tlvhdr
) + sizeof(struct lacp_peerinfo
) },
77 { LACP_TYPE_COLLECTORINFO
,
78 sizeof(struct tlvhdr
) + sizeof(struct lacp_collectorinfo
) },
83 * ieee8023ad_lacp_input: process lacpdu
85 * => called from ether_input. (ie. at IPL_NET)
87 * XXX is it better to defer processing to lower IPL?
88 * XXX anyway input rate should be very low...
92 ieee8023ad_lacp_input(struct ifnet
*ifp
, struct mbuf
*m
)
96 struct agr_port
*port
;
100 port
= ifp
->if_agrprivate
; /* XXX race with agr_remport. */
101 if (__predict_false(port
->port_flags
& AGRPORT_DETACHING
)) {
105 sc
= AGR_SC_FROM_PORT(port
);
108 /* running static config? */
109 if (AGR_STATIC(sc
)) {
110 /* static config, no lacp */
115 if (m
->m_pkthdr
.len
!= sizeof(*du
)) {
119 if ((m
->m_flags
& M_MCAST
) == 0) {
123 if (m
->m_len
< sizeof(*du
)) {
124 m
= m_pullup(m
, sizeof(*du
));
130 du
= mtod(m
, struct lacpdu
*);
/* compare the destination address with the slow protocols multicast address */
132 if (memcmp(&du->ldu_eh.ether_dhost,
133 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
137 KASSERT(du
->ldu_sph
.sph_subtype
== SLOWPROTOCOLS_SUBTYPE_LACP
);
140 * ignore the version for compatibility with
141 * the future protocol revisions.
145 if (du
->ldu_sph
.sph_version
!= 1) {
151 * ignore tlv types for compatibility with
152 * the future protocol revisions.
155 if (tlv_check(du
, sizeof(*du
), &du
->ldu_tlv_actor
,
156 lacp_info_tlv_template
, false)) {
161 lp
= LACP_PORT(port
);
163 #if defined(LACP_DEBUG)
165 LACP_DPRINTF((lp
, "lacpdu receive\n"));
166 lacp_dump_lacpdu(du
);
168 #endif /* defined(LACP_DEBUG) */
183 lacp_fill_actorinfo(struct agr_port
*port
, struct lacp_peerinfo
*info
)
185 struct lacp_port
*lp
= LACP_PORT(port
);
187 info
->lip_systemid
.lsi_prio
= htobe16(LACP_SYSTEM_PRIO
);
188 memcpy(&info
->lip_systemid
.lsi_mac
,
189 CLLADDR(port
->port_ifp
->if_sadl
), ETHER_ADDR_LEN
);
190 info
->lip_portid
.lpi_prio
= htobe16(LACP_PORT_PRIO
);
191 info
->lip_portid
.lpi_portno
= htobe16(port
->port_ifp
->if_index
);
192 info
->lip_state
= lp
->lp_state
;
196 lacp_xmit_lacpdu(struct lacp_port
*lp
)
198 struct agr_port
*port
= lp
->lp_agrport
;
203 /* running static config? */
204 if (AGR_STATIC(AGR_SC_FROM_PORT(port
))) {
205 /* static config, no lacp transmit */
209 KDASSERT(MHLEN
>= sizeof(*du
));
211 m
= m_gethdr(M_DONTWAIT
, MT_DATA
);
215 m
->m_len
= m
->m_pkthdr
.len
= sizeof(*du
);
217 du
= mtod(m
, struct lacpdu
*);
218 memset(du
, 0, sizeof(*du
));
220 memcpy(&du
->ldu_eh
.ether_dhost
, ethermulticastaddr_slowprotocols
,
222 memcpy(&du
->ldu_eh
.ether_shost
, &port
->port_origlladdr
, ETHER_ADDR_LEN
);
223 du
->ldu_eh
.ether_type
= htobe16(ETHERTYPE_SLOWPROTOCOLS
);
225 du
->ldu_sph
.sph_subtype
= SLOWPROTOCOLS_SUBTYPE_LACP
;
226 du
->ldu_sph
.sph_version
= 1;
228 TLV_SET(&du
->ldu_tlv_actor
, LACP_TYPE_ACTORINFO
, sizeof(du
->ldu_actor
));
229 du
->ldu_actor
= lp
->lp_actor
;
231 TLV_SET(&du
->ldu_tlv_partner
, LACP_TYPE_PARTNERINFO
,
232 sizeof(du
->ldu_partner
));
233 du
->ldu_partner
= lp
->lp_partner
;
235 TLV_SET(&du
->ldu_tlv_collector
, LACP_TYPE_COLLECTORINFO
,
236 sizeof(du
->ldu_collector
));
237 du
->ldu_collector
.lci_maxdelay
= 0;
239 #if defined(LACP_DEBUG)
241 LACP_DPRINTF((lp
, "lacpdu transmit\n"));
242 lacp_dump_lacpdu(du
);
244 #endif /* defined(LACP_DEBUG) */
246 m
->m_flags
|= M_MCAST
;
249 * XXX should use higher priority queue.
250 * otherwise network congestion can break aggregation.
253 error
= agr_xmit_frame(port
->port_ifp
, m
);
258 ieee8023ad_lacp_portstate(struct agr_port
*port
)
260 struct lacp_port
*lp
= LACP_PORT(port
);
261 u_int media
= port
->port_media
;
265 AGR_ASSERT_LOCKED(AGR_SC_FROM_PORT(port
));
267 LACP_DPRINTF((lp
, "media changed 0x%x -> 0x%x\n", lp
->lp_media
, media
));
269 old_state
= lp
->lp_state
;
270 old_key
= lp
->lp_key
;
272 lp
->lp_media
= media
;
273 if ((media
& IFM_HDX
) != 0) {
274 lp
->lp_state
&= ~LACP_STATE_AGGREGATION
;
276 lp
->lp_state
|= LACP_STATE_AGGREGATION
;
278 lp
->lp_key
= lacp_compose_key(lp
);
280 if (old_state
!= lp
->lp_state
|| old_key
!= lp
->lp_key
) {
281 LACP_DPRINTF((lp
, "-> UNSELECTED\n"));
282 lp
->lp_selected
= LACP_UNSELECTED
;
287 ieee8023ad_lacp_porttick(struct agr_softc
*sc
, struct agr_port
*port
)
289 struct lacp_port
*lp
= LACP_PORT(port
);
291 AGR_ASSERT_LOCKED(sc
);
298 lacp_sm_ptx_tx_schedule(lp
);
302 lacp_portinit(struct agr_port
*port
)
304 struct lacp_port
*lp
= LACP_PORT(port
);
305 bool active
= true; /* XXX should be configurable */
306 bool fast
= false; /* XXX should be configurable */
308 lp
->lp_agrport
= port
;
309 lacp_fill_actorinfo(port
, &lp
->lp_actor
);
311 (active
? LACP_STATE_ACTIVITY
: 0) |
312 (fast
? LACP_STATE_TIMEOUT
: 0);
313 lp
->lp_aggregator
= NULL
;
314 lp
->lp_media
= port
->port_media
; /* XXX */
315 lp
->lp_key
= lacp_compose_key(lp
);
316 lacp_sm_rx_set_expired(lp
);
320 lacp_portfini(struct agr_port
*port
)
322 struct lacp_port
*lp
= LACP_PORT(port
);
323 struct lacp_aggregator
*la
= lp
->lp_aggregator
;
326 LACP_DPRINTF((lp
, "portfini\n"));
328 for (i
= 0; i
< LACP_NTIMER
; i
++) {
329 LACP_TIMER_DISARM(lp
, i
);
336 lacp_disable_distributing(lp
);
340 /* -------------------- */
342 lacp_disable_collecting(struct lacp_port
*lp
)
344 struct agr_port
*port
= lp
->lp_agrport
;
346 lp
->lp_state
&= ~LACP_STATE_COLLECTING
;
347 port
->port_flags
&= ~AGRPORT_COLLECTING
;
351 lacp_enable_collecting(struct lacp_port
*lp
)
353 struct agr_port
*port
= lp
->lp_agrport
;
355 lp
->lp_state
|= LACP_STATE_COLLECTING
;
356 port
->port_flags
|= AGRPORT_COLLECTING
;
360 lacp_disable_distributing(struct lacp_port
*lp
)
362 struct agr_port
*port
= lp
->lp_agrport
;
363 struct lacp_aggregator
*la
= lp
->lp_aggregator
;
364 struct lacp_softc
*lsc
= LACP_SOFTC(AGR_SC_FROM_PORT(port
));
365 #if defined(LACP_DEBUG)
366 char buf
[LACP_LAGIDSTR_MAX
+1];
367 #endif /* defined(LACP_DEBUG) */
369 if ((lp
->lp_state
& LACP_STATE_DISTRIBUTING
) == 0) {
374 KASSERT(!TAILQ_EMPTY(&la
->la_ports
));
375 KASSERT(la
->la_nports
> 0);
376 KASSERT(la
->la_refcnt
>= la
->la_nports
);
378 LACP_DPRINTF((lp
, "disable distributing on aggregator %s, "
380 lacp_format_lagid_aggregator(la
, buf
, sizeof(buf
)),
381 la
->la_nports
, la
->la_nports
- 1));
383 TAILQ_REMOVE(&la
->la_ports
, lp
, lp_dist_q
);
386 lacp_suppress_distributing(lsc
, la
);
388 lp
->lp_state
&= ~LACP_STATE_DISTRIBUTING
;
389 port
->port_flags
&= ~AGRPORT_DISTRIBUTING
;
391 if (lsc
->lsc_active_aggregator
== la
) {
392 lacp_select_active_aggregator(lsc
);
397 lacp_enable_distributing(struct lacp_port
*lp
)
399 struct agr_port
*port
= lp
->lp_agrport
;
400 struct lacp_aggregator
*la
= lp
->lp_aggregator
;
401 struct lacp_softc
*lsc
= LACP_SOFTC(AGR_SC_FROM_PORT(port
));
402 #if defined(LACP_DEBUG)
403 char buf
[LACP_LAGIDSTR_MAX
+1];
404 #endif /* defined(LACP_DEBUG) */
406 if ((lp
->lp_state
& LACP_STATE_DISTRIBUTING
) != 0) {
412 LACP_DPRINTF((lp
, "enable distributing on aggregator %s, "
414 lacp_format_lagid_aggregator(la
, buf
, sizeof(buf
)),
415 la
->la_nports
, la
->la_nports
+ 1));
417 KASSERT(la
->la_refcnt
> la
->la_nports
);
418 TAILQ_INSERT_HEAD(&la
->la_ports
, lp
, lp_dist_q
);
421 lacp_suppress_distributing(lsc
, la
);
423 lp
->lp_state
|= LACP_STATE_DISTRIBUTING
;
424 port
->port_flags
|= AGRPORT_DISTRIBUTING
;
426 if (lsc
->lsc_active_aggregator
!= la
) {
427 lacp_select_active_aggregator(lsc
);
432 lacp_transit_expire(void *vp
)
434 struct agr_softc
*sc
= vp
;
435 struct lacp_softc
*lsc
= LACP_SOFTC(sc
);
438 LACP_DPRINTF((NULL
, "%s\n", __func__
));
439 lsc
->lsc_suppress_distributing
= false;
443 /* -------------------- */
/*
 * ieee8023ad_portinit: initialize the 802.3ad private data of a port.
 *
 * => the per-port structure obtained with IEEE8023AD_PORT() is zeroed.
 */
446 ieee8023ad_portinit(struct agr_port *port)
448 struct ieee8023ad_port *iport = IEEE8023AD_PORT(port);
/* clear the whole structure; sizeof(iport) would only cover the pointer */
450 memset(iport, 0, sizeof(*iport));
456 ieee8023ad_portfini(struct agr_port
*port
)
458 struct agr_softc
*sc
= AGR_SC_FROM_PORT(port
);
468 ieee8023ad_ctor(struct agr_softc
*sc
)
470 struct ieee8023ad_softc
*isc
= IEEE8023AD_SOFTC(sc
);
471 struct lacp_softc
*lsc
= &isc
->isc_lacpsc
;
473 lsc
->lsc_active_aggregator
= NULL
;
474 TAILQ_INIT(&lsc
->lsc_aggregators
);
475 callout_init(&lsc
->lsc_transit_callout
, 0);
476 callout_setfunc(&lsc
->lsc_transit_callout
, lacp_transit_expire
, sc
);
480 ieee8023ad_dtor(struct agr_softc
*sc
)
482 struct ieee8023ad_softc
*isc
= IEEE8023AD_SOFTC(sc
);
483 struct lacp_softc
*lsc
= &isc
->isc_lacpsc
;
485 LACP_DPRINTF((NULL
, "%s\n", __func__
));
487 callout_stop(&lsc
->lsc_transit_callout
);
488 KASSERT(TAILQ_EMPTY(&lsc
->lsc_aggregators
));
489 KASSERT(lsc
->lsc_active_aggregator
== NULL
);
492 /* -------------------- */
495 ieee8023ad_select_tx_port(struct agr_softc
*sc
, struct mbuf
*m
)
497 const struct lacp_softc
*lsc
= LACP_SOFTC(sc
);
498 const struct lacp_aggregator
*la
;
499 const struct lacp_port
*lp
;
503 if (__predict_false(lsc
->lsc_suppress_distributing
&&
504 !AGR_ROUNDROBIN(sc
))) {
505 LACP_DPRINTF((NULL
, "%s: waiting transit\n", __func__
));
506 sc
->sc_if
.if_collisions
++; /* XXX abuse */
510 la
= lsc
->lsc_active_aggregator
;
511 if (__predict_false(la
== NULL
)) {
512 LACP_DPRINTF((NULL
, "%s: no active aggregator\n", __func__
));
516 nports
= la
->la_nports
;
519 if (AGR_ROUNDROBIN(sc
)) {
520 /* packet ordering rule violation */
521 hash
= sc
->sc_rr_counter
++;
523 hash
= (*sc
->sc_iftop
->iftop_hashmbuf
)(sc
, m
);
526 lp
= TAILQ_FIRST(&la
->la_ports
);
529 lp
= TAILQ_NEXT(lp
, lp_dist_q
);
533 KASSERT((lp
->lp_state
& LACP_STATE_DISTRIBUTING
) != 0);
535 return lp
->lp_agrport
;
539 * lacp_suppress_distributing: drop transmit packets for a while
540 * to preserve packet ordering.
544 lacp_suppress_distributing(struct lacp_softc
*lsc
, struct lacp_aggregator
*la
)
547 if (lsc
->lsc_active_aggregator
!= la
) {
551 LACP_DPRINTF((NULL
, "%s\n", __func__
));
552 lsc
->lsc_suppress_distributing
= true;
553 /* XXX should consider collector max delay */
554 callout_schedule(&lsc
->lsc_transit_callout
,
555 LACP_TRANSIT_DELAY
* hz
/ 1000);
558 /* -------------------- */
561 lacp_compare_peerinfo(const struct lacp_peerinfo
*a
,
562 const struct lacp_peerinfo
*b
)
565 return memcmp(a
, b
, offsetof(struct lacp_peerinfo
, lip_state
));
569 lacp_compare_systemid(const struct lacp_systemid
*a
,
570 const struct lacp_systemid
*b
)
573 return memcmp(a
, b
, sizeof(*a
));
577 lacp_compare_portid(const struct lacp_portid
*a
,
578 const struct lacp_portid
*b
)
581 return memcmp(a
, b
, sizeof(*a
));
584 /* -------------------- */
587 lacp_aggregator_bandwidth(struct lacp_aggregator
*la
)
589 struct lacp_port
*lp
;
592 lp
= TAILQ_FIRST(&la
->la_ports
);
597 speed
= ifmedia_baudrate(lp
->lp_media
);
598 speed
*= la
->la_nports
;
600 LACP_DPRINTF((lp
, "speed 0? media=0x%x nports=%d\n",
601 lp
->lp_media
, la
->la_nports
));
608 * lacp_select_active_aggregator: select an aggregator to be used to transmit
609 * packets from agr(4) interface.
613 lacp_select_active_aggregator(struct lacp_softc
*lsc
)
615 struct lacp_aggregator
*la
;
616 struct lacp_aggregator
*best_la
= NULL
;
617 uint64_t best_speed
= 0;
618 #if defined(LACP_DEBUG)
619 char buf
[LACP_LAGIDSTR_MAX
+1];
620 #endif /* defined(LACP_DEBUG) */
622 LACP_DPRINTF((NULL
, "%s:\n", __func__
));
624 TAILQ_FOREACH(la
, &lsc
->lsc_aggregators
, la_q
) {
627 if (la
->la_nports
== 0) {
631 speed
= lacp_aggregator_bandwidth(la
);
632 LACP_DPRINTF((NULL
, "%s, speed=%" PRIu64
", nports=%d\n",
633 lacp_format_lagid_aggregator(la
, buf
, sizeof(buf
)),
634 speed
, la
->la_nports
));
635 if (speed
> best_speed
||
636 (speed
== best_speed
&&
637 la
== lsc
->lsc_active_aggregator
)) {
643 KASSERT(best_la
== NULL
|| best_la
->la_nports
> 0);
644 KASSERT(best_la
== NULL
|| !TAILQ_EMPTY(&best_la
->la_ports
));
646 #if defined(LACP_DEBUG)
647 if (lsc
->lsc_active_aggregator
!= best_la
) {
648 LACP_DPRINTF((NULL
, "active aggregator changed\n"));
649 LACP_DPRINTF((NULL
, "old %s\n",
650 lacp_format_lagid_aggregator(lsc
->lsc_active_aggregator
,
653 LACP_DPRINTF((NULL
, "active aggregator not changed\n"));
655 LACP_DPRINTF((NULL
, "new %s\n",
656 lacp_format_lagid_aggregator(best_la
, buf
, sizeof(buf
))));
657 #endif /* defined(LACP_DEBUG) */
659 if (lsc
->lsc_active_aggregator
!= best_la
) {
660 lsc
->lsc_active_aggregator
= best_la
;
662 lacp_suppress_distributing(lsc
, best_la
);
668 lacp_compose_key(struct lacp_port
*lp
)
670 u_int media
= lp
->lp_media
;
673 KASSERT(IFM_TYPE(media
) == IFM_ETHER
);
675 if (!(lp
->lp_state
& LACP_STATE_AGGREGATION
)) {
678 * non-aggregatable links should have unique keys.
680 * XXX this isn't really unique as if_index is 16 bit.
683 /* bit 0..14: (some bits of) if_index of this port */
684 key
= lp
->lp_agrport
->port_ifp
->if_index
;
688 u_int subtype
= IFM_SUBTYPE(media
);
690 KASSERT((media
& IFM_HDX
) == 0); /* should be handled above */
691 KASSERT((subtype
& 0x1f) == subtype
);
693 /* bit 0..4: IFM_SUBTYPE */
695 /* bit 5..14: (some bits of) if_index of agr device */
696 key
|= 0x7fe0 & ((lp
->lp_agrport
->port_agrifp
->if_index
) << 5);