Expand PMF_FN_* macros.
[netbsd-mini2440.git] / sys / net / agr / ieee8023ad_lacp.c
blob7c9002d7fb9ce917ff465e590cf5b038f468fe1b
/* $NetBSD: ieee8023ad_lacp.c,v 1.8 2007/08/26 22:59:09 dyoung Exp $ */

/*-
 * Copyright (c)2005 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: ieee8023ad_lacp.c,v 1.8 2007/08/26 22:59:09 dyoung Exp $");
32 #include <sys/param.h>
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/kernel.h> /* hz */
39 #include <net/if.h>
40 #include <net/if_dl.h>
41 #include <net/if_ether.h>
42 #include <net/if_media.h>
44 #include <net/agr/if_agrvar_impl.h>
45 #include <net/agr/if_agrsubr.h>
46 #include <net/agr/ieee8023_slowprotocols.h>
47 #include <net/agr/ieee8023_tlv.h>
48 #include <net/agr/ieee8023ad.h>
49 #include <net/agr/ieee8023ad_lacp.h>
50 #include <net/agr/ieee8023ad_lacp_impl.h>
51 #include <net/agr/ieee8023ad_impl.h>
52 #include <net/agr/ieee8023ad_lacp_sm.h>
53 #include <net/agr/ieee8023ad_lacp_debug.h>
/*
 * Prototypes for the file-local (static) helpers defined in this file.
 */
static void lacp_fill_actorinfo(struct agr_port *, struct lacp_peerinfo *);

static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *);
static void lacp_suppress_distributing(struct lacp_softc *,
    struct lacp_aggregator *);
static void lacp_transit_expire(void *);
static void lacp_select_active_aggregator(struct lacp_softc *);
static uint16_t lacp_compose_key(struct lacp_port *);
/*
 * actor system priority and port priority.
 * XXX should be configurable.
 */

#define	LACP_SYSTEM_PRIO	0x8000
#define	LACP_PORT_PRIO		0x8000
72 static const struct tlv_template lacp_info_tlv_template[] = {
73 { LACP_TYPE_ACTORINFO,
74 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
75 { LACP_TYPE_PARTNERINFO,
76 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
77 { LACP_TYPE_COLLECTORINFO,
78 sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
79 { 0, 0 },
83 * ieee8023ad_lacp_input: process lacpdu
85 * => called from ether_input. (ie. at IPL_NET)
87 * XXX is it better to defer processing to lower IPL?
88 * XXX anyway input rate should be very low...
91 int
92 ieee8023ad_lacp_input(struct ifnet *ifp, struct mbuf *m)
94 struct lacpdu *du;
95 struct agr_softc *sc;
96 struct agr_port *port;
97 struct lacp_port *lp;
98 int error = 0;
100 port = ifp->if_agrprivate; /* XXX race with agr_remport. */
101 if (__predict_false(port->port_flags & AGRPORT_DETACHING)) {
102 goto bad;
105 sc = AGR_SC_FROM_PORT(port);
106 KASSERT(port);
108 /* running static config? */
109 if (AGR_STATIC(sc)) {
110 /* static config, no lacp */
111 goto bad;
115 if (m->m_pkthdr.len != sizeof(*du)) {
116 goto bad;
119 if ((m->m_flags & M_MCAST) == 0) {
120 goto bad;
123 if (m->m_len < sizeof(*du)) {
124 m = m_pullup(m, sizeof(*du));
125 if (m == NULL) {
126 return ENOMEM;
130 du = mtod(m, struct lacpdu *);
132 if (memcmp(&du->ldu_eh.ether_dhost,
133 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
134 goto bad;
137 KASSERT(du->ldu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_LACP);
140 * ignore the version for compatibility with
141 * the future protocol revisions.
144 #if 0
145 if (du->ldu_sph.sph_version != 1) {
146 goto bad;
148 #endif
151 * ignore tlv types for compatibility with
152 * the future protocol revisions.
155 if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
156 lacp_info_tlv_template, false)) {
157 goto bad;
160 AGR_LOCK(sc);
161 lp = LACP_PORT(port);
163 #if defined(LACP_DEBUG)
164 if (lacpdebug) {
165 LACP_DPRINTF((lp, "lacpdu receive\n"));
166 lacp_dump_lacpdu(du);
168 #endif /* defined(LACP_DEBUG) */
169 lacp_sm_rx(lp, du);
171 AGR_UNLOCK(sc);
173 m_freem(m);
175 return error;
177 bad:
178 m_freem(m);
179 return EINVAL;
182 static void
183 lacp_fill_actorinfo(struct agr_port *port, struct lacp_peerinfo *info)
185 struct lacp_port *lp = LACP_PORT(port);
187 info->lip_systemid.lsi_prio = htobe16(LACP_SYSTEM_PRIO);
188 memcpy(&info->lip_systemid.lsi_mac,
189 CLLADDR(port->port_ifp->if_sadl), ETHER_ADDR_LEN);
190 info->lip_portid.lpi_prio = htobe16(LACP_PORT_PRIO);
191 info->lip_portid.lpi_portno = htobe16(port->port_ifp->if_index);
192 info->lip_state = lp->lp_state;
196 lacp_xmit_lacpdu(struct lacp_port *lp)
198 struct agr_port *port = lp->lp_agrport;
199 struct mbuf *m;
200 struct lacpdu *du;
201 int error;
203 /* running static config? */
204 if (AGR_STATIC(AGR_SC_FROM_PORT(port))) {
205 /* static config, no lacp transmit */
206 return 0;
209 KDASSERT(MHLEN >= sizeof(*du));
211 m = m_gethdr(M_DONTWAIT, MT_DATA);
212 if (m == NULL) {
213 return ENOMEM;
215 m->m_len = m->m_pkthdr.len = sizeof(*du);
217 du = mtod(m, struct lacpdu *);
218 memset(du, 0, sizeof(*du));
220 memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
221 ETHER_ADDR_LEN);
222 memcpy(&du->ldu_eh.ether_shost, &port->port_origlladdr, ETHER_ADDR_LEN);
223 du->ldu_eh.ether_type = htobe16(ETHERTYPE_SLOWPROTOCOLS);
225 du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
226 du->ldu_sph.sph_version = 1;
228 TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
229 du->ldu_actor = lp->lp_actor;
231 TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
232 sizeof(du->ldu_partner));
233 du->ldu_partner = lp->lp_partner;
235 TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
236 sizeof(du->ldu_collector));
237 du->ldu_collector.lci_maxdelay = 0;
239 #if defined(LACP_DEBUG)
240 if (lacpdebug) {
241 LACP_DPRINTF((lp, "lacpdu transmit\n"));
242 lacp_dump_lacpdu(du);
244 #endif /* defined(LACP_DEBUG) */
246 m->m_flags |= M_MCAST;
249 * XXX should use higher priority queue.
250 * otherwise network congestion can break aggregation.
253 error = agr_xmit_frame(port->port_ifp, m);
254 return error;
257 void
258 ieee8023ad_lacp_portstate(struct agr_port *port)
260 struct lacp_port *lp = LACP_PORT(port);
261 u_int media = port->port_media;
262 uint8_t old_state;
263 uint16_t old_key;
265 AGR_ASSERT_LOCKED(AGR_SC_FROM_PORT(port));
267 LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x\n", lp->lp_media, media));
269 old_state = lp->lp_state;
270 old_key = lp->lp_key;
272 lp->lp_media = media;
273 if ((media & IFM_HDX) != 0) {
274 lp->lp_state &= ~LACP_STATE_AGGREGATION;
275 } else {
276 lp->lp_state |= LACP_STATE_AGGREGATION;
278 lp->lp_key = lacp_compose_key(lp);
280 if (old_state != lp->lp_state || old_key != lp->lp_key) {
281 LACP_DPRINTF((lp, "-> UNSELECTED\n"));
282 lp->lp_selected = LACP_UNSELECTED;
/*
 * ieee8023ad_lacp_porttick: per-port periodic processing.
 *
 * => the agr softc lock must be held (asserted).
 * => runs the port's timers, then selection, then the mux, transmit
 *    and periodic-transmit-schedule routines, in that order.
 */
void
ieee8023ad_lacp_porttick(struct agr_softc *sc, struct agr_port *port)
{
	struct lacp_port *lp = LACP_PORT(port);

	AGR_ASSERT_LOCKED(sc);

	lacp_run_timers(lp);

	lacp_select(lp);
	lacp_sm_mux(lp);
	lacp_sm_tx(lp);
	lacp_sm_ptx_tx_schedule(lp);
}
301 void
302 lacp_portinit(struct agr_port *port)
304 struct lacp_port *lp = LACP_PORT(port);
305 bool active = true; /* XXX should be configurable */
306 bool fast = false; /* XXX should be configurable */
308 lp->lp_agrport = port;
309 lacp_fill_actorinfo(port, &lp->lp_actor);
310 lp->lp_state =
311 (active ? LACP_STATE_ACTIVITY : 0) |
312 (fast ? LACP_STATE_TIMEOUT : 0);
313 lp->lp_aggregator = NULL;
314 lp->lp_media = port->port_media; /* XXX */
315 lp->lp_key = lacp_compose_key(lp);
316 lacp_sm_rx_set_expired(lp);
319 void
320 lacp_portfini(struct agr_port *port)
322 struct lacp_port *lp = LACP_PORT(port);
323 struct lacp_aggregator *la = lp->lp_aggregator;
324 int i;
326 LACP_DPRINTF((lp, "portfini\n"));
328 for (i = 0; i < LACP_NTIMER; i++) {
329 LACP_TIMER_DISARM(lp, i);
332 if (la == NULL) {
333 return;
336 lacp_disable_distributing(lp);
337 lacp_unselect(lp);
340 /* -------------------- */
341 void
342 lacp_disable_collecting(struct lacp_port *lp)
344 struct agr_port *port = lp->lp_agrport;
346 lp->lp_state &= ~LACP_STATE_COLLECTING;
347 port->port_flags &= ~AGRPORT_COLLECTING;
350 void
351 lacp_enable_collecting(struct lacp_port *lp)
353 struct agr_port *port = lp->lp_agrport;
355 lp->lp_state |= LACP_STATE_COLLECTING;
356 port->port_flags |= AGRPORT_COLLECTING;
359 void
360 lacp_disable_distributing(struct lacp_port *lp)
362 struct agr_port *port = lp->lp_agrport;
363 struct lacp_aggregator *la = lp->lp_aggregator;
364 struct lacp_softc *lsc = LACP_SOFTC(AGR_SC_FROM_PORT(port));
365 #if defined(LACP_DEBUG)
366 char buf[LACP_LAGIDSTR_MAX+1];
367 #endif /* defined(LACP_DEBUG) */
369 if ((lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
370 return;
373 KASSERT(la);
374 KASSERT(!TAILQ_EMPTY(&la->la_ports));
375 KASSERT(la->la_nports > 0);
376 KASSERT(la->la_refcnt >= la->la_nports);
378 LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
379 "nports %d -> %d\n",
380 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
381 la->la_nports, la->la_nports - 1));
383 TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
384 la->la_nports--;
386 lacp_suppress_distributing(lsc, la);
388 lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
389 port->port_flags &= ~AGRPORT_DISTRIBUTING;
391 if (lsc->lsc_active_aggregator == la) {
392 lacp_select_active_aggregator(lsc);
396 void
397 lacp_enable_distributing(struct lacp_port *lp)
399 struct agr_port *port = lp->lp_agrport;
400 struct lacp_aggregator *la = lp->lp_aggregator;
401 struct lacp_softc *lsc = LACP_SOFTC(AGR_SC_FROM_PORT(port));
402 #if defined(LACP_DEBUG)
403 char buf[LACP_LAGIDSTR_MAX+1];
404 #endif /* defined(LACP_DEBUG) */
406 if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
407 return;
410 KASSERT(la);
412 LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
413 "nports %d -> %d\n",
414 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
415 la->la_nports, la->la_nports + 1));
417 KASSERT(la->la_refcnt > la->la_nports);
418 TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
419 la->la_nports++;
421 lacp_suppress_distributing(lsc, la);
423 lp->lp_state |= LACP_STATE_DISTRIBUTING;
424 port->port_flags |= AGRPORT_DISTRIBUTING;
426 if (lsc->lsc_active_aggregator != la) {
427 lacp_select_active_aggregator(lsc);
431 static void
432 lacp_transit_expire(void *vp)
434 struct agr_softc *sc = vp;
435 struct lacp_softc *lsc = LACP_SOFTC(sc);
437 AGR_LOCK(sc);
438 LACP_DPRINTF((NULL, "%s\n", __func__));
439 lsc->lsc_suppress_distributing = false;
440 AGR_UNLOCK(sc);
443 /* -------------------- */
444 /* XXX */
445 void
446 ieee8023ad_portinit(struct agr_port *port)
448 struct ieee8023ad_port *iport = IEEE8023AD_PORT(port);
450 memset(iport, 0, sizeof(iport));
452 lacp_portinit(port);
/*
 * ieee8023ad_portfini: tear down the port's lacp state with the agr
 * softc lock held.
 */
void
ieee8023ad_portfini(struct agr_port *port)
{
	struct agr_softc *sc = AGR_SC_FROM_PORT(port);

	AGR_LOCK(sc);

	lacp_portfini(port);

	AGR_UNLOCK(sc);
}
467 void
468 ieee8023ad_ctor(struct agr_softc *sc)
470 struct ieee8023ad_softc *isc = IEEE8023AD_SOFTC(sc);
471 struct lacp_softc *lsc = &isc->isc_lacpsc;
473 lsc->lsc_active_aggregator = NULL;
474 TAILQ_INIT(&lsc->lsc_aggregators);
475 callout_init(&lsc->lsc_transit_callout, 0);
476 callout_setfunc(&lsc->lsc_transit_callout, lacp_transit_expire, sc);
479 void
480 ieee8023ad_dtor(struct agr_softc *sc)
482 struct ieee8023ad_softc *isc = IEEE8023AD_SOFTC(sc);
483 struct lacp_softc *lsc = &isc->isc_lacpsc;
485 LACP_DPRINTF((NULL, "%s\n", __func__));
487 callout_stop(&lsc->lsc_transit_callout);
488 KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators));
489 KASSERT(lsc->lsc_active_aggregator == NULL);
492 /* -------------------- */
494 struct agr_port *
495 ieee8023ad_select_tx_port(struct agr_softc *sc, struct mbuf *m)
497 const struct lacp_softc *lsc = LACP_SOFTC(sc);
498 const struct lacp_aggregator *la;
499 const struct lacp_port *lp;
500 uint32_t hash;
501 int nports;
503 if (__predict_false(lsc->lsc_suppress_distributing &&
504 !AGR_ROUNDROBIN(sc))) {
505 LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
506 sc->sc_if.if_collisions++; /* XXX abuse */
507 return NULL;
510 la = lsc->lsc_active_aggregator;
511 if (__predict_false(la == NULL)) {
512 LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
513 return NULL;
516 nports = la->la_nports;
517 KASSERT(nports > 0);
519 if (AGR_ROUNDROBIN(sc)) {
520 /* packet ordering rule violation */
521 hash = sc->sc_rr_counter++;
522 } else {
523 hash = (*sc->sc_iftop->iftop_hashmbuf)(sc, m);
525 hash %= nports;
526 lp = TAILQ_FIRST(&la->la_ports);
527 KASSERT(lp != NULL);
528 while (hash--) {
529 lp = TAILQ_NEXT(lp, lp_dist_q);
530 KASSERT(lp != NULL);
533 KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0);
535 return lp->lp_agrport;
539 * lacp_suppress_distributing: drop transmit packets for a while
540 * to preserve packet ordering.
543 static void
544 lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
547 if (lsc->lsc_active_aggregator != la) {
548 return;
551 LACP_DPRINTF((NULL, "%s\n", __func__));
552 lsc->lsc_suppress_distributing = true;
553 /* XXX should consider collector max delay */
554 callout_schedule(&lsc->lsc_transit_callout,
555 LACP_TRANSIT_DELAY * hz / 1000);
558 /* -------------------- */
561 lacp_compare_peerinfo(const struct lacp_peerinfo *a,
562 const struct lacp_peerinfo *b)
565 return memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state));
569 lacp_compare_systemid(const struct lacp_systemid *a,
570 const struct lacp_systemid *b)
573 return memcmp(a, b, sizeof(*a));
577 lacp_compare_portid(const struct lacp_portid *a,
578 const struct lacp_portid *b)
581 return memcmp(a, b, sizeof(*a));
584 /* -------------------- */
586 static uint64_t
587 lacp_aggregator_bandwidth(struct lacp_aggregator *la)
589 struct lacp_port *lp;
590 uint64_t speed;
592 lp = TAILQ_FIRST(&la->la_ports);
593 if (lp == NULL) {
594 return 0;
597 speed = ifmedia_baudrate(lp->lp_media);
598 speed *= la->la_nports;
599 if (speed == 0) {
600 LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
601 lp->lp_media, la->la_nports));
604 return speed;
608 * lacp_select_active_aggregator: select an aggregator to be used to transmit
609 * packets from agr(4) interface.
612 static void
613 lacp_select_active_aggregator(struct lacp_softc *lsc)
615 struct lacp_aggregator *la;
616 struct lacp_aggregator *best_la = NULL;
617 uint64_t best_speed = 0;
618 #if defined(LACP_DEBUG)
619 char buf[LACP_LAGIDSTR_MAX+1];
620 #endif /* defined(LACP_DEBUG) */
622 LACP_DPRINTF((NULL, "%s:\n", __func__));
624 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
625 uint64_t speed;
627 if (la->la_nports == 0) {
628 continue;
631 speed = lacp_aggregator_bandwidth(la);
632 LACP_DPRINTF((NULL, "%s, speed=%" PRIu64 ", nports=%d\n",
633 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
634 speed, la->la_nports));
635 if (speed > best_speed ||
636 (speed == best_speed &&
637 la == lsc->lsc_active_aggregator)) {
638 best_la = la;
639 best_speed = speed;
643 KASSERT(best_la == NULL || best_la->la_nports > 0);
644 KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports));
646 #if defined(LACP_DEBUG)
647 if (lsc->lsc_active_aggregator != best_la) {
648 LACP_DPRINTF((NULL, "active aggregator changed\n"));
649 LACP_DPRINTF((NULL, "old %s\n",
650 lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
651 buf, sizeof(buf))));
652 } else {
653 LACP_DPRINTF((NULL, "active aggregator not changed\n"));
655 LACP_DPRINTF((NULL, "new %s\n",
656 lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
657 #endif /* defined(LACP_DEBUG) */
659 if (lsc->lsc_active_aggregator != best_la) {
660 lsc->lsc_active_aggregator = best_la;
661 if (best_la) {
662 lacp_suppress_distributing(lsc, best_la);
667 uint16_t
668 lacp_compose_key(struct lacp_port *lp)
670 u_int media = lp->lp_media;
671 uint16_t key;
673 KASSERT(IFM_TYPE(media) == IFM_ETHER);
675 if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
678 * non-aggregatable links should have unique keys.
680 * XXX this isn't really unique as if_index is 16 bit.
683 /* bit 0..14: (some bits of) if_index of this port */
684 key = lp->lp_agrport->port_ifp->if_index;
685 /* bit 15: 1 */
686 key |= 0x8000;
687 } else {
688 u_int subtype = IFM_SUBTYPE(media);
690 KASSERT((media & IFM_HDX) == 0); /* should be handled above */
691 KASSERT((subtype & 0x1f) == subtype);
693 /* bit 0..4: IFM_SUBTYPE */
694 key = subtype;
695 /* bit 5..14: (some bits of) if_index of agr device */
696 key |= 0x7fe0 & ((lp->lp_agrport->port_agrifp->if_index) << 5);
697 /* bit 15: 0 */
700 return htobe16(key);