Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / mac / mac_bcast.c
blob1ff33c3578df5e541852b9e88038cc1169fbcfbb
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/conf.h>
29 #include <sys/cmn_err.h>
30 #include <sys/list.h>
31 #include <sys/kmem.h>
32 #include <sys/stream.h>
33 #include <sys/modctl.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #include <sys/stat.h>
38 #include <sys/modhash.h>
39 #include <sys/strsubr.h>
40 #include <sys/strsun.h>
41 #include <sys/sdt.h>
42 #include <sys/mac.h>
43 #include <sys/mac_impl.h>
44 #include <sys/mac_client_impl.h>
45 #include <sys/mac_client_priv.h>
46 #include <sys/mac_flow_impl.h>
49 * Broadcast and multicast traffic must be distributed to the MAC clients
50 * that are defined on top of the same MAC. The set of
51 * destinations to which a multicast packet must be sent is a subset
52 * of all MAC clients defined on top of the MAC. A MAC client can be a member
53 * of more than one such subset.
55 * To accommodate these requirements, we introduce broadcast groups.
56 * A broadcast group is associated with a broadcast or multicast
57 * address. The members of a broadcast group consist of the MAC clients
58 * that should receive copies of packets sent to the address
59 * associated with the group, and are defined on top of the
60 * same MAC.
62 * The broadcast groups defined on top of a MAC are chained,
63 * hanging off the mac_impl_t. The broadcast group id's are
64 * unique globally (tracked by mac_bcast_id).
68 * The same MAC client may be added for different <addr,vid> tuple,
69 * we maintain a ref count for the number of times it has been added
70 * to account for deleting the MAC client from the group.
72 typedef struct mac_bcast_grp_mcip_s {
73 mac_client_impl_t *mgb_client;
74 int mgb_client_ref;
75 } mac_bcast_grp_mcip_t;
77 typedef struct mac_bcast_grp_s { /* Protected by */
78 struct mac_bcast_grp_s *mbg_next; /* SL */
79 void *mbg_addr; /* SL */
80 uint16_t mbg_vid; /* SL */
81 mac_impl_t *mbg_mac_impl; /* WO */
82 mac_addrtype_t mbg_addrtype; /* WO */
83 flow_entry_t *mbg_flow_ent; /* WO */
84 mac_bcast_grp_mcip_t *mbg_clients; /* mi_rw_lock */
85 uint_t mbg_nclients; /* mi_rw_lock */
86 uint_t mbg_nclients_alloc; /* SL */
87 uint64_t mbg_clients_gen; /* mi_rw_lock */
88 uint32_t mbg_id; /* atomic */
89 } mac_bcast_grp_t;
91 static kmem_cache_t *mac_bcast_grp_cache;
92 static uint32_t mac_bcast_id = 0;
94 void
95 mac_bcast_init(void)
97 mac_bcast_grp_cache = kmem_cache_create("mac_bcast_grp_cache",
98 sizeof (mac_bcast_grp_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
101 void
102 mac_bcast_fini(void)
104 kmem_cache_destroy(mac_bcast_grp_cache);
107 mac_impl_t *
108 mac_bcast_grp_mip(void *grp)
110 mac_bcast_grp_t *bcast_grp = grp;
112 return (bcast_grp->mbg_mac_impl);
116 * Free the specific broadcast group. Invoked when the last reference
117 * to the group is released.
119 void
120 mac_bcast_grp_free(void *bcast_grp)
122 mac_bcast_grp_t *grp = bcast_grp;
123 mac_impl_t *mip = grp->mbg_mac_impl;
125 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
127 ASSERT(grp->mbg_addr != NULL);
128 kmem_free(grp->mbg_addr, mip->mi_type->mt_addr_length);
129 kmem_free(grp->mbg_clients,
130 grp->mbg_nclients_alloc * sizeof (mac_bcast_grp_mcip_t));
131 mip->mi_bcast_ngrps--;
132 kmem_cache_free(mac_bcast_grp_cache, grp);
136 * arg1: broadcast group
137 * arg2: sender MAC client if it is being sent by a MAC client,
138 * NULL if it was received from the wire.
140 void
141 mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback)
143 mac_bcast_grp_t *grp = arg1;
144 mac_client_impl_t *src_mcip = arg2, *dst_mcip;
145 mac_impl_t *mip = grp->mbg_mac_impl;
146 uint64_t gen;
147 uint_t i;
148 mblk_t *mp_chain1;
149 flow_entry_t *flent;
150 int err;
152 rw_enter(&mip->mi_rw_lock, RW_READER);
155 * Pass a copy of the mp chain to every MAC client except the sender
156 * MAC client, if the packet was not received from the underlying NIC.
158 * The broadcast group lock should not be held across calls to
159 * the flow's callback function, since the same group could
160 * potentially be accessed from the same context. When the lock
161 * is reacquired, changes to the broadcast group while the lock
162 * was released are caught using a generation counter incremented
163 * each time the list of MAC clients associated with the broadcast
164 * group is changed.
166 for (i = 0; i < grp->mbg_nclients_alloc; i++) {
167 dst_mcip = grp->mbg_clients[i].mgb_client;
168 if (dst_mcip == NULL)
169 continue;
170 flent = dst_mcip->mci_flent;
171 if (flent == NULL || dst_mcip == src_mcip) {
173 * Don't send a copy of the packet back to
174 * its sender.
176 continue;
180 * It is important to hold a reference on the
181 * flow_ent here.
183 if ((mp_chain1 = mac_copymsgchain_cksum(mp_chain)) == NULL)
184 break;
186 * Fix the checksum for packets originating
187 * from the local machine.
189 if ((src_mcip != NULL) &&
190 (mp_chain1 = mac_fix_cksum(mp_chain1)) == NULL)
191 break;
193 FLOW_TRY_REFHOLD(flent, err);
194 if (err != 0) {
195 freemsgchain(mp_chain1);
196 continue;
199 gen = grp->mbg_clients_gen;
201 rw_exit(&mip->mi_rw_lock);
203 DTRACE_PROBE4(mac__bcast__send__to, mac_client_impl_t *,
204 src_mcip, flow_fn_t, dst_mcip->mci_flent->fe_cb_fn,
205 void *, dst_mcip->mci_flent->fe_cb_arg1,
206 void *, dst_mcip->mci_flent->fe_cb_arg2);
208 (dst_mcip->mci_flent->fe_cb_fn)(dst_mcip->mci_flent->fe_cb_arg1,
209 dst_mcip->mci_flent->fe_cb_arg2, mp_chain1, is_loopback);
210 FLOW_REFRELE(flent);
212 rw_enter(&mip->mi_rw_lock, RW_READER);
214 /* update stats */
215 if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) {
216 MCIP_STAT_UPDATE(dst_mcip, multircv, 1);
217 MCIP_STAT_UPDATE(dst_mcip, multircvbytes,
218 msgdsize(mp_chain));
219 } else {
220 MCIP_STAT_UPDATE(dst_mcip, brdcstrcv, 1);
221 MCIP_STAT_UPDATE(dst_mcip, brdcstrcvbytes,
222 msgdsize(mp_chain));
225 if (grp->mbg_clients_gen != gen) {
227 * The list of MAC clients associated with the group
228 * was changed while the lock was released.
229 * Give up on the current packet.
231 rw_exit(&mip->mi_rw_lock);
232 freemsgchain(mp_chain);
233 return;
236 rw_exit(&mip->mi_rw_lock);
238 if (src_mcip != NULL) {
240 * The packet was sent from one of the MAC clients,
241 * so we need to send a copy of the packet to the
242 * underlying NIC so that it can be sent on the wire.
244 MCIP_STAT_UPDATE(src_mcip, multixmt, 1);
245 MCIP_STAT_UPDATE(src_mcip, multixmtbytes, msgdsize(mp_chain));
246 MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1);
247 MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain));
249 MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip);
250 if (mp_chain != NULL)
251 freemsgchain(mp_chain);
252 } else {
253 freemsgchain(mp_chain);
258 * Add the specified MAC client to the group corresponding to the specified
259 * broadcast or multicast address.
260 * Return 0 on success, or an errno value on failure.
263 mac_bcast_add(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid,
264 mac_addrtype_t addrtype)
266 mac_impl_t *mip = mcip->mci_mip;
267 mac_bcast_grp_t *grp = NULL, **last_grp;
268 size_t addr_len = mip->mi_type->mt_addr_length;
269 int rc = 0;
270 int i, index = -1;
271 mac_mcast_addrs_t **prev_mi_addr = NULL;
272 mac_mcast_addrs_t **prev_mci_addr = NULL;
274 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
276 ASSERT(addrtype == MAC_ADDRTYPE_MULTICAST ||
277 addrtype == MAC_ADDRTYPE_BROADCAST);
280 * Add the MAC client to the list of MAC clients associated
281 * with the group.
283 if (addrtype == MAC_ADDRTYPE_MULTICAST) {
284 mac_mcast_addrs_t *maddr;
287 * In case of a driver (say aggr), we need this information
288 * on a per MAC instance basis.
290 prev_mi_addr = &mip->mi_mcast_addrs;
291 for (maddr = *prev_mi_addr; maddr != NULL;
292 prev_mi_addr = &maddr->mma_next, maddr = maddr->mma_next) {
293 if (bcmp(maddr->mma_addr, addr, addr_len) == 0)
294 break;
296 if (maddr == NULL) {
298 * For multicast addresses, have the underlying MAC
299 * join the corresponding multicast group.
301 rc = mip->mi_multicst(mip->mi_driver, B_TRUE, addr);
302 if (rc != 0)
303 return (rc);
304 maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t),
305 KM_SLEEP);
306 bcopy(addr, maddr->mma_addr, addr_len);
307 *prev_mi_addr = maddr;
308 } else {
309 prev_mi_addr = NULL;
311 maddr->mma_ref++;
314 * We maintain a separate list for each MAC client. Get
315 * the entry or add, if it is not present.
317 prev_mci_addr = &mcip->mci_mcast_addrs;
318 for (maddr = *prev_mci_addr; maddr != NULL;
319 prev_mci_addr = &maddr->mma_next, maddr = maddr->mma_next) {
320 if (bcmp(maddr->mma_addr, addr, addr_len) == 0)
321 break;
323 if (maddr == NULL) {
324 maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t),
325 KM_SLEEP);
326 bcopy(addr, maddr->mma_addr, addr_len);
327 *prev_mci_addr = maddr;
328 } else {
329 prev_mci_addr = NULL;
331 maddr->mma_ref++;
334 /* The list is protected by the perimeter */
335 last_grp = &mip->mi_bcast_grp;
336 for (grp = *last_grp; grp != NULL;
337 last_grp = &grp->mbg_next, grp = grp->mbg_next) {
338 if (bcmp(grp->mbg_addr, addr, addr_len) == 0 &&
339 grp->mbg_vid == vid)
340 break;
343 if (grp == NULL) {
345 * The group does not yet exist, create it.
347 flow_desc_t flow_desc;
348 char flow_name[MAXFLOWNAMELEN];
350 grp = kmem_cache_alloc(mac_bcast_grp_cache, KM_SLEEP);
351 bzero(grp, sizeof (mac_bcast_grp_t));
352 grp->mbg_next = NULL;
353 grp->mbg_mac_impl = mip;
355 DTRACE_PROBE1(mac__bcast__add__new__group, mac_bcast_grp_t *,
356 grp);
358 grp->mbg_addr = kmem_zalloc(addr_len, KM_SLEEP);
359 bcopy(addr, grp->mbg_addr, addr_len);
360 grp->mbg_addrtype = addrtype;
361 grp->mbg_vid = vid;
364 * Add a new flow to the underlying MAC.
366 bzero(&flow_desc, sizeof (flow_desc));
367 bcopy(addr, &flow_desc.fd_dst_mac, addr_len);
368 flow_desc.fd_mac_len = (uint32_t)addr_len;
370 flow_desc.fd_mask = FLOW_LINK_DST;
371 if (vid != 0) {
372 flow_desc.fd_vid = vid;
373 flow_desc.fd_mask |= FLOW_LINK_VID;
376 grp->mbg_id = atomic_inc_32_nv(&mac_bcast_id);
377 (void) sprintf(flow_name,
378 "mac/%s/mcast%d", mip->mi_name, grp->mbg_id);
380 rc = mac_flow_create(&flow_desc, NULL, flow_name,
381 grp, FLOW_MCAST, &grp->mbg_flow_ent);
382 if (rc != 0) {
383 kmem_free(grp->mbg_addr, addr_len);
384 kmem_cache_free(mac_bcast_grp_cache, grp);
385 goto fail;
387 grp->mbg_flow_ent->fe_mbg = grp;
388 mip->mi_bcast_ngrps++;
391 * Initial creation reference on the flow. This is released
392 * in the corresponding delete action i_mac_bcast_delete()
394 FLOW_REFHOLD(grp->mbg_flow_ent);
397 * When the multicast and broadcast packet is received
398 * by the underlying NIC, mac_rx_classify() will invoke
399 * mac_bcast_send() with arg2=NULL, which will cause
400 * mac_bcast_send() to send a copy of the packet(s)
401 * to every MAC client opened on top of the underlying MAC.
403 * When the mac_bcast_send() function is invoked from
404 * the transmit path of a MAC client, it will specify the
405 * transmitting MAC client as the arg2 value, which will
406 * allow mac_bcast_send() to skip that MAC client and not
407 * send it a copy of the packet.
409 * We program the classifier to dispatch matching broadcast
410 * packets to mac_bcast_send().
413 grp->mbg_flow_ent->fe_cb_fn = mac_bcast_send;
414 grp->mbg_flow_ent->fe_cb_arg1 = grp;
415 grp->mbg_flow_ent->fe_cb_arg2 = NULL;
417 rc = mac_flow_add(mip->mi_flow_tab, grp->mbg_flow_ent);
418 if (rc != 0) {
419 FLOW_FINAL_REFRELE(grp->mbg_flow_ent);
420 goto fail;
423 *last_grp = grp;
426 ASSERT(grp->mbg_addrtype == addrtype);
429 * Add the MAC client to the list of MAC clients associated
430 * with the group.
432 rw_enter(&mip->mi_rw_lock, RW_WRITER);
433 for (i = 0; i < grp->mbg_nclients_alloc; i++) {
435 * The MAC client was already added, say when we have
436 * different unicast addresses with the same vid.
437 * Just increment the ref and we are done.
439 if (grp->mbg_clients[i].mgb_client == mcip) {
440 grp->mbg_clients[i].mgb_client_ref++;
441 rw_exit(&mip->mi_rw_lock);
442 return (0);
443 } else if (grp->mbg_clients[i].mgb_client == NULL &&
444 index == -1) {
445 index = i;
448 if (grp->mbg_nclients_alloc == grp->mbg_nclients) {
449 mac_bcast_grp_mcip_t *new_clients;
450 uint_t new_size = grp->mbg_nclients+1;
452 new_clients = kmem_zalloc(new_size *
453 sizeof (mac_bcast_grp_mcip_t), KM_SLEEP);
455 if (grp->mbg_nclients > 0) {
456 ASSERT(grp->mbg_clients != NULL);
457 bcopy(grp->mbg_clients, new_clients, grp->mbg_nclients *
458 sizeof (mac_bcast_grp_mcip_t));
459 kmem_free(grp->mbg_clients, grp->mbg_nclients *
460 sizeof (mac_bcast_grp_mcip_t));
463 grp->mbg_clients = new_clients;
464 grp->mbg_nclients_alloc = new_size;
465 index = new_size - 1;
468 ASSERT(index != -1);
469 grp->mbg_clients[index].mgb_client = mcip;
470 grp->mbg_clients[index].mgb_client_ref = 1;
471 grp->mbg_nclients++;
473 * Since we're adding to the list of MAC clients using that group,
474 * kick the generation count, which will allow mac_bcast_send()
475 * to detect that condition after re-acquiring the lock.
477 grp->mbg_clients_gen++;
478 rw_exit(&mip->mi_rw_lock);
479 return (0);
481 fail:
482 if (prev_mi_addr != NULL) {
483 kmem_free(*prev_mi_addr, sizeof (mac_mcast_addrs_t));
484 *prev_mi_addr = NULL;
485 (void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr);
487 if (prev_mci_addr != NULL) {
488 kmem_free(*prev_mci_addr, sizeof (mac_mcast_addrs_t));
489 *prev_mci_addr = NULL;
491 return (rc);
495 * Remove the specified MAC client from the group corresponding to
496 * the specific broadcast or multicast address.
498 * Note: mac_bcast_delete() calls mac_remove_flow() which
499 * will call cv_wait for fe_refcnt to drop to 0. So this function
500 * should not be called from interrupt or STREAMS context.
502 void
503 mac_bcast_delete(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid)
505 mac_impl_t *mip = mcip->mci_mip;
506 mac_bcast_grp_t *grp = NULL, **prev;
507 size_t addr_len = mip->mi_type->mt_addr_length;
508 flow_entry_t *flent;
509 uint_t i;
510 mac_mcast_addrs_t *maddr = NULL;
511 mac_mcast_addrs_t **mprev;
513 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
515 /* find the broadcast group. The list is protected by the perimeter */
516 prev = &mip->mi_bcast_grp;
517 for (grp = mip->mi_bcast_grp; grp != NULL; prev = &grp->mbg_next,
518 grp = grp->mbg_next) {
519 if (bcmp(grp->mbg_addr, addr, addr_len) == 0 &&
520 grp->mbg_vid == vid)
521 break;
523 ASSERT(grp != NULL);
526 * Remove the MAC client from the list of MAC clients associated
527 * with that broadcast group.
529 * We mark the mbg_clients[] location corresponding to the removed MAC
530 * client NULL and reuse that location when we add a new MAC client.
533 rw_enter(&mip->mi_rw_lock, RW_WRITER);
535 for (i = 0; i < grp->mbg_nclients_alloc; i++) {
536 if (grp->mbg_clients[i].mgb_client == mcip)
537 break;
540 ASSERT(i < grp->mbg_nclients_alloc);
542 * If there are more references to this MAC client, then we let
543 * it remain till it goes to 0.
545 if (--grp->mbg_clients[i].mgb_client_ref > 0)
546 goto update_maddr;
548 grp->mbg_clients[i].mgb_client = NULL;
549 grp->mbg_clients[i].mgb_client_ref = 0;
552 * Since we're removing from the list of MAC clients using that group,
553 * kick the generation count, which will allow mac_bcast_send()
554 * to detect that condition.
556 grp->mbg_clients_gen++;
558 if (--grp->mbg_nclients == 0) {
560 * The last MAC client of the group was just removed.
561 * Unlink the current group from the list of groups
562 * defined on top of the underlying NIC. The group
563 * structure will stay around until the last reference
564 * is dropped.
566 *prev = grp->mbg_next;
568 update_maddr:
569 rw_exit(&mip->mi_rw_lock);
571 if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) {
572 mprev = &mcip->mci_mcast_addrs;
573 for (maddr = mcip->mci_mcast_addrs; maddr != NULL;
574 mprev = &maddr->mma_next, maddr = maddr->mma_next) {
575 if (bcmp(grp->mbg_addr, maddr->mma_addr,
576 mip->mi_type->mt_addr_length) == 0)
577 break;
579 ASSERT(maddr != NULL);
580 if (--maddr->mma_ref == 0) {
581 *mprev = maddr->mma_next;
582 maddr->mma_next = NULL;
583 kmem_free(maddr, sizeof (mac_mcast_addrs_t));
586 mprev = &mip->mi_mcast_addrs;
587 for (maddr = mip->mi_mcast_addrs; maddr != NULL;
588 mprev = &maddr->mma_next, maddr = maddr->mma_next) {
589 if (bcmp(grp->mbg_addr, maddr->mma_addr,
590 mip->mi_type->mt_addr_length) == 0)
591 break;
593 ASSERT(maddr != NULL);
594 if (--maddr->mma_ref == 0) {
595 (void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr);
596 *mprev = maddr->mma_next;
597 maddr->mma_next = NULL;
598 kmem_free(maddr, sizeof (mac_mcast_addrs_t));
603 * If the group itself is being removed, remove the
604 * corresponding flow from the underlying NIC.
606 flent = grp->mbg_flow_ent;
607 if (grp->mbg_nclients == 0) {
608 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
609 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
610 FLOW_FINAL_REFRELE(flent);
615 * This will be called by a driver, such as aggr, when a port is added/removed
616 * to add/remove the port to/from all the multcast addresses for that aggr.
618 void
619 mac_bcast_refresh(mac_impl_t *mip, mac_multicst_t refresh_fn, void *arg,
620 boolean_t add)
622 mac_mcast_addrs_t *grp, *next;
624 ASSERT(refresh_fn != NULL);
626 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
629 * Walk the multicast address list and call the refresh function for
630 * each address.
633 for (grp = mip->mi_mcast_addrs; grp != NULL; grp = next) {
635 * Save the next pointer just in case the refresh
636 * function's action causes the group entry to be
637 * freed.
638 * We won't be adding to this list as part of the
639 * refresh.
641 next = grp->mma_next;
642 refresh_fn(arg, add, grp->mma_addr);
647 * Walk the MAC client's multicast address list and add/remove the addr/vid
648 * ('arg' is 'flent') to all the addresses.
650 void
651 mac_client_bcast_refresh(mac_client_impl_t *mcip, mac_multicst_t refresh_fn,
652 void *arg, boolean_t add)
654 mac_mcast_addrs_t *grp, *next;
655 mac_impl_t *mip = mcip->mci_mip;
657 ASSERT(refresh_fn != NULL);
659 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
661 * Walk the multicast address list and call the refresh function for
662 * each address.
663 * Broadcast addresses are not added or removed through the multicast
664 * entry points, so don't include them as part of the refresh.
666 for (grp = mcip->mci_mcast_addrs; grp != NULL; grp = next) {
668 * Save the next pointer just in case the refresh
669 * function's action causes the group entry to be
670 * freed.
671 * We won't be adding to this list as part of the
672 * refresh.
674 next = grp->mma_next;
675 refresh_fn(arg, add, grp->mma_addr);