4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
29 #include <sys/cmn_err.h>
32 #include <sys/stream.h>
33 #include <sys/modctl.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
38 #include <sys/modhash.h>
39 #include <sys/strsubr.h>
40 #include <sys/strsun.h>
43 #include <sys/mac_impl.h>
44 #include <sys/mac_client_impl.h>
45 #include <sys/mac_client_priv.h>
46 #include <sys/mac_flow_impl.h>
49 * Broadcast and multicast traffic must be distributed to the MAC clients
50 * that are defined on top of the same MAC. The set of
51 * destinations to which a multicast packet must be sent is a subset
52 * of all MAC clients defined on top of the MAC. A MAC client can be member
53 * of more than one such subset.
55 * To accommodate these requirements, we introduce broadcast groups.
56 * A broadcast group is associated with a broadcast or multicast
57 * address. The members of a broadcast group consist of the MAC clients
58 * that should receive copies of packets sent to the address
59 * associated with the group, and are defined on top of the same MAC.
62 * The broadcast groups defined on top of a MAC are chained,
63 * hanging off the mac_impl_t. The broadcast group id's are
64 * unique globally (tracked by mac_bcast_id).
68 * The same MAC client may be added for different <addr,vid> tuples,
69 * so we maintain a ref count of the number of times it has been added;
70 * the MAC client is only removed from the group when that count drops to 0.
72 typedef struct mac_bcast_grp_mcip_s
{
73 mac_client_impl_t
*mgb_client
;
75 } mac_bcast_grp_mcip_t
;
77 typedef struct mac_bcast_grp_s
{ /* Protected by */
78 struct mac_bcast_grp_s
*mbg_next
; /* SL */
79 void *mbg_addr
; /* SL */
80 uint16_t mbg_vid
; /* SL */
81 mac_impl_t
*mbg_mac_impl
; /* WO */
82 mac_addrtype_t mbg_addrtype
; /* WO */
83 flow_entry_t
*mbg_flow_ent
; /* WO */
84 mac_bcast_grp_mcip_t
*mbg_clients
; /* mi_rw_lock */
85 uint_t mbg_nclients
; /* mi_rw_lock */
86 uint_t mbg_nclients_alloc
; /* SL */
87 uint64_t mbg_clients_gen
; /* mi_rw_lock */
88 uint32_t mbg_id
; /* atomic */
91 static kmem_cache_t
*mac_bcast_grp_cache
;
92 static uint32_t mac_bcast_id
= 0;
97 mac_bcast_grp_cache
= kmem_cache_create("mac_bcast_grp_cache",
98 sizeof (mac_bcast_grp_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
104 kmem_cache_destroy(mac_bcast_grp_cache
);
108 mac_bcast_grp_mip(void *grp
)
110 mac_bcast_grp_t
*bcast_grp
= grp
;
112 return (bcast_grp
->mbg_mac_impl
);
116 * Free the specific broadcast group. Invoked when the last reference
117 * to the group is released.
120 mac_bcast_grp_free(void *bcast_grp
)
122 mac_bcast_grp_t
*grp
= bcast_grp
;
123 mac_impl_t
*mip
= grp
->mbg_mac_impl
;
125 ASSERT(MAC_PERIM_HELD((mac_handle_t
)mip
));
127 ASSERT(grp
->mbg_addr
!= NULL
);
128 kmem_free(grp
->mbg_addr
, mip
->mi_type
->mt_addr_length
);
129 kmem_free(grp
->mbg_clients
,
130 grp
->mbg_nclients_alloc
* sizeof (mac_bcast_grp_mcip_t
));
131 mip
->mi_bcast_ngrps
--;
132 kmem_cache_free(mac_bcast_grp_cache
, grp
);
136 * arg1: broadcast group
137 * arg2: sender MAC client if it is being sent by a MAC client,
138 * NULL if it was received from the wire.
141 mac_bcast_send(void *arg1
, void *arg2
, mblk_t
*mp_chain
, boolean_t is_loopback
)
143 mac_bcast_grp_t
*grp
= arg1
;
144 mac_client_impl_t
*src_mcip
= arg2
, *dst_mcip
;
145 mac_impl_t
*mip
= grp
->mbg_mac_impl
;
152 rw_enter(&mip
->mi_rw_lock
, RW_READER
);
155 * Pass a copy of the mp chain to every MAC client except the sender
156 * MAC client, if the packet was not received from the underlying NIC.
158 * The broadcast group lock should not be held across calls to
159 * the flow's callback function, since the same group could
160 * potentially be accessed from the same context. When the lock
161 * is reacquired, changes to the broadcast group while the lock
162 * was released are caught using a generation counter incremented
163 * each time the list of MAC clients associated with the broadcast group changes.
166 for (i
= 0; i
< grp
->mbg_nclients_alloc
; i
++) {
167 dst_mcip
= grp
->mbg_clients
[i
].mgb_client
;
168 if (dst_mcip
== NULL
)
170 flent
= dst_mcip
->mci_flent
;
171 if (flent
== NULL
|| dst_mcip
== src_mcip
) {
173 * Don't send a copy of the packet back to
180 * It is important to hold a reference on the
183 if ((mp_chain1
= mac_copymsgchain_cksum(mp_chain
)) == NULL
)
186 * Fix the checksum for packets originating
187 * from the local machine.
189 if ((src_mcip
!= NULL
) &&
190 (mp_chain1
= mac_fix_cksum(mp_chain1
)) == NULL
)
193 FLOW_TRY_REFHOLD(flent
, err
);
195 freemsgchain(mp_chain1
);
199 gen
= grp
->mbg_clients_gen
;
201 rw_exit(&mip
->mi_rw_lock
);
203 DTRACE_PROBE4(mac__bcast__send__to
, mac_client_impl_t
*,
204 src_mcip
, flow_fn_t
, dst_mcip
->mci_flent
->fe_cb_fn
,
205 void *, dst_mcip
->mci_flent
->fe_cb_arg1
,
206 void *, dst_mcip
->mci_flent
->fe_cb_arg2
);
208 (dst_mcip
->mci_flent
->fe_cb_fn
)(dst_mcip
->mci_flent
->fe_cb_arg1
,
209 dst_mcip
->mci_flent
->fe_cb_arg2
, mp_chain1
, is_loopback
);
212 rw_enter(&mip
->mi_rw_lock
, RW_READER
);
215 if (grp
->mbg_addrtype
== MAC_ADDRTYPE_MULTICAST
) {
216 MCIP_STAT_UPDATE(dst_mcip
, multircv
, 1);
217 MCIP_STAT_UPDATE(dst_mcip
, multircvbytes
,
220 MCIP_STAT_UPDATE(dst_mcip
, brdcstrcv
, 1);
221 MCIP_STAT_UPDATE(dst_mcip
, brdcstrcvbytes
,
225 if (grp
->mbg_clients_gen
!= gen
) {
227 * The list of MAC clients associated with the group
228 * was changed while the lock was released.
229 * Give up on the current packet.
231 rw_exit(&mip
->mi_rw_lock
);
232 freemsgchain(mp_chain
);
236 rw_exit(&mip
->mi_rw_lock
);
238 if (src_mcip
!= NULL
) {
240 * The packet was sent from one of the MAC clients,
241 * so we need to send a copy of the packet to the
242 * underlying NIC so that it can be sent on the wire.
244 MCIP_STAT_UPDATE(src_mcip
, multixmt
, 1);
245 MCIP_STAT_UPDATE(src_mcip
, multixmtbytes
, msgdsize(mp_chain
));
246 MCIP_STAT_UPDATE(src_mcip
, brdcstxmt
, 1);
247 MCIP_STAT_UPDATE(src_mcip
, brdcstxmtbytes
, msgdsize(mp_chain
));
249 MAC_TX(mip
, mip
->mi_default_tx_ring
, mp_chain
, src_mcip
);
250 if (mp_chain
!= NULL
)
251 freemsgchain(mp_chain
);
253 freemsgchain(mp_chain
);
258 * Add the specified MAC client to the group corresponding to the specified
259 * broadcast or multicast address.
260 * Return 0 on success, or an errno value on failure.
263 mac_bcast_add(mac_client_impl_t
*mcip
, const uint8_t *addr
, uint16_t vid
,
264 mac_addrtype_t addrtype
)
266 mac_impl_t
*mip
= mcip
->mci_mip
;
267 mac_bcast_grp_t
*grp
= NULL
, **last_grp
;
268 size_t addr_len
= mip
->mi_type
->mt_addr_length
;
271 mac_mcast_addrs_t
**prev_mi_addr
= NULL
;
272 mac_mcast_addrs_t
**prev_mci_addr
= NULL
;
274 ASSERT(MAC_PERIM_HELD((mac_handle_t
)mip
));
276 ASSERT(addrtype
== MAC_ADDRTYPE_MULTICAST
||
277 addrtype
== MAC_ADDRTYPE_BROADCAST
);
280 * Add the MAC client to the list of MAC clients associated
283 if (addrtype
== MAC_ADDRTYPE_MULTICAST
) {
284 mac_mcast_addrs_t
*maddr
;
287 * In case of a driver (say aggr), we need this information
288 * on a per MAC instance basis.
290 prev_mi_addr
= &mip
->mi_mcast_addrs
;
291 for (maddr
= *prev_mi_addr
; maddr
!= NULL
;
292 prev_mi_addr
= &maddr
->mma_next
, maddr
= maddr
->mma_next
) {
293 if (bcmp(maddr
->mma_addr
, addr
, addr_len
) == 0)
298 * For multicast addresses, have the underlying MAC
299 * join the corresponding multicast group.
301 rc
= mip
->mi_multicst(mip
->mi_driver
, B_TRUE
, addr
);
304 maddr
= kmem_zalloc(sizeof (mac_mcast_addrs_t
),
306 bcopy(addr
, maddr
->mma_addr
, addr_len
);
307 *prev_mi_addr
= maddr
;
314 * We maintain a separate list for each MAC client. Get
315 * the entry or add, if it is not present.
317 prev_mci_addr
= &mcip
->mci_mcast_addrs
;
318 for (maddr
= *prev_mci_addr
; maddr
!= NULL
;
319 prev_mci_addr
= &maddr
->mma_next
, maddr
= maddr
->mma_next
) {
320 if (bcmp(maddr
->mma_addr
, addr
, addr_len
) == 0)
324 maddr
= kmem_zalloc(sizeof (mac_mcast_addrs_t
),
326 bcopy(addr
, maddr
->mma_addr
, addr_len
);
327 *prev_mci_addr
= maddr
;
329 prev_mci_addr
= NULL
;
334 /* The list is protected by the perimeter */
335 last_grp
= &mip
->mi_bcast_grp
;
336 for (grp
= *last_grp
; grp
!= NULL
;
337 last_grp
= &grp
->mbg_next
, grp
= grp
->mbg_next
) {
338 if (bcmp(grp
->mbg_addr
, addr
, addr_len
) == 0 &&
345 * The group does not yet exist, create it.
347 flow_desc_t flow_desc
;
348 char flow_name
[MAXFLOWNAMELEN
];
350 grp
= kmem_cache_alloc(mac_bcast_grp_cache
, KM_SLEEP
);
351 bzero(grp
, sizeof (mac_bcast_grp_t
));
352 grp
->mbg_next
= NULL
;
353 grp
->mbg_mac_impl
= mip
;
355 DTRACE_PROBE1(mac__bcast__add__new__group
, mac_bcast_grp_t
*,
358 grp
->mbg_addr
= kmem_zalloc(addr_len
, KM_SLEEP
);
359 bcopy(addr
, grp
->mbg_addr
, addr_len
);
360 grp
->mbg_addrtype
= addrtype
;
364 * Add a new flow to the underlying MAC.
366 bzero(&flow_desc
, sizeof (flow_desc
));
367 bcopy(addr
, &flow_desc
.fd_dst_mac
, addr_len
);
368 flow_desc
.fd_mac_len
= (uint32_t)addr_len
;
370 flow_desc
.fd_mask
= FLOW_LINK_DST
;
372 flow_desc
.fd_vid
= vid
;
373 flow_desc
.fd_mask
|= FLOW_LINK_VID
;
376 grp
->mbg_id
= atomic_inc_32_nv(&mac_bcast_id
);
377 (void) sprintf(flow_name
,
378 "mac/%s/mcast%d", mip
->mi_name
, grp
->mbg_id
);
380 rc
= mac_flow_create(&flow_desc
, NULL
, flow_name
,
381 grp
, FLOW_MCAST
, &grp
->mbg_flow_ent
);
383 kmem_free(grp
->mbg_addr
, addr_len
);
384 kmem_cache_free(mac_bcast_grp_cache
, grp
);
387 grp
->mbg_flow_ent
->fe_mbg
= grp
;
388 mip
->mi_bcast_ngrps
++;
391 * Initial creation reference on the flow. This is released
392 * in the corresponding delete action, mac_bcast_delete().
394 FLOW_REFHOLD(grp
->mbg_flow_ent
);
397 * When the multicast and broadcast packet is received
398 * by the underlying NIC, mac_rx_classify() will invoke
399 * mac_bcast_send() with arg2=NULL, which will cause
400 * mac_bcast_send() to send a copy of the packet(s)
401 * to every MAC client opened on top of the underlying MAC.
403 * When the mac_bcast_send() function is invoked from
404 * the transmit path of a MAC client, it will specify the
405 * transmitting MAC client as the arg2 value, which will
406 * allow mac_bcast_send() to skip that MAC client and not
407 * send it a copy of the packet.
409 * We program the classifier to dispatch matching broadcast
410 * packets to mac_bcast_send().
413 grp
->mbg_flow_ent
->fe_cb_fn
= mac_bcast_send
;
414 grp
->mbg_flow_ent
->fe_cb_arg1
= grp
;
415 grp
->mbg_flow_ent
->fe_cb_arg2
= NULL
;
417 rc
= mac_flow_add(mip
->mi_flow_tab
, grp
->mbg_flow_ent
);
419 FLOW_FINAL_REFRELE(grp
->mbg_flow_ent
);
426 ASSERT(grp
->mbg_addrtype
== addrtype
);
429 * Add the MAC client to the list of MAC clients associated
432 rw_enter(&mip
->mi_rw_lock
, RW_WRITER
);
433 for (i
= 0; i
< grp
->mbg_nclients_alloc
; i
++) {
435 * The MAC client was already added, say when we have
436 * different unicast addresses with the same vid.
437 * Just increment the ref and we are done.
439 if (grp
->mbg_clients
[i
].mgb_client
== mcip
) {
440 grp
->mbg_clients
[i
].mgb_client_ref
++;
441 rw_exit(&mip
->mi_rw_lock
);
443 } else if (grp
->mbg_clients
[i
].mgb_client
== NULL
&&
448 if (grp
->mbg_nclients_alloc
== grp
->mbg_nclients
) {
449 mac_bcast_grp_mcip_t
*new_clients
;
450 uint_t new_size
= grp
->mbg_nclients
+1;
452 new_clients
= kmem_zalloc(new_size
*
453 sizeof (mac_bcast_grp_mcip_t
), KM_SLEEP
);
455 if (grp
->mbg_nclients
> 0) {
456 ASSERT(grp
->mbg_clients
!= NULL
);
457 bcopy(grp
->mbg_clients
, new_clients
, grp
->mbg_nclients
*
458 sizeof (mac_bcast_grp_mcip_t
));
459 kmem_free(grp
->mbg_clients
, grp
->mbg_nclients
*
460 sizeof (mac_bcast_grp_mcip_t
));
463 grp
->mbg_clients
= new_clients
;
464 grp
->mbg_nclients_alloc
= new_size
;
465 index
= new_size
- 1;
469 grp
->mbg_clients
[index
].mgb_client
= mcip
;
470 grp
->mbg_clients
[index
].mgb_client_ref
= 1;
473 * Since we're adding to the list of MAC clients using that group,
474 * kick the generation count, which will allow mac_bcast_send()
475 * to detect that condition after re-acquiring the lock.
477 grp
->mbg_clients_gen
++;
478 rw_exit(&mip
->mi_rw_lock
);
482 if (prev_mi_addr
!= NULL
) {
483 kmem_free(*prev_mi_addr
, sizeof (mac_mcast_addrs_t
));
484 *prev_mi_addr
= NULL
;
485 (void) mip
->mi_multicst(mip
->mi_driver
, B_FALSE
, addr
);
487 if (prev_mci_addr
!= NULL
) {
488 kmem_free(*prev_mci_addr
, sizeof (mac_mcast_addrs_t
));
489 *prev_mci_addr
= NULL
;
495 * Remove the specified MAC client from the group corresponding to
496 * the specific broadcast or multicast address.
498 * Note: mac_bcast_delete() calls mac_flow_remove() and mac_flow_wait(),
499 * which can cv_wait for fe_refcnt to drop to 0. So this function
500 * should not be called from interrupt or STREAMS context.
503 mac_bcast_delete(mac_client_impl_t
*mcip
, const uint8_t *addr
, uint16_t vid
)
505 mac_impl_t
*mip
= mcip
->mci_mip
;
506 mac_bcast_grp_t
*grp
= NULL
, **prev
;
507 size_t addr_len
= mip
->mi_type
->mt_addr_length
;
510 mac_mcast_addrs_t
*maddr
= NULL
;
511 mac_mcast_addrs_t
**mprev
;
513 ASSERT(MAC_PERIM_HELD((mac_handle_t
)mip
));
515 /* find the broadcast group. The list is protected by the perimeter */
516 prev
= &mip
->mi_bcast_grp
;
517 for (grp
= mip
->mi_bcast_grp
; grp
!= NULL
; prev
= &grp
->mbg_next
,
518 grp
= grp
->mbg_next
) {
519 if (bcmp(grp
->mbg_addr
, addr
, addr_len
) == 0 &&
526 * Remove the MAC client from the list of MAC clients associated
527 * with that broadcast group.
529 * We mark the mbg_clients[] location corresponding to the removed MAC
530 * client NULL and reuse that location when we add a new MAC client.
533 rw_enter(&mip
->mi_rw_lock
, RW_WRITER
);
535 for (i
= 0; i
< grp
->mbg_nclients_alloc
; i
++) {
536 if (grp
->mbg_clients
[i
].mgb_client
== mcip
)
540 ASSERT(i
< grp
->mbg_nclients_alloc
);
542 * If there are more references to this MAC client, then we let
543 * it remain till it goes to 0.
545 if (--grp
->mbg_clients
[i
].mgb_client_ref
> 0)
548 grp
->mbg_clients
[i
].mgb_client
= NULL
;
549 grp
->mbg_clients
[i
].mgb_client_ref
= 0;
552 * Since we're removing from the list of MAC clients using that group,
553 * kick the generation count, which will allow mac_bcast_send()
554 * to detect that condition.
556 grp
->mbg_clients_gen
++;
558 if (--grp
->mbg_nclients
== 0) {
560 * The last MAC client of the group was just removed.
561 * Unlink the current group from the list of groups
562 * defined on top of the underlying NIC. The group
563 * structure will stay around until the last reference
566 *prev
= grp
->mbg_next
;
569 rw_exit(&mip
->mi_rw_lock
);
571 if (grp
->mbg_addrtype
== MAC_ADDRTYPE_MULTICAST
) {
572 mprev
= &mcip
->mci_mcast_addrs
;
573 for (maddr
= mcip
->mci_mcast_addrs
; maddr
!= NULL
;
574 mprev
= &maddr
->mma_next
, maddr
= maddr
->mma_next
) {
575 if (bcmp(grp
->mbg_addr
, maddr
->mma_addr
,
576 mip
->mi_type
->mt_addr_length
) == 0)
579 ASSERT(maddr
!= NULL
);
580 if (--maddr
->mma_ref
== 0) {
581 *mprev
= maddr
->mma_next
;
582 maddr
->mma_next
= NULL
;
583 kmem_free(maddr
, sizeof (mac_mcast_addrs_t
));
586 mprev
= &mip
->mi_mcast_addrs
;
587 for (maddr
= mip
->mi_mcast_addrs
; maddr
!= NULL
;
588 mprev
= &maddr
->mma_next
, maddr
= maddr
->mma_next
) {
589 if (bcmp(grp
->mbg_addr
, maddr
->mma_addr
,
590 mip
->mi_type
->mt_addr_length
) == 0)
593 ASSERT(maddr
!= NULL
);
594 if (--maddr
->mma_ref
== 0) {
595 (void) mip
->mi_multicst(mip
->mi_driver
, B_FALSE
, addr
);
596 *mprev
= maddr
->mma_next
;
597 maddr
->mma_next
= NULL
;
598 kmem_free(maddr
, sizeof (mac_mcast_addrs_t
));
603 * If the group itself is being removed, remove the
604 * corresponding flow from the underlying NIC.
606 flent
= grp
->mbg_flow_ent
;
607 if (grp
->mbg_nclients
== 0) {
608 mac_flow_remove(mip
->mi_flow_tab
, flent
, B_FALSE
);
609 mac_flow_wait(flent
, FLOW_DRIVER_UPCALL
);
610 FLOW_FINAL_REFRELE(flent
);
615 * This will be called by a driver, such as aggr, when a port is added/removed
616 * to add/remove the port to/from all the multicast addresses for that aggr.
619 mac_bcast_refresh(mac_impl_t
*mip
, mac_multicst_t refresh_fn
, void *arg
,
622 mac_mcast_addrs_t
*grp
, *next
;
624 ASSERT(refresh_fn
!= NULL
);
626 ASSERT(MAC_PERIM_HELD((mac_handle_t
)mip
));
629 * Walk the multicast address list and call the refresh function for
633 for (grp
= mip
->mi_mcast_addrs
; grp
!= NULL
; grp
= next
) {
635 * Save the next pointer just in case the refresh
636 * function's action causes the group entry to be
638 * We won't be adding to this list as part of the
641 next
= grp
->mma_next
;
642 refresh_fn(arg
, add
, grp
->mma_addr
);
647 * Walk the MAC client's multicast address list and add/remove the addr/vid
648 * ('arg' is 'flent') to all the addresses.
651 mac_client_bcast_refresh(mac_client_impl_t
*mcip
, mac_multicst_t refresh_fn
,
652 void *arg
, boolean_t add
)
654 mac_mcast_addrs_t
*grp
, *next
;
655 mac_impl_t
*mip
= mcip
->mci_mip
;
657 ASSERT(refresh_fn
!= NULL
);
659 ASSERT(MAC_PERIM_HELD((mac_handle_t
)mip
));
661 * Walk the multicast address list and call the refresh function for
663 * Broadcast addresses are not added or removed through the multicast
664 * entry points, so don't include them as part of the refresh.
666 for (grp
= mcip
->mci_mcast_addrs
; grp
!= NULL
; grp
= next
) {
668 * Save the next pointer just in case the refresh
669 * function's action causes the group entry to be
671 * We won't be adding to this list as part of the
674 next
= grp
->mma_next
;
675 refresh_fn(arg
, add
, grp
->mma_addr
);