4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
29 #include <inet/common.h>
30 #include <sys/strsubr.h>
31 #include <sys/stropts.h>
32 #include <sys/strsun.h>
34 #include <sys/dld_impl.h>
35 #include <sys/cpuvar.h>
36 #include <sys/callb.h>
38 #include <sys/mac_client.h>
39 #include <sys/mac_client_priv.h>
40 #include <sys/mac_flow.h>
42 static int str_constructor(void *, void *, int);
43 static void str_destructor(void *, void *);
44 static mblk_t
*str_unitdata_ind(dld_str_t
*, mblk_t
*, boolean_t
);
45 static void str_notify_promisc_on_phys(dld_str_t
*);
46 static void str_notify_promisc_off_phys(dld_str_t
*);
47 static void str_notify_phys_addr(dld_str_t
*, uint_t
, const uint8_t *);
48 static void str_notify_link_up(dld_str_t
*);
49 static void str_notify_link_down(dld_str_t
*);
50 static void str_notify_capab_reneg(dld_str_t
*);
51 static void str_notify_speed(dld_str_t
*, uint32_t);
53 static void ioc_native(dld_str_t
*, mblk_t
*);
54 static void ioc_margin(dld_str_t
*, mblk_t
*);
55 static void ioc_raw(dld_str_t
*, mblk_t
*);
56 static void ioc_fast(dld_str_t
*, mblk_t
*);
57 static void ioc_lowlink(dld_str_t
*, mblk_t
*);
58 static void ioc(dld_str_t
*, mblk_t
*);
59 static void dld_ioc(dld_str_t
*, mblk_t
*);
60 static void dld_wput_nondata(dld_str_t
*, mblk_t
*);
62 static void str_mdata_raw_put(dld_str_t
*, mblk_t
*);
63 static mblk_t
*i_dld_ether_header_update_tag(mblk_t
*, uint_t
, uint16_t,
65 static mblk_t
*i_dld_ether_header_strip_tag(mblk_t
*, boolean_t
);
67 static uint32_t str_count
;
68 static kmem_cache_t
*str_cachep
;
69 static mod_hash_t
*str_hashp
;
72 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key))
74 #define dld_taskq system_taskq
76 static kmutex_t dld_taskq_lock
;
77 static kcondvar_t dld_taskq_cv
;
78 static list_t dld_taskq_list
; /* List of dld_str_t */
79 boolean_t dld_taskq_quit
;
80 boolean_t dld_taskq_done
;
82 static void dld_taskq_dispatch(void);
85 * Some notes on entry points, flow-control, queueing.
87 * This driver exports the traditional STREAMS put entry point as well as
88 * the non-STREAMS fast-path transmit routine which is provided to IP via
89 * the DL_CAPAB_POLL negotiation. The put procedure handles all control
90 * and data operations, while the fast-path routine deals only with M_DATA
91 * fast-path packets. Regardless of the entry point, all outbound packets
92 * will end up in DLD_TX(), where they will be delivered to the MAC layer.
94 * The transmit logic operates in the following way: All packets coming
95 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control
96 * happens when the MAC layer indicates the packets couldn't be
97 * transmitted due to 1) lack of resources (e.g. running out of
98 * descriptors), or 2) reaching the allowed bandwidth limit for this
99 * particular flow. The indication comes in the form of a Tx cookie that
100 * identifies the blocked ring. In such case, DLD will place a
101 * dummy message on its write-side STREAMS queue so that the queue is
102 * marked as "full". Any subsequent packets arriving at the driver will
103 * still be sent to the MAC layer where they either get queued in the Tx
104 * SRS or are discarded if the queue limit is exceeded. The write-side STREAMS
105 * queue gets enabled when MAC layer notifies DLD through MAC_NOTE_TX.
106 * When the write service procedure runs, it will remove the dummy
107 * message from the write-side STREAMS queue; in effect this will trigger
108 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
109 * respectively, due to the above reasons.
111 * All non-data operations, both DLPI and ioctls are single threaded on a per
112 * dld_str_t endpoint. This is done using a taskq so that the control operation
113 * has kernel context and can cv_wait for resources. In addition all set type
114 * operations that involve mac level state modification are serialized on a
115 * per mac end point using the perimeter mechanism provided by the mac layer.
116 * This serializes all mac clients trying to modify a single mac end point over
117 * the entire sequence of mac calls made by that client as an atomic unit. The
118 * mac framework locking is described in mac.c. A critical element is that
119 * DLD/DLS does not hold any locks across the mac perimeter.
121 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
122 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
123 * match dev_t. If a stream is found and it is attached, its dev_info_t *
124 * is returned. If the mac handle is non-null, it can be safely accessed
125 * below. The mac handle won't be freed until the mac_unregister which
126 * won't happen until the driver detaches. The DDI framework ensures that
127 * the detach won't happen while a getinfo is in progress.
129 typedef struct i_dld_str_state_s
{
138 i_dld_str_walker(mod_hash_key_t key
, mod_hash_val_t
*val
, void *arg
)
140 i_dld_str_state_t
*statep
= arg
;
141 dld_str_t
*dsp
= (dld_str_t
*)val
;
144 if (statep
->ds_major
!= dsp
->ds_major
)
145 return (MH_WALK_CONTINUE
);
147 ASSERT(statep
->ds_minor
!= 0);
150 if (statep
->ds_minor
== dsp
->ds_minor
) {
152 * Clone: a clone minor is unique. We can terminate the
153 * walk if we find a matching stream -- even if we fail
154 * to obtain the devinfo.
157 statep
->ds_dip
= mac_devinfo_get(mh
);
158 statep
->ds_instance
= DLS_MINOR2INST(mac_minor(mh
));
160 return (MH_WALK_TERMINATE
);
162 return (MH_WALK_CONTINUE
);
166 dld_finddevinfo(dev_t dev
)
169 i_dld_str_state_t state
;
171 if (getminor(dev
) == 0)
175 * See if it's a minor node of a link
177 if ((dip
= dls_link_devinfo(dev
)) != NULL
)
180 state
.ds_minor
= getminor(dev
);
181 state
.ds_major
= getmajor(dev
);
183 state
.ds_instance
= -1;
185 mod_hash_walk(str_hashp
, i_dld_str_walker
, &state
);
186 return (state
.ds_dip
);
190 dld_devt_to_instance(dev_t dev
)
193 i_dld_str_state_t state
;
196 * GLDv3 numbers DLPI style 1 node as the instance number + 1.
197 * Minor number 0 is reserved for the DLPI style 2 unattached
201 if ((minor
= getminor(dev
)) == 0)
205 * Check for unopened style 1 node.
206 * Note that this doesn't *necessarily* work for legacy
207 * devices, but this code is only called within the
208 * getinfo(9e) implementation for true GLDv3 devices, so it
211 if (minor
> 0 && minor
<= DLS_MAX_MINOR
) {
212 return (DLS_MINOR2INST(minor
));
215 state
.ds_minor
= getminor(dev
);
216 state
.ds_major
= getmajor(dev
);
218 state
.ds_instance
= -1;
220 mod_hash_walk(str_hashp
, i_dld_str_walker
, &state
);
221 return (state
.ds_instance
);
225 * devo_getinfo: getinfo(9e)
227 * NB: This may be called for a provider before the provider's
228 * instances are attached. Hence, if a particular provider needs a
229 * special mapping (the mac instance != ddi_get_instance()), then it
230 * may need to provide its own implementation using the
231 * mac_devt_to_instance() function, and translating the returned mac
232 * instance to a devinfo instance. For dev_t's where the minor number
233 * is too large (i.e. > MAC_MAX_MINOR), the provider can call this
234 * function indirectly via the mac_getinfo() function.
238 dld_getinfo(dev_info_t
*dip
, ddi_info_cmd_t cmd
, void *arg
, void **resp
)
241 minor_t minor
= getminor((dev_t
)arg
);
242 int rc
= DDI_FAILURE
;
245 case DDI_INFO_DEVT2DEVINFO
:
246 if ((devinfo
= dld_finddevinfo((dev_t
)arg
)) != NULL
) {
247 *(dev_info_t
**)resp
= devinfo
;
251 case DDI_INFO_DEVT2INSTANCE
:
252 if (minor
> 0 && minor
<= DLS_MAX_MINOR
) {
253 *resp
= (void *)(uintptr_t)DLS_MINOR2INST(minor
);
255 } else if (minor
> DLS_MAX_MINOR
&&
256 (devinfo
= dld_finddevinfo((dev_t
)arg
)) != NULL
) {
257 *resp
= (void *)(uintptr_t)ddi_get_instance(devinfo
);
266 dld_str_private(queue_t
*q
)
268 return (((dld_str_t
*)(q
->q_ptr
))->ds_private
);
272 dld_str_open(queue_t
*rq
, dev_t
*devp
, void *private)
279 major
= getmajor(*devp
);
280 minor
= getminor(*devp
);
283 * Create a new dld_str_t for the stream. This will grab a new minor
284 * number that will be handed back in the cloned dev_t. Creation may
285 * fail if we can't allocate the dummy mblk used for flow-control.
287 dsp
= dld_str_create(rq
, DLD_DLPI
, major
,
288 ((minor
== 0) ? DL_STYLE2
: DL_STYLE1
));
292 ASSERT(dsp
->ds_dlstate
== DL_UNATTACHED
);
293 dsp
->ds_private
= private;
298 if ((err
= dld_str_attach(dsp
, (t_uscalar_t
)minor
- 1)) != 0)
301 ASSERT(dsp
->ds_dlstate
== DL_UNBOUND
);
303 (void) qassociate(rq
, -1);
307 * Enable the queue srv(9e) routine.
312 * Construct a cloned dev_t to hand back.
314 *devp
= makedevice(getmajor(*devp
), dsp
->ds_minor
);
318 dld_str_destroy(dsp
);
323 dld_str_close(queue_t
*rq
)
325 dld_str_t
*dsp
= rq
->q_ptr
;
328 * All modules on top have been popped off. So there can't be any
329 * threads from the top.
331 ASSERT(dsp
->ds_datathr_cnt
== 0);
334 * Wait until pending DLPI requests are processed.
336 mutex_enter(&dsp
->ds_lock
);
337 while (dsp
->ds_dlpi_pending
)
338 cv_wait(&dsp
->ds_dlpi_pending_cv
, &dsp
->ds_lock
);
339 mutex_exit(&dsp
->ds_lock
);
343 * This stream was open to a provider node. Check to see
344 * if it has been cleanly shut down.
346 if (dsp
->ds_dlstate
!= DL_UNATTACHED
) {
348 * The stream is either open to a style 1 provider or
349 * this is not clean shutdown. Detach from the PPA.
350 * (This is still ok even in the style 1 case).
355 dld_str_destroy(dsp
);
364 dld_open(queue_t
*rq
, dev_t
*devp
, int flag
, int sflag
, cred_t
*credp
)
366 if (sflag
== MODOPEN
)
370 * This is a cloning driver and therefore each queue should only
371 * ever get opened once.
373 if (rq
->q_ptr
!= NULL
)
376 return (dld_str_open(rq
, devp
, NULL
));
380 * qi_qclose: close(9e)
384 dld_close(queue_t
*rq
, int flags __unused
, cred_t
*credp __unused
)
387 * Disable the queue srv(9e) routine.
391 return (dld_str_close(rq
));
398 dld_wput(queue_t
*wq
, mblk_t
*mp
)
400 dld_str_t
*dsp
= (dld_str_t
*)wq
->q_ptr
;
403 switch (DB_TYPE(mp
)) {
405 mutex_enter(&dsp
->ds_lock
);
407 if ((dsp
->ds_dlstate
!= DL_IDLE
) ||
408 (mode
!= DLD_FASTPATH
&& mode
!= DLD_RAW
)) {
409 mutex_exit(&dsp
->ds_lock
);
414 DLD_DATATHR_INC(dsp
);
415 mutex_exit(&dsp
->ds_lock
);
416 if (mode
== DLD_FASTPATH
) {
417 if (dsp
->ds_mip
->mi_media
== DL_ETHER
&&
418 (MBLKL(mp
) < sizeof (struct ether_header
))) {
421 (void) str_mdata_fastpath_put(dsp
, mp
, 0, 0);
424 str_mdata_raw_put(dsp
, mp
);
426 DLD_DATATHR_DCR(dsp
);
432 if (MBLKL(mp
) < sizeof (t_uscalar_t
))
435 prim
= ((union DL_primitives
*)mp
->b_rptr
)->dl_primitive
;
437 if (prim
== DL_UNITDATA_REQ
) {
438 proto_unitdata_req(dsp
, mp
);
440 dld_wput_nondata(dsp
, mp
);
446 dld_wput_nondata(dsp
, mp
);
450 if (*mp
->b_rptr
& FLUSHW
) {
452 *mp
->b_rptr
&= ~FLUSHW
;
455 if (*mp
->b_rptr
& FLUSHR
) {
472 dld_wsrv(queue_t
*wq
)
474 dld_str_t
*dsp
= wq
->q_ptr
;
480 dld_init_ops(struct dev_ops
*ops
, const char *name
)
482 struct streamtab
*stream
;
483 struct qinit
*rq
, *wq
;
484 struct module_info
*modinfo
;
486 modinfo
= kmem_zalloc(sizeof (struct module_info
), KM_SLEEP
);
487 modinfo
->mi_idname
= kmem_zalloc(FMNAMESZ
, KM_SLEEP
);
488 (void) snprintf(modinfo
->mi_idname
, FMNAMESZ
, "%s", name
);
489 modinfo
->mi_minpsz
= 0;
490 modinfo
->mi_maxpsz
= 64*1024;
491 modinfo
->mi_hiwat
= 1;
492 modinfo
->mi_lowat
= 0;
494 rq
= kmem_zalloc(sizeof (struct qinit
), KM_SLEEP
);
495 rq
->qi_qopen
= dld_open
;
496 rq
->qi_qclose
= dld_close
;
497 rq
->qi_minfo
= modinfo
;
499 wq
= kmem_zalloc(sizeof (struct qinit
), KM_SLEEP
);
500 wq
->qi_putp
= (pfi_t
)dld_wput
;
501 wq
->qi_srvp
= (pfi_t
)dld_wsrv
;
502 wq
->qi_minfo
= modinfo
;
504 stream
= kmem_zalloc(sizeof (struct streamtab
), KM_SLEEP
);
505 stream
->st_rdinit
= rq
;
506 stream
->st_wrinit
= wq
;
507 ops
->devo_cb_ops
->cb_str
= stream
;
509 if (ops
->devo_getinfo
== NULL
)
510 ops
->devo_getinfo
= &dld_getinfo
;
514 dld_fini_ops(struct dev_ops
*ops
)
516 struct streamtab
*stream
;
517 struct qinit
*rq
, *wq
;
518 struct module_info
*modinfo
;
520 stream
= ops
->devo_cb_ops
->cb_str
;
521 rq
= stream
->st_rdinit
;
522 wq
= stream
->st_wrinit
;
523 modinfo
= rq
->qi_minfo
;
524 ASSERT(wq
->qi_minfo
== modinfo
);
526 kmem_free(stream
, sizeof (struct streamtab
));
527 kmem_free(wq
, sizeof (struct qinit
));
528 kmem_free(rq
, sizeof (struct qinit
));
529 kmem_free(modinfo
->mi_idname
, FMNAMESZ
);
530 kmem_free(modinfo
, sizeof (struct module_info
));
534 * Initialize this module's data structures.
540 * Create dld_str_t object cache.
542 str_cachep
= kmem_cache_create("dld_str_cache", sizeof (dld_str_t
),
543 0, str_constructor
, str_destructor
, NULL
, NULL
, NULL
, 0);
544 ASSERT(str_cachep
!= NULL
);
547 * Create a hash table for maintaining dld_str_t's.
548 * The ds_minor field (the clone minor number) of a dld_str_t
549 * is used as a key for this hash table because this number is
550 * globally unique (allocated from "dls_minor_arena").
552 str_hashp
= mod_hash_create_idhash("dld_str_hash", STR_HASHSZ
,
553 mod_hash_null_valdtor
);
555 mutex_init(&dld_taskq_lock
, NULL
, MUTEX_DRIVER
, NULL
);
556 cv_init(&dld_taskq_cv
, NULL
, CV_DRIVER
, NULL
);
558 dld_taskq_quit
= B_FALSE
;
559 dld_taskq_done
= B_FALSE
;
560 list_create(&dld_taskq_list
, sizeof (dld_str_t
),
561 offsetof(dld_str_t
, ds_tqlist
));
562 (void) thread_create(NULL
, 0, dld_taskq_dispatch
, NULL
, 0,
563 &p0
, TS_RUN
, minclsyspri
);
567 * Tear down this module's data structures.
573 * Make sure that there are no objects in use.
579 * Ask the dld_taskq thread to quit and wait for it to be done
581 mutex_enter(&dld_taskq_lock
);
582 dld_taskq_quit
= B_TRUE
;
583 cv_signal(&dld_taskq_cv
);
584 while (!dld_taskq_done
)
585 cv_wait(&dld_taskq_cv
, &dld_taskq_lock
);
586 mutex_exit(&dld_taskq_lock
);
587 list_destroy(&dld_taskq_list
);
589 * Destroy object cache.
591 kmem_cache_destroy(str_cachep
);
592 mod_hash_destroy_idhash(str_hashp
);
597 * Create a new dld_str_t object.
600 dld_str_create(queue_t
*rq
, uint_t type
, major_t major
, t_uscalar_t style
)
606 * Allocate an object from the cache.
608 atomic_inc_32(&str_count
);
609 dsp
= kmem_cache_alloc(str_cachep
, KM_SLEEP
);
612 * Allocate the dummy mblk for flow-control.
614 dsp
->ds_tx_flow_mp
= allocb(1, BPRI_HI
);
615 if (dsp
->ds_tx_flow_mp
== NULL
) {
616 kmem_cache_free(str_cachep
, dsp
);
617 atomic_dec_32(&str_count
);
621 dsp
->ds_major
= major
;
622 dsp
->ds_style
= style
;
625 * Initialize the queue pointers.
627 ASSERT(RD(rq
) == rq
);
630 rq
->q_ptr
= WR(rq
)->q_ptr
= (void *)dsp
;
633 * We want explicit control over our write-side STREAMS queue
634 * where the dummy mblk gets added/removed for flow-control.
638 err
= mod_hash_insert(str_hashp
, STR_HASH_KEY(dsp
->ds_minor
),
639 (mod_hash_val_t
)dsp
);
645 * Destroy a dld_str_t object.
648 dld_str_destroy(dld_str_t
*dsp
)
655 * Clear the queue pointers.
659 ASSERT(wq
== WR(rq
));
660 rq
->q_ptr
= wq
->q_ptr
= NULL
;
661 dsp
->ds_rq
= dsp
->ds_wq
= NULL
;
663 ASSERT(dsp
->ds_dlstate
== DL_UNATTACHED
);
664 ASSERT(dsp
->ds_sap
== 0);
665 ASSERT(dsp
->ds_mh
== NULL
);
666 ASSERT(dsp
->ds_mch
== NULL
);
667 ASSERT(dsp
->ds_promisc
== 0);
668 ASSERT(dsp
->ds_mph
== NULL
);
669 ASSERT(dsp
->ds_mip
== NULL
);
670 ASSERT(dsp
->ds_mnh
== NULL
);
672 ASSERT(dsp
->ds_polling
== B_FALSE
);
673 ASSERT(dsp
->ds_direct
== B_FALSE
);
674 ASSERT(dsp
->ds_lso
== B_FALSE
);
675 ASSERT(dsp
->ds_lso_max
== 0);
676 ASSERT(dsp
->ds_passivestate
!= DLD_ACTIVE
);
679 * Reinitialize all the flags.
681 dsp
->ds_notifications
= 0;
682 dsp
->ds_passivestate
= DLD_UNINITIALIZED
;
683 dsp
->ds_mode
= DLD_UNITDATA
;
684 dsp
->ds_native
= B_FALSE
;
685 dsp
->ds_nonip
= B_FALSE
;
687 ASSERT(dsp
->ds_datathr_cnt
== 0);
688 ASSERT(dsp
->ds_pending_head
== NULL
);
689 ASSERT(dsp
->ds_pending_tail
== NULL
);
690 ASSERT(!dsp
->ds_dlpi_pending
);
692 ASSERT(dsp
->ds_dlp
== NULL
);
693 ASSERT(dsp
->ds_dmap
== NULL
);
694 ASSERT(dsp
->ds_rx
== NULL
);
695 ASSERT(dsp
->ds_rx_arg
== NULL
);
696 ASSERT(dsp
->ds_next
== NULL
);
697 ASSERT(dsp
->ds_head
== NULL
);
700 * Free the dummy mblk if exists.
702 if (dsp
->ds_tx_flow_mp
!= NULL
) {
703 freeb(dsp
->ds_tx_flow_mp
);
704 dsp
->ds_tx_flow_mp
= NULL
;
707 (void) mod_hash_remove(str_hashp
, STR_HASH_KEY(dsp
->ds_minor
), &val
);
708 ASSERT(dsp
== (dld_str_t
*)val
);
711 * Free the object back to the cache.
713 kmem_cache_free(str_cachep
, dsp
);
714 atomic_dec_32(&str_count
);
718 * kmem_cache constructor function: see kmem_cache_create(9f).
722 str_constructor(void *buf
, void *cdrarg
, int kmflags
)
724 dld_str_t
*dsp
= buf
;
726 bzero(buf
, sizeof (dld_str_t
));
729 * Allocate a new minor number.
731 if ((dsp
->ds_minor
= mac_minor_hold(kmflags
== KM_SLEEP
)) == 0)
735 * Initialize the DLPI state machine.
737 dsp
->ds_dlstate
= DL_UNATTACHED
;
739 mutex_init(&dsp
->ds_lock
, NULL
, MUTEX_DRIVER
, NULL
);
740 cv_init(&dsp
->ds_datathr_cv
, NULL
, CV_DRIVER
, NULL
);
741 cv_init(&dsp
->ds_dlpi_pending_cv
, NULL
, CV_DRIVER
, NULL
);
747 * kmem_cache destructor function.
751 str_destructor(void *buf
, void *cdrarg
)
753 dld_str_t
*dsp
= buf
;
756 * Release the minor number.
758 mac_minor_rele(dsp
->ds_minor
);
760 ASSERT(dsp
->ds_tx_flow_mp
== NULL
);
762 mutex_destroy(&dsp
->ds_lock
);
763 cv_destroy(&dsp
->ds_datathr_cv
);
764 cv_destroy(&dsp
->ds_dlpi_pending_cv
);
768 * Update the priority bits and VID (may need to insert tag if mp points
769 * to an untagged packet).
770 * If vid is VLAN_ID_NONE, use the VID encoded in the packet.
773 i_dld_ether_header_update_tag(mblk_t
*mp
, uint_t pri
, uint16_t vid
,
774 link_tagmode_t tagmode
)
777 struct ether_vlan_header
*evhp
;
778 struct ether_header
*ehp
;
779 uint16_t old_tci
= 0;
782 ASSERT(pri
!= 0 || vid
!= VLAN_ID_NONE
);
784 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
785 if (ntohs(evhp
->ether_tpid
) == ETHERTYPE_VLAN
) {
787 * Tagged packet, update the priority bits.
789 len
= sizeof (struct ether_vlan_header
);
791 if ((DB_REF(mp
) > 1) || (MBLKL(mp
) < len
)) {
793 * In case some drivers only check the db_ref
794 * count of the first mblk, we pullup the
795 * message into a single mblk.
797 hmp
= msgpullup(mp
, -1);
798 if ((hmp
== NULL
) || (MBLKL(hmp
) < len
)) {
807 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
808 old_tci
= ntohs(evhp
->ether_tci
);
811 * Untagged packet. Two factors will cause us to insert a
813 * - This is a VLAN link (vid is specified)
814 * - The link supports user priority tagging and the priority
817 if (vid
== VLAN_ID_NONE
&& tagmode
== LINK_TAGMODE_VLANONLY
)
820 hmp
= allocb(sizeof (struct ether_vlan_header
), BPRI_MED
);
824 evhp
= (struct ether_vlan_header
*)hmp
->b_rptr
;
825 ehp
= (struct ether_header
*)mp
->b_rptr
;
828 * Copy the MAC addresses and typelen
830 bcopy(ehp
, evhp
, (ETHERADDRL
* 2));
831 evhp
->ether_type
= ehp
->ether_type
;
832 evhp
->ether_tpid
= htons(ETHERTYPE_VLAN
);
834 hmp
->b_wptr
+= sizeof (struct ether_vlan_header
);
835 mp
->b_rptr
+= sizeof (struct ether_header
);
838 * Free the original message if it's now empty. Link the
839 * rest of the messages to the header message.
841 if (MBLKL(mp
) == 0) {
842 hmp
->b_cont
= mp
->b_cont
;
851 pri
= VLAN_PRI(old_tci
);
852 if (vid
== VLAN_ID_NONE
)
853 vid
= VLAN_ID(old_tci
);
854 evhp
->ether_tci
= htons(VLAN_TCI(pri
, VLAN_CFI(old_tci
), vid
));
859 * M_DATA put (IP fast-path mode)
862 str_mdata_fastpath_put(dld_str_t
*dsp
, mblk_t
*mp
, uintptr_t f_hint
,
865 boolean_t is_ethernet
= (dsp
->ds_mip
->mi_media
== DL_ETHER
);
868 mac_tx_cookie_t cookie
;
872 * Update the priority bits to the assigned priority.
874 pri
= (VLAN_MBLKPRI(mp
) == 0) ? dsp
->ds_pri
: VLAN_MBLKPRI(mp
);
877 newmp
= i_dld_ether_header_update_tag(mp
, pri
,
878 VLAN_ID_NONE
, dsp
->ds_dlp
->dl_tagmode
);
885 if ((cookie
= DLD_TX(dsp
, mp
, f_hint
, flag
)) != (uintptr_t)NULL
) {
891 /* TODO: bump kstat? */
893 return ((uintptr_t)NULL
);
897 * M_DATA put (DLIOCRAW mode)
900 str_mdata_raw_put(dld_str_t
*dsp
, mblk_t
*mp
)
902 boolean_t is_ethernet
= (dsp
->ds_mip
->mi_media
== DL_ETHER
);
905 mac_header_info_t mhi
;
906 uint_t pri
, vid
, dvid
;
910 * Certain MAC type plugins provide an illusion for raw DLPI
911 * consumers. They pretend that the MAC layer is something that
912 * it's not for the benefit of observability tools. For example,
913 * mac_wifi pretends that it's Ethernet for such consumers.
914 * Here, unless native mode is enabled, we call into the MAC layer so
915 * that this illusion can be maintained. The plugin will optionally
916 * transform the MAC header here into something that can be passed
917 * down. The header goes from raw mode to "cooked" mode.
919 if (!dsp
->ds_native
) {
920 if ((newmp
= mac_header_cook(dsp
->ds_mh
, mp
)) == NULL
)
928 * Check the packet is not too big and that any remaining
929 * fragment list is composed entirely of M_DATA messages. (We
930 * know the first fragment was M_DATA otherwise we could not
933 for (bp
= mp
->b_cont
; bp
!= NULL
; bp
= bp
->b_cont
) {
934 if (DB_TYPE(bp
) != M_DATA
)
939 if (mac_vlan_header_info(dsp
->ds_mh
, mp
, &mhi
) != 0)
942 mac_sdu_get(dsp
->ds_mh
, NULL
, &max_sdu
);
944 * If LSO is enabled, check the size against lso_max. Otherwise,
945 * compare the packet size with max_sdu.
947 max_sdu
= dsp
->ds_lso
? dsp
->ds_lso_max
: max_sdu
;
948 if (size
> max_sdu
+ mhi
.mhi_hdrsize
)
952 dvid
= mac_client_vid(dsp
->ds_mch
);
955 * Discard the packet if this is a VLAN stream but the VID in
956 * the packet is not correct.
958 vid
= VLAN_ID(mhi
.mhi_tci
);
959 if ((dvid
!= VLAN_ID_NONE
) && (vid
!= VLAN_ID_NONE
))
963 * Discard the packet if this packet is a tagged packet
964 * but both pri and VID are 0.
966 pri
= VLAN_PRI(mhi
.mhi_tci
);
967 if (mhi
.mhi_istagged
&& !mhi
.mhi_ispvid
&& pri
== 0 &&
972 * Update the priority bits to the per-stream priority if
973 * priority is not set in the packet. Update the VID for
974 * packets on a VLAN stream.
976 pri
= (pri
== 0) ? dsp
->ds_pri
: 0;
977 if ((pri
!= 0) || (dvid
!= VLAN_ID_NONE
)) {
978 if ((newmp
= i_dld_ether_header_update_tag(mp
, pri
,
979 dvid
, dsp
->ds_dlp
->dl_tagmode
)) == NULL
) {
986 if (DLD_TX(dsp
, mp
, 0, 0) != (uintptr_t)NULL
) {
987 /* Turn on flow-control for dld */
993 /* TODO: bump kstat? */
998 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
1001 dld_str_attach(dld_str_t
*dsp
, t_uscalar_t ppa
)
1005 const char *drvname
;
1006 mac_perim_handle_t mph
= NULL
;
1007 boolean_t qassociated
= B_FALSE
;
1008 dls_link_t
*dlp
= NULL
;
1009 dls_dl_handle_t ddp
= NULL
;
1011 if ((drvname
= ddi_major_to_name(dsp
->ds_major
)) == NULL
)
1014 if (dsp
->ds_style
== DL_STYLE2
&& ppa
> DLS_MAX_PPA
)
1018 * /dev node access. This will still be supported for backward
1019 * compatibility reason.
1021 if ((dsp
->ds_style
== DL_STYLE2
) && (strcmp(drvname
, "aggr") != 0) &&
1022 (strcmp(drvname
, "vnic") != 0)) {
1023 if (qassociate(dsp
->ds_wq
, DLS_PPA2INST(ppa
)) != 0)
1025 qassociated
= B_TRUE
;
1028 dev
= makedevice(dsp
->ds_major
, (minor_t
)ppa
+ 1);
1029 if ((err
= dls_devnet_hold_by_dev(dev
, &ddp
)) != 0)
1032 if ((err
= mac_perim_enter_by_macname(dls_devnet_mac(ddp
), &mph
)) != 0)
1038 if ((err
= dls_link_hold(dls_devnet_mac(ddp
), &dlp
)) != 0)
1041 if ((err
= dls_open(dlp
, ddp
, dsp
)) != 0)
1045 * Set the default packet priority.
1050 * Add a notify function so that we get updates from the MAC.
1052 dsp
->ds_mnh
= mac_notify_add(dsp
->ds_mh
, str_notify
, dsp
);
1053 dsp
->ds_dlstate
= DL_UNBOUND
;
1054 mac_perim_exit(mph
);
1061 mac_perim_exit(mph
);
1063 dls_devnet_rele(ddp
);
1065 (void) qassociate(dsp
->ds_wq
, -1);
1071 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
1072 * from close(2) for style 2.
1075 dld_str_detach(dld_str_t
*dsp
)
1077 mac_perim_handle_t mph
;
1080 ASSERT(dsp
->ds_datathr_cnt
== 0);
1082 mac_perim_enter_by_mh(dsp
->ds_mh
, &mph
);
1084 * Remove the notify function.
1086 * Note that we cannot wait for the notification callback to be removed
1087 * because it could cause a deadlock with str_notify(), since they both
1088 * need the mac perimeter. Continue if we cannot remove the
1089 * notification callback right now and wait after we leave the
1092 err
= mac_notify_remove(dsp
->ds_mnh
, B_FALSE
);
1096 * Disable the capabilities
1098 dld_capabilities_disable(dsp
);
1103 dsp
->ds_lso
= B_FALSE
;
1104 dsp
->ds_lso_max
= 0;
1107 mac_perim_exit(mph
);
1110 * Now we leave the mac perimeter. If mac_notify_remove() failed
1111 * because the notification callback was in progress, wait for
1112 * it to finish before we proceed.
1115 mac_notify_remove_wait(dsp
->ds_mh
);
1118 * An unreferenced tagged (non-persistent) vlan gets destroyed
1119 * automatically in the call to dls_devnet_rele.
1121 dls_devnet_rele(dsp
->ds_ddh
);
1128 if (dsp
->ds_style
== DL_STYLE2
)
1129 (void) qassociate(dsp
->ds_wq
, -1);
1132 * Re-initialize the DLPI state machine.
1134 dsp
->ds_dlstate
= DL_UNATTACHED
;
1138 * This function is only called for VLAN streams. In raw mode, we strip VLAN
1139 * tags before sending packets up to the DLS clients, with the exception of
1140 * special priority tagged packets, in that case, we set the VID to 0.
1141 * mp must be a VLAN tagged packet.
1144 i_dld_ether_header_strip_tag(mblk_t
*mp
, boolean_t keep_pri
)
1147 struct ether_vlan_header
*evhp
;
1148 uint16_t tci
, new_tci
;
1150 ASSERT(MBLKL(mp
) >= sizeof (struct ether_vlan_header
));
1151 if (DB_REF(mp
) > 1) {
1152 newmp
= copymsg(mp
);
1158 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
1160 tci
= ntohs(evhp
->ether_tci
);
1161 if (VLAN_PRI(tci
) == 0 || !keep_pri
) {
1163 * Priority is 0, strip the tag.
1165 ovbcopy(mp
->b_rptr
, mp
->b_rptr
+ VLAN_TAGSZ
, 2 * ETHERADDRL
);
1166 mp
->b_rptr
+= VLAN_TAGSZ
;
1169 * Priority is not 0, update the VID to 0.
1171 new_tci
= VLAN_TCI(VLAN_PRI(tci
), VLAN_CFI(tci
), VLAN_ID_NONE
);
1172 evhp
->ether_tci
= htons(new_tci
);
1178 * Raw mode receive function.
1182 dld_str_rx_raw(void *arg
, mac_resource_handle_t mrh
, mblk_t
*mp
,
1183 mac_header_info_t
*mhip
)
1185 dld_str_t
*dsp
= (dld_str_t
*)arg
;
1186 boolean_t is_ethernet
= (dsp
->ds_mip
->mi_media
== DL_ETHER
);
1187 mblk_t
*next
, *newmp
;
1192 * Get the pointer to the next packet in the chain and then
1193 * clear b_next before the packet gets passed on.
1199 * Wind back b_rptr to point at the MAC header.
1201 ASSERT(mp
->b_rptr
>= DB_BASE(mp
) + mhip
->mhi_hdrsize
);
1202 mp
->b_rptr
-= mhip
->mhi_hdrsize
;
1205 * Certain MAC type plugins provide an illusion for raw
1206 * DLPI consumers. They pretend that the MAC layer is
1207 * something that it's not for the benefit of observability
1208 * tools. For example, mac_wifi pretends that it's Ethernet
1209 * for such consumers. Here, unless native mode is enabled,
1210 * we call into the MAC layer so that this illusion can be
1211 * maintained. The plugin will optionally transform the MAC
1212 * header here into something that can be passed up to raw
1213 * consumers. The header goes from "cooked" mode to raw mode.
1215 if (!dsp
->ds_native
) {
1216 newmp
= mac_header_uncook(dsp
->ds_mh
, mp
);
1217 if (newmp
== NULL
) {
1225 * Strip the VLAN tag for VLAN streams.
1228 mac_client_vid(dsp
->ds_mch
) != VLAN_ID_NONE
) {
1230 * The priority should be kept only for VLAN
1233 newmp
= i_dld_ether_header_strip_tag(mp
,
1234 mac_client_is_vlan_vnic(dsp
->ds_mch
));
1235 if (newmp
== NULL
) {
1243 * Pass the packet on.
1245 if (canputnext(dsp
->ds_rq
))
1246 putnext(dsp
->ds_rq
, mp
);
1252 * Move on to the next packet in the chain.
1255 } while (mp
!= NULL
);
1259 * Fast-path receive function.
1263 dld_str_rx_fastpath(void *arg
, mac_resource_handle_t mrh
, mblk_t
*mp
,
1264 mac_header_info_t
*mhip
)
1266 dld_str_t
*dsp
= (dld_str_t
*)arg
;
1271 * MAC header stripping rules:
1273 * a. VLAN streams. Strip the whole VLAN header including the tag.
1274 * b. Physical streams
1275 * - VLAN packets (non-zero VID). The stream must be either a
1276 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener.
1277 * Strip the Ethernet header but keep the VLAN header.
1278 * - Special tagged packets (zero VID)
1279 * * The stream is either a DL_PROMISC_SAP listener or a
1280 * ETHERTYPE_VLAN listener, strip the Ethernet header but
1281 * keep the VLAN header.
1282 * * Otherwise, strip the whole VLAN header.
1283 * - Untagged packets. Strip the whole MAC header.
1285 if (mhip
->mhi_istagged
&&
1286 (mac_client_vid(dsp
->ds_mch
) == VLAN_ID_NONE
) &&
1287 ((dsp
->ds_sap
== ETHERTYPE_VLAN
) ||
1288 (dsp
->ds_promisc
& DLS_PROMISC_SAP
))) {
1289 offset
= VLAN_TAGSZ
;
1295 * Get the pointer to the next packet in the chain and then
1296 * clear b_next before the packet gets passed on.
1302 * Wind back b_rptr to point at the VLAN header.
1304 ASSERT(mp
->b_rptr
>= DB_BASE(mp
) + offset
);
1305 mp
->b_rptr
-= offset
;
1308 * Pass the packet on.
1310 if (canputnext(dsp
->ds_rq
))
1311 putnext(dsp
->ds_rq
, mp
);
1315 * Move on to the next packet in the chain.
1318 } while (mp
!= NULL
);
1322 * Default receive function (send DL_UNITDATA_IND messages).
1326 dld_str_rx_unitdata(void *arg
, mac_resource_handle_t mrh
, mblk_t
*mp
,
1327 mac_header_info_t
*mhip
)
1329 dld_str_t
*dsp
= (dld_str_t
*)arg
;
1333 boolean_t strip_vlan
= B_TRUE
;
1336 * See MAC header stripping rules in the dld_str_rx_fastpath() function.
1338 if (mhip
->mhi_istagged
&&
1339 (mac_client_vid(dsp
->ds_mch
) == VLAN_ID_NONE
) &&
1340 ((dsp
->ds_sap
== ETHERTYPE_VLAN
) ||
1341 (dsp
->ds_promisc
& DLS_PROMISC_SAP
))) {
1342 offset
= VLAN_TAGSZ
;
1343 strip_vlan
= B_FALSE
;
1349 * Get the pointer to the next packet in the chain and then
1350 * clear b_next before the packet gets passed on.
1356 * Wind back b_rptr to point at the MAC header.
1358 ASSERT(mp
->b_rptr
>= DB_BASE(mp
) + mhip
->mhi_hdrsize
);
1359 mp
->b_rptr
-= mhip
->mhi_hdrsize
;
1362 * Create the DL_UNITDATA_IND M_PROTO.
1364 if ((ud_mp
= str_unitdata_ind(dsp
, mp
, strip_vlan
)) == NULL
) {
1370 * Advance b_rptr to point at the payload (or the VLAN header).
1372 mp
->b_rptr
+= (mhip
->mhi_hdrsize
- offset
);
1375 * Prepend the DL_UNITDATA_IND.
1382 if (canputnext(dsp
->ds_rq
))
1383 putnext(dsp
->ds_rq
, ud_mp
);
1388 * Move on to the next packet in the chain.
1391 } while (mp
!= NULL
);
1395 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE
1398 str_notify_sdu_size(dld_str_t
*dsp
, uint_t max_sdu
, uint_t multicast_sdu
)
1401 dl_notify_ind_t
*dlip
;
1403 if (!(dsp
->ds_notifications
& (DL_NOTE_SDU_SIZE
|DL_NOTE_SDU_SIZE2
)))
1406 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1407 M_PROTO
, 0)) == NULL
)
1410 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1411 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1412 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1413 if (dsp
->ds_notifications
& DL_NOTE_SDU_SIZE2
) {
1414 dlip
->dl_notification
= DL_NOTE_SDU_SIZE2
;
1415 dlip
->dl_data1
= max_sdu
;
1416 dlip
->dl_data2
= multicast_sdu
;
1418 dlip
->dl_notification
= DL_NOTE_SDU_SIZE
;
1419 dlip
->dl_data
= max_sdu
;
1422 qreply(dsp
->ds_wq
, mp
);
1426 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the
1427 * current state of the interface.
1430 dld_str_notify_ind(dld_str_t
*dsp
)
1432 mac_notify_type_t type
;
1434 for (type
= 0; type
< MAC_NNOTE
; type
++)
1435 str_notify(dsp
, type
);
1438 typedef struct dl_unitdata_ind_wrapper
{
1439 dl_unitdata_ind_t dl_unitdata
;
1440 uint8_t dl_dest_addr
[MAXMACADDRLEN
+ sizeof (uint16_t)];
1441 uint8_t dl_src_addr
[MAXMACADDRLEN
+ sizeof (uint16_t)];
1442 } dl_unitdata_ind_wrapper_t
;
1445 * Create a DL_UNITDATA_IND M_PROTO message.
1448 str_unitdata_ind(dld_str_t
*dsp
, mblk_t
*mp
, boolean_t strip_vlan
)
1451 dl_unitdata_ind_wrapper_t
*dlwp
;
1452 dl_unitdata_ind_t
*dlp
;
1453 mac_header_info_t mhi
;
1459 * Get the packet header information.
1461 if (mac_vlan_header_info(dsp
->ds_mh
, mp
, &mhi
) != 0)
1465 * Allocate a message large enough to contain the wrapper structure
1468 if ((nmp
= mexchange(dsp
->ds_wq
, NULL
,
1469 sizeof (dl_unitdata_ind_wrapper_t
), M_PROTO
,
1470 DL_UNITDATA_IND
)) == NULL
)
1473 dlwp
= (dl_unitdata_ind_wrapper_t
*)nmp
->b_rptr
;
1475 dlp
= &(dlwp
->dl_unitdata
);
1476 ASSERT(dlp
== (dl_unitdata_ind_t
*)nmp
->b_rptr
);
1477 ASSERT(dlp
->dl_primitive
== DL_UNITDATA_IND
);
1480 * Copy in the destination address.
1482 addr_length
= dsp
->ds_mip
->mi_addr_length
;
1483 daddr
= dlwp
->dl_dest_addr
;
1484 dlp
->dl_dest_addr_offset
= (uintptr_t)daddr
- (uintptr_t)dlp
;
1485 bcopy(mhi
.mhi_daddr
, daddr
, addr_length
);
1488 * Set the destination DLSAP to the SAP value encoded in the packet.
1490 if (mhi
.mhi_istagged
&& !strip_vlan
)
1491 *(uint16_t *)(daddr
+ addr_length
) = ETHERTYPE_VLAN
;
1493 *(uint16_t *)(daddr
+ addr_length
) = mhi
.mhi_bindsap
;
1494 dlp
->dl_dest_addr_length
= addr_length
+ sizeof (uint16_t);
1497 * If the destination address was multicast or broadcast then the
1498 * dl_group_address field should be non-zero.
1500 dlp
->dl_group_address
= (mhi
.mhi_dsttype
== MAC_ADDRTYPE_MULTICAST
) ||
1501 (mhi
.mhi_dsttype
== MAC_ADDRTYPE_BROADCAST
);
1504 * Copy in the source address if one exists. Some MAC types (DL_IB
1505 * for example) may not have access to source information.
1507 if (mhi
.mhi_saddr
== NULL
) {
1508 dlp
->dl_src_addr_offset
= dlp
->dl_src_addr_length
= 0;
1510 saddr
= dlwp
->dl_src_addr
;
1511 dlp
->dl_src_addr_offset
= (uintptr_t)saddr
- (uintptr_t)dlp
;
1512 bcopy(mhi
.mhi_saddr
, saddr
, addr_length
);
1515 * Set the source DLSAP to the packet ethertype.
1517 *(uint16_t *)(saddr
+ addr_length
) = mhi
.mhi_origsap
;
1518 dlp
->dl_src_addr_length
= addr_length
+ sizeof (uint16_t);
1525 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
1528 str_notify_promisc_on_phys(dld_str_t
*dsp
)
1531 dl_notify_ind_t
*dlip
;
1533 if (!(dsp
->ds_notifications
& DL_NOTE_PROMISC_ON_PHYS
))
1536 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1537 M_PROTO
, 0)) == NULL
)
1540 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1541 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1542 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1543 dlip
->dl_notification
= DL_NOTE_PROMISC_ON_PHYS
;
1545 qreply(dsp
->ds_wq
, mp
);
1549 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
1552 str_notify_promisc_off_phys(dld_str_t
*dsp
)
1555 dl_notify_ind_t
*dlip
;
1557 if (!(dsp
->ds_notifications
& DL_NOTE_PROMISC_OFF_PHYS
))
1560 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1561 M_PROTO
, 0)) == NULL
)
1564 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1565 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1566 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1567 dlip
->dl_notification
= DL_NOTE_PROMISC_OFF_PHYS
;
1569 qreply(dsp
->ds_wq
, mp
);
1573 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR
1576 str_notify_phys_addr(dld_str_t
*dsp
, uint_t addr_type
, const uint8_t *addr
)
1579 dl_notify_ind_t
*dlip
;
1583 if (!(dsp
->ds_notifications
& DL_NOTE_PHYS_ADDR
))
1586 addr_length
= dsp
->ds_mip
->mi_addr_length
;
1587 if ((mp
= mexchange(dsp
->ds_wq
, NULL
,
1588 sizeof (dl_notify_ind_t
) + addr_length
+ sizeof (uint16_t),
1589 M_PROTO
, 0)) == NULL
)
1592 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1593 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1594 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1595 dlip
->dl_notification
= DL_NOTE_PHYS_ADDR
;
1596 dlip
->dl_data
= addr_type
;
1597 dlip
->dl_addr_offset
= sizeof (dl_notify_ind_t
);
1598 dlip
->dl_addr_length
= addr_length
+ sizeof (uint16_t);
1600 bcopy(addr
, &dlip
[1], addr_length
);
1602 ethertype
= (dsp
->ds_sap
< ETHERTYPE_802_MIN
) ? 0 : dsp
->ds_sap
;
1603 *(uint16_t *)((uchar_t
*)(dlip
+ 1) + addr_length
) = ethertype
;
1605 qreply(dsp
->ds_wq
, mp
);
1609 * DL_NOTIFY_IND: DL_NOTE_LINK_UP
1612 str_notify_link_up(dld_str_t
*dsp
)
1615 dl_notify_ind_t
*dlip
;
1617 if (!(dsp
->ds_notifications
& DL_NOTE_LINK_UP
))
1620 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1621 M_PROTO
, 0)) == NULL
)
1624 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1625 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1626 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1627 dlip
->dl_notification
= DL_NOTE_LINK_UP
;
1629 qreply(dsp
->ds_wq
, mp
);
1633 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN
1636 str_notify_link_down(dld_str_t
*dsp
)
1639 dl_notify_ind_t
*dlip
;
1641 if (!(dsp
->ds_notifications
& DL_NOTE_LINK_DOWN
))
1644 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1645 M_PROTO
, 0)) == NULL
)
1648 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1649 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1650 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1651 dlip
->dl_notification
= DL_NOTE_LINK_DOWN
;
1653 qreply(dsp
->ds_wq
, mp
);
1657 * DL_NOTIFY_IND: DL_NOTE_SPEED
1660 str_notify_speed(dld_str_t
*dsp
, uint32_t speed
)
1663 dl_notify_ind_t
*dlip
;
1665 if (!(dsp
->ds_notifications
& DL_NOTE_SPEED
))
1668 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1669 M_PROTO
, 0)) == NULL
)
1672 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1673 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1674 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1675 dlip
->dl_notification
= DL_NOTE_SPEED
;
1676 dlip
->dl_data
= speed
;
1678 qreply(dsp
->ds_wq
, mp
);
1682 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG
1685 str_notify_capab_reneg(dld_str_t
*dsp
)
1688 dl_notify_ind_t
*dlip
;
1690 if (!(dsp
->ds_notifications
& DL_NOTE_CAPAB_RENEG
))
1693 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1694 M_PROTO
, 0)) == NULL
)
1697 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1698 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1699 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1700 dlip
->dl_notification
= DL_NOTE_CAPAB_RENEG
;
1702 qreply(dsp
->ds_wq
, mp
);
1706 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH
1709 str_notify_fastpath_flush(dld_str_t
*dsp
)
1712 dl_notify_ind_t
*dlip
;
1714 if (!(dsp
->ds_notifications
& DL_NOTE_FASTPATH_FLUSH
))
1717 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, sizeof (dl_notify_ind_t
),
1718 M_PROTO
, 0)) == NULL
)
1721 bzero(mp
->b_rptr
, sizeof (dl_notify_ind_t
));
1722 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1723 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1724 dlip
->dl_notification
= DL_NOTE_FASTPATH_FLUSH
;
1726 qreply(dsp
->ds_wq
, mp
);
1730 str_notify_allowed_ips(dld_str_t
*dsp
)
1733 dl_notify_ind_t
*dlip
;
1737 if (!(dsp
->ds_notifications
& DL_NOTE_ALLOWED_IPS
))
1740 mp_size
= sizeof (mac_protect_t
) + sizeof (dl_notify_ind_t
);
1741 if ((mp
= mexchange(dsp
->ds_wq
, NULL
, mp_size
, M_PROTO
, 0)) == NULL
)
1744 mrp
= mac_protect_get(dsp
->ds_mh
);
1745 bzero(mp
->b_rptr
, mp_size
);
1746 dlip
= (dl_notify_ind_t
*)mp
->b_rptr
;
1747 dlip
->dl_primitive
= DL_NOTIFY_IND
;
1748 dlip
->dl_notification
= DL_NOTE_ALLOWED_IPS
;
1750 dlip
->dl_addr_offset
= sizeof (dl_notify_ind_t
);
1751 dlip
->dl_addr_length
= sizeof (mac_protect_t
);
1752 bcopy(mrp
, mp
->b_rptr
+ sizeof (dl_notify_ind_t
),
1753 sizeof (mac_protect_t
));
1755 qreply(dsp
->ds_wq
, mp
);
1759 * MAC notification callback.
1762 str_notify(void *arg
, mac_notify_type_t type
)
1764 dld_str_t
*dsp
= (dld_str_t
*)arg
;
1765 queue_t
*q
= dsp
->ds_wq
;
1766 mac_handle_t mh
= dsp
->ds_mh
;
1767 mac_client_handle_t mch
= dsp
->ds_mch
;
1768 uint8_t addr
[MAXMACADDRLEN
];
1775 case MAC_NOTE_DEVPROMISC
:
1777 * Send the appropriate DL_NOTIFY_IND.
1779 if (mac_promisc_get(mh
))
1780 str_notify_promisc_on_phys(dsp
);
1782 str_notify_promisc_off_phys(dsp
);
1785 case MAC_NOTE_UNICST
:
1787 * This notification is sent whenever the MAC unicast
1790 mac_unicast_primary_get(mh
, addr
);
1793 * Send the appropriate DL_NOTIFY_IND.
1795 str_notify_phys_addr(dsp
, DL_CURR_PHYS_ADDR
, addr
);
1800 * Only send up DL_NOTE_DEST_ADDR if the link has a
1801 * destination address.
1803 if (mac_dst_get(dsp
->ds_mh
, addr
))
1804 str_notify_phys_addr(dsp
, DL_CURR_DEST_ADDR
, addr
);
1807 case MAC_NOTE_LOWLINK
:
1810 * LOWLINK refers to the actual link status. For links that
1811 * are not part of a bridge instance LOWLINK and LINK state
1812 * are the same. But for a link part of a bridge instance
1813 * LINK state refers to the aggregate link status: "up" when
1814 * at least one link part of the bridge is up and is "down"
1815 * when all links part of the bridge are down.
1817 * Clients can request to be notified of the LOWLINK state
1818 * using the DLIOCLOWLINK ioctl. Clients such as the bridge
1819 * daemon request lowlink state changes and upper layer clients
1820 * receive notifications of the aggregate link state changes
1821 * which is the default when requesting LINK UP/DOWN state
1826 * Check that the notification type matches the one that we
1827 * want. If we want lower-level link notifications, and this
1828 * is upper, or if we want upper and this is lower, then
1831 if ((type
== MAC_NOTE_LOWLINK
) != dsp
->ds_lowlink
)
1834 * This notification is sent every time the MAC driver
1835 * updates the link state.
1837 switch (mac_client_stat_get(mch
, dsp
->ds_lowlink
?
1838 MAC_STAT_LOWLINK_STATE
: MAC_STAT_LINK_STATE
)) {
1839 case LINK_STATE_UP
: {
1842 * The link is up so send the appropriate
1845 str_notify_link_up(dsp
);
1847 speed
= mac_stat_get(mh
, MAC_STAT_IFSPEED
);
1848 str_notify_speed(dsp
, (uint32_t)(speed
/ 1000ull));
1851 case LINK_STATE_DOWN
:
1853 * The link is down so send the appropriate
1856 str_notify_link_down(dsp
);
1864 case MAC_NOTE_CAPAB_CHG
:
1866 * This notification is sent whenever the MAC resources
1867 * change or capabilities change. We need to renegotiate
1868 * the capabilities. Send the appropriate DL_NOTIFY_IND.
1870 str_notify_capab_reneg(dsp
);
1873 case MAC_NOTE_SDU_SIZE
: {
1875 uint_t multicast_sdu
;
1876 mac_sdu_get2(dsp
->ds_mh
, NULL
, &max_sdu
, &multicast_sdu
);
1877 str_notify_sdu_size(dsp
, max_sdu
, multicast_sdu
);
1881 case MAC_NOTE_FASTPATH_FLUSH
:
1882 str_notify_fastpath_flush(dsp
);
1885 /* Unused notifications */
1886 case MAC_NOTE_MARGIN
:
1889 case MAC_NOTE_ALLOWED_IPS
:
1890 str_notify_allowed_ips(dsp
);
1900 * This function is called via a taskq mechansim to process all control
1901 * messages on a per 'dsp' end point.
1904 dld_wput_nondata_task(void *arg
)
1906 dld_str_t
*dsp
= arg
;
1909 mutex_enter(&dsp
->ds_lock
);
1910 while (dsp
->ds_pending_head
!= NULL
) {
1911 mp
= dsp
->ds_pending_head
;
1912 dsp
->ds_pending_head
= mp
->b_next
;
1914 if (dsp
->ds_pending_head
== NULL
)
1915 dsp
->ds_pending_tail
= NULL
;
1916 mutex_exit(&dsp
->ds_lock
);
1918 switch (DB_TYPE(mp
)) {
1930 mutex_enter(&dsp
->ds_lock
);
1932 ASSERT(dsp
->ds_pending_tail
== NULL
);
1933 dsp
->ds_dlpi_pending
= 0;
1934 cv_broadcast(&dsp
->ds_dlpi_pending_cv
);
1935 mutex_exit(&dsp
->ds_lock
);
1939 * Kernel thread to handle taskq dispatch failures in dld_wput_data. This
1940 * thread is started at boot time.
1943 dld_taskq_dispatch(void)
1945 callb_cpr_t cprinfo
;
1948 CALLB_CPR_INIT(&cprinfo
, &dld_taskq_lock
, callb_generic_cpr
,
1949 "dld_taskq_dispatch");
1950 mutex_enter(&dld_taskq_lock
);
1952 while (!dld_taskq_quit
) {
1953 dsp
= list_head(&dld_taskq_list
);
1954 while (dsp
!= NULL
) {
1955 list_remove(&dld_taskq_list
, dsp
);
1956 mutex_exit(&dld_taskq_lock
);
1957 VERIFY(taskq_dispatch(dld_taskq
, dld_wput_nondata_task
,
1958 dsp
, TQ_SLEEP
) != 0);
1959 mutex_enter(&dld_taskq_lock
);
1960 dsp
= list_head(&dld_taskq_list
);
1963 CALLB_CPR_SAFE_BEGIN(&cprinfo
);
1964 cv_wait(&dld_taskq_cv
, &dld_taskq_lock
);
1965 CALLB_CPR_SAFE_END(&cprinfo
, &dld_taskq_lock
);
1968 dld_taskq_done
= B_TRUE
;
1969 cv_signal(&dld_taskq_cv
);
1970 CALLB_CPR_EXIT(&cprinfo
);
1975 * All control operations are serialized on the 'dsp' and are also funneled
1976 * through a taskq mechanism to ensure that subsequent processing has kernel
1977 * context and can safely use cv_wait.
1979 * Mechanisms to handle taskq dispatch failures
1981 * The only way to be sure that taskq dispatch does not fail is to either
1982 * specify TQ_SLEEP or to use a static taskq and prepopulate it with
1983 * some number of entries and make sure that the number of outstanding requests
1984 * are less than that number. We can't use TQ_SLEEP since we don't know the
1985 * context. Nor can we bound the total number of 'dsp' end points. So we are
1986 * unable to use either of the above schemes, and are forced to deal with
1987 * taskq dispatch failures. Note that even dynamic taskq could fail in
1988 * dispatch if TQ_NOSLEEP is specified, since this flag is translated
1989 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq
1992 * We maintain a queue of 'dsp's that encountered taskq dispatch failure.
1993 * We also have a single global thread to retry the taskq dispatch. This
1994 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but
1995 * uses TQ_SLEEP to ensure eventual success of the dispatch operation.
1998 dld_wput_nondata(dld_str_t
*dsp
, mblk_t
*mp
)
2000 ASSERT(mp
->b_next
== NULL
);
2001 mutex_enter(&dsp
->ds_lock
);
2002 if (dsp
->ds_pending_head
!= NULL
) {
2003 ASSERT(dsp
->ds_dlpi_pending
);
2004 dsp
->ds_pending_tail
->b_next
= mp
;
2005 dsp
->ds_pending_tail
= mp
;
2006 mutex_exit(&dsp
->ds_lock
);
2009 ASSERT(dsp
->ds_pending_tail
== NULL
);
2010 dsp
->ds_pending_head
= dsp
->ds_pending_tail
= mp
;
2012 * At this point if ds_dlpi_pending is set, it implies that the taskq
2013 * thread is still active and is processing the last message, though
2014 * the pending queue has been emptied.
2016 if (dsp
->ds_dlpi_pending
) {
2017 mutex_exit(&dsp
->ds_lock
);
2021 dsp
->ds_dlpi_pending
= 1;
2022 mutex_exit(&dsp
->ds_lock
);
2024 if (taskq_dispatch(dld_taskq
, dld_wput_nondata_task
, dsp
,
2028 mutex_enter(&dld_taskq_lock
);
2029 list_insert_tail(&dld_taskq_list
, dsp
);
2030 cv_signal(&dld_taskq_cv
);
2031 mutex_exit(&dld_taskq_lock
);
2035 * Process an M_IOCTL message.
2038 dld_ioc(dld_str_t
*dsp
, mblk_t
*mp
)
2042 cmd
= ((struct iocblk
*)mp
->b_rptr
)->ioc_cmd
;
2043 ASSERT(dsp
->ds_type
== DLD_DLPI
);
2047 ioc_native(dsp
, mp
);
2049 case DLIOCMARGININFO
:
2050 ioc_margin(dsp
, mp
);
2059 ioc_lowlink(dsp
, mp
);
2070 ioc_native(dld_str_t
*dsp
, mblk_t
*mp
)
2072 queue_t
*q
= dsp
->ds_wq
;
2073 const mac_info_t
*mip
= dsp
->ds_mip
;
2076 * Native mode can be enabled if it's disabled and if the
2077 * native media type is different.
2079 if (!dsp
->ds_native
&& mip
->mi_media
!= mip
->mi_nativemedia
)
2080 dsp
->ds_native
= B_TRUE
;
2083 miocack(q
, mp
, 0, mip
->mi_nativemedia
);
2085 miocnak(q
, mp
, 0, ENOTSUP
);
2092 ioc_margin(dld_str_t
*dsp
, mblk_t
*mp
)
2094 queue_t
*q
= dsp
->ds_wq
;
2098 if (dsp
->ds_dlstate
== DL_UNATTACHED
) {
2102 if ((err
= miocpullup(mp
, sizeof (uint32_t))) != 0)
2105 mac_margin_get(dsp
->ds_mh
, &margin
);
2106 *((uint32_t *)mp
->b_cont
->b_rptr
) = margin
;
2107 miocack(q
, mp
, sizeof (uint32_t), 0);
2111 miocnak(q
, mp
, 0, err
);
2118 ioc_raw(dld_str_t
*dsp
, mblk_t
*mp
)
2120 queue_t
*q
= dsp
->ds_wq
;
2121 mac_perim_handle_t mph
;
2123 if (dsp
->ds_mh
== NULL
) {
2124 dsp
->ds_mode
= DLD_RAW
;
2125 miocack(q
, mp
, 0, 0);
2129 mac_perim_enter_by_mh(dsp
->ds_mh
, &mph
);
2130 if (dsp
->ds_polling
|| dsp
->ds_direct
) {
2131 mac_perim_exit(mph
);
2132 miocnak(q
, mp
, 0, EPROTO
);
2136 if (dsp
->ds_mode
!= DLD_RAW
&& dsp
->ds_dlstate
== DL_IDLE
) {
2138 * Set the receive callback.
2140 dls_rx_set(dsp
, dld_str_rx_raw
, dsp
);
2144 * Note that raw mode is enabled.
2146 dsp
->ds_mode
= DLD_RAW
;
2147 mac_perim_exit(mph
);
2149 miocack(q
, mp
, 0, 0);
2156 ioc_fast(dld_str_t
*dsp
, mblk_t
*mp
)
2158 dl_unitdata_req_t
*dlp
;
2161 const uint8_t *addr
;
2166 queue_t
*q
= dsp
->ds_wq
;
2168 mac_perim_handle_t mph
;
2170 if (dld_opt
& DLD_OPT_NO_FASTPATH
) {
2176 * DLIOCHDRINFO should only come from IP. The one initiated from
2177 * user-land should not be allowed.
2179 if (((struct iocblk
*)mp
->b_rptr
)->ioc_cr
!= kcred
) {
2185 if (nmp
== NULL
|| MBLKL(nmp
) < sizeof (dl_unitdata_req_t
) ||
2186 (dlp
= (dl_unitdata_req_t
*)nmp
->b_rptr
,
2187 dlp
->dl_primitive
!= DL_UNITDATA_REQ
)) {
2192 off
= dlp
->dl_dest_addr_offset
;
2193 len
= dlp
->dl_dest_addr_length
;
2195 if (!MBLKIN(nmp
, off
, len
)) {
2200 if (dsp
->ds_dlstate
!= DL_IDLE
) {
2205 addr_length
= dsp
->ds_mip
->mi_addr_length
;
2206 if (len
!= addr_length
+ sizeof (uint16_t)) {
2211 addr
= nmp
->b_rptr
+ off
;
2212 sap
= *(uint16_t *)(nmp
->b_rptr
+ off
+ addr_length
);
2214 if ((hmp
= dls_header(dsp
, addr
, sap
, 0, NULL
)) == NULL
) {
2220 * This ioctl might happen concurrently with a direct call to dld_capab
2221 * that tries to enable direct and/or poll capabilities. Since the
2222 * stack does not serialize them, we do so here to avoid mixing
2225 mac_perim_enter_by_mh(dsp
->ds_mh
, &mph
);
2226 if (dsp
->ds_mode
!= DLD_FASTPATH
) {
2228 * Set the receive callback (unless polling is enabled).
2230 if (!dsp
->ds_polling
&& !dsp
->ds_direct
)
2231 dls_rx_set(dsp
, dld_str_rx_fastpath
, dsp
);
2234 * Note that fast-path mode is enabled.
2236 dsp
->ds_mode
= DLD_FASTPATH
;
2238 mac_perim_exit(mph
);
2240 freemsg(nmp
->b_cont
);
2243 miocack(q
, mp
, MBLKL(nmp
) + MBLKL(hmp
), 0);
2246 miocnak(q
, mp
, 0, err
);
2250 * DLIOCLOWLINK: request actual link state changes. When the
2251 * link is part of a bridge instance the client receives actual
2252 * link state changes and not the aggregate link status. Used by
2253 * the bridging daemon (bridged) for proper RSTP operation.
2256 ioc_lowlink(dld_str_t
*dsp
, mblk_t
*mp
)
2258 queue_t
*q
= dsp
->ds_wq
;
2261 if ((err
= miocpullup(mp
, sizeof (int))) != 0) {
2262 miocnak(q
, mp
, 0, err
);
2264 /* LINTED: alignment */
2265 dsp
->ds_lowlink
= *(boolean_t
*)mp
->b_cont
->b_rptr
;
2266 miocack(q
, mp
, 0, 0);
2271 * Catch-all handler.
2274 ioc(dld_str_t
*dsp
, mblk_t
*mp
)
2276 queue_t
*q
= dsp
->ds_wq
;
2278 if (dsp
->ds_dlstate
== DL_UNATTACHED
) {
2279 miocnak(q
, mp
, 0, EINVAL
);
2282 mac_ioctl(dsp
->ds_mh
, q
, mp
);