Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / vnic / vnic_dev.c
blobd6711539673639d610d9b00ff83328fa0f40ac85
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 Joyent, Inc.
24 * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
27 #include <sys/types.h>
28 #include <sys/cred.h>
29 #include <sys/sysmacros.h>
30 #include <sys/conf.h>
31 #include <sys/cmn_err.h>
32 #include <sys/list.h>
33 #include <sys/ksynch.h>
34 #include <sys/kmem.h>
35 #include <sys/stream.h>
36 #include <sys/modctl.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/atomic.h>
40 #include <sys/stat.h>
41 #include <sys/modhash.h>
42 #include <sys/strsubr.h>
43 #include <sys/strsun.h>
44 #include <sys/dlpi.h>
45 #include <sys/mac.h>
46 #include <sys/mac_provider.h>
47 #include <sys/mac_client.h>
48 #include <sys/mac_client_priv.h>
49 #include <sys/mac_ether.h>
50 #include <sys/dls.h>
51 #include <sys/pattr.h>
52 #include <sys/time.h>
53 #include <sys/vlan.h>
54 #include <sys/vnic.h>
55 #include <sys/vnic_impl.h>
56 #include <sys/mac_impl.h>
57 #include <sys/mac_flow_impl.h>
58 #include <inet/ip_impl.h>
61 * Note that for best performance, the VNIC is a passthrough design.
62 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
63 * This MAC client is opened by the VNIC driver at VNIC creation,
64 * and closed when the VNIC is deleted.
65 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
66 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
67 * of allocating a new MAC client, it asks the VNIC driver to return
68 * the lower MAC client handle associated with the VNIC, and that handle
69 * is returned to the upper MAC client directly. This gives upper
70 * MAC clients of the VNIC direct access to the lower MAC client
71 * for both the control path and the data path.
73 * Due to this passthrough, some of the entry points exported by the
74 * VNIC driver are never directly invoked. These entry points include
75 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
77 * VNICs support multiple upper mac clients to enable support for
78 * multiple MAC addresses on the VNIC. When the VNIC is created the
79 * initial mac client is the primary upper mac. Any additional mac
80 * clients are secondary macs.
83 static int vnic_m_start(void *);
84 static void vnic_m_stop(void *);
85 static int vnic_m_promisc(void *, boolean_t);
86 static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
87 static int vnic_m_unicst(void *, const uint8_t *);
88 static int vnic_m_stat(void *, uint_t, uint64_t *);
89 static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
90 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
91 const void *);
92 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
93 static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
94 mac_prop_info_handle_t);
95 static mblk_t *vnic_m_tx(void *, mblk_t *);
96 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
97 static void vnic_notify_cb(void *, mac_notify_type_t);
98 static void vnic_cleanup_secondary_macs(vnic_t *, int);
100 static kmem_cache_t *vnic_cache;
101 static krwlock_t vnic_lock;
102 static uint_t vnic_count;
104 #define ANCHOR_VNIC_MIN_MTU 576
105 #define ANCHOR_VNIC_MAX_MTU 9000
107 /* hash of VNICs (vnic_t's), keyed by VNIC id */
108 static mod_hash_t *vnic_hash;
109 #define VNIC_HASHSZ 64
110 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id)
112 #define VNIC_M_CALLBACK_FLAGS \
113 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
115 static mac_callbacks_t vnic_m_callbacks = {
116 VNIC_M_CALLBACK_FLAGS,
117 vnic_m_stat,
118 vnic_m_start,
119 vnic_m_stop,
120 vnic_m_promisc,
121 vnic_m_multicst,
122 vnic_m_unicst,
123 vnic_m_tx,
124 NULL,
125 vnic_m_ioctl,
126 vnic_m_capab_get,
127 NULL,
128 NULL,
129 vnic_m_setprop,
130 vnic_m_getprop,
131 vnic_m_propinfo
134 void
135 vnic_dev_init(void)
137 vnic_cache = kmem_cache_create("vnic_cache",
138 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
140 vnic_hash = mod_hash_create_idhash("vnic_hash",
141 VNIC_HASHSZ, mod_hash_null_valdtor);
143 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
145 vnic_count = 0;
148 void
149 vnic_dev_fini(void)
151 ASSERT(vnic_count == 0);
153 rw_destroy(&vnic_lock);
154 mod_hash_destroy_idhash(vnic_hash);
155 kmem_cache_destroy(vnic_cache);
158 uint_t
159 vnic_dev_count(void)
161 return (vnic_count);
164 static vnic_ioc_diag_t
165 vnic_mac2vnic_diag(mac_diag_t diag)
167 switch (diag) {
168 case MAC_DIAG_MACADDR_NIC:
169 return (VNIC_IOC_DIAG_MACADDR_NIC);
170 case MAC_DIAG_MACADDR_INUSE:
171 return (VNIC_IOC_DIAG_MACADDR_INUSE);
172 case MAC_DIAG_MACADDR_INVALID:
173 return (VNIC_IOC_DIAG_MACADDR_INVALID);
174 case MAC_DIAG_MACADDRLEN_INVALID:
175 return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
176 case MAC_DIAG_MACFACTORYSLOTINVALID:
177 return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
178 case MAC_DIAG_MACFACTORYSLOTUSED:
179 return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
180 case MAC_DIAG_MACFACTORYSLOTALLUSED:
181 return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
182 case MAC_DIAG_MACFACTORYNOTSUP:
183 return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
184 case MAC_DIAG_MACPREFIX_INVALID:
185 return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
186 case MAC_DIAG_MACPREFIXLEN_INVALID:
187 return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
188 case MAC_DIAG_MACNO_HWRINGS:
189 return (VNIC_IOC_DIAG_NO_HWRINGS);
190 default:
191 return (VNIC_IOC_DIAG_NONE);
195 static int
196 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
197 int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
198 uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
199 uint16_t vid, boolean_t req_hwgrp_flag)
201 mac_diag_t mac_diag;
202 uint16_t mac_flags = 0;
203 int err;
204 uint_t addr_len;
206 if (flags & VNIC_IOC_CREATE_NODUPCHECK)
207 mac_flags |= MAC_UNICAST_NODUPCHECK;
209 switch (vnic_addr_type) {
210 case VNIC_MAC_ADDR_TYPE_FIXED:
211 case VNIC_MAC_ADDR_TYPE_VRID:
213 * The MAC address value to assign to the VNIC
214 * is already provided in mac_addr_arg. addr_len_ptr_arg
215 * already contains the MAC address length.
217 break;
219 case VNIC_MAC_ADDR_TYPE_RANDOM:
221 * Random MAC address. There are two sub-cases:
223 * 1 - If mac_len == 0, a new MAC address is generated.
224 * The length of the MAC address to generated depends
225 * on the type of MAC used. The prefix to use for the MAC
226 * address is stored in the most significant bytes
227 * of the mac_addr argument, and its length is specified
228 * by the mac_prefix_len argument. This prefix can
229 * correspond to a IEEE OUI in the case of Ethernet,
230 * for example.
232 * 2 - If mac_len > 0, the address was already picked
233 * randomly, and is now passed back during VNIC
234 * re-creation. The mac_addr argument contains the MAC
235 * address that was generated. We distinguish this
236 * case from the fixed MAC address case, since we
237 * want the user consumers to know, when they query
238 * the list of VNICs, that a VNIC was assigned a
239 * random MAC address vs assigned a fixed address
240 * specified by the user.
244 * If it's a pre-generated address, we're done. mac_addr_arg
245 * and addr_len_ptr_arg already contain the MAC address
246 * value and length.
248 if (*addr_len_ptr_arg > 0)
249 break;
251 /* generate a new random MAC address */
252 if ((err = mac_addr_random(vnic->vn_mch,
253 prefix_len, mac_addr_arg, &mac_diag)) != 0) {
254 *diag = vnic_mac2vnic_diag(mac_diag);
255 return (err);
257 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
258 break;
260 case VNIC_MAC_ADDR_TYPE_FACTORY:
261 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
262 if (err != 0) {
263 if (err == EINVAL)
264 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
265 if (err == EBUSY)
266 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
267 if (err == ENOSPC)
268 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
269 return (err);
272 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
273 mac_addr_arg, &addr_len, NULL, NULL);
274 *addr_len_ptr_arg = addr_len;
275 break;
277 case VNIC_MAC_ADDR_TYPE_AUTO:
278 /* first try to allocate a factory MAC address */
279 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
280 if (err == 0) {
281 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
282 mac_addr_arg, &addr_len, NULL, NULL);
283 vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
284 *addr_len_ptr_arg = addr_len;
285 break;
289 * Allocating a factory MAC address failed, generate a
290 * random MAC address instead.
292 if ((err = mac_addr_random(vnic->vn_mch,
293 prefix_len, mac_addr_arg, &mac_diag)) != 0) {
294 *diag = vnic_mac2vnic_diag(mac_diag);
295 return (err);
297 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
298 vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
299 break;
300 case VNIC_MAC_ADDR_TYPE_PRIMARY:
302 * We get the address here since we copy it in the
303 * vnic's vn_addr.
304 * We can't ask for hardware resources since we
305 * don't currently support hardware classification
306 * for these MAC clients.
308 if (req_hwgrp_flag) {
309 *diag = VNIC_IOC_DIAG_NO_HWRINGS;
310 return (ENOTSUP);
312 mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
313 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
314 mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
315 break;
318 vnic->vn_addr_type = vnic_addr_type;
320 err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
321 &vnic->vn_muh, vid, &mac_diag);
322 if (err != 0) {
323 if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
324 /* release factory MAC address */
325 mac_addr_factory_release(vnic->vn_mch, *addr_slot);
327 *diag = vnic_mac2vnic_diag(mac_diag);
330 return (err);
334 * Create a new VNIC upon request from administrator.
335 * Returns 0 on success, an errno on failure.
337 /* ARGSUSED */
339 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
340 vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
341 int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
342 int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
343 cred_t *credp)
345 vnic_t *vnic;
346 mac_register_t *mac;
347 int err;
348 boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
349 char vnic_name[MAXNAMELEN];
350 const mac_info_t *minfop;
351 uint32_t req_hwgrp_flag = B_FALSE;
353 *diag = VNIC_IOC_DIAG_NONE;
355 rw_enter(&vnic_lock, RW_WRITER);
357 /* does a VNIC with the same id already exist? */
358 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
359 (mod_hash_val_t *)&vnic);
360 if (err == 0) {
361 rw_exit(&vnic_lock);
362 return (EEXIST);
365 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
366 if (vnic == NULL) {
367 rw_exit(&vnic_lock);
368 return (ENOMEM);
371 bzero(vnic, sizeof (*vnic));
373 vnic->vn_id = vnic_id;
374 vnic->vn_link_id = linkid;
375 vnic->vn_vrid = vrid;
376 vnic->vn_af = af;
378 if (!is_anchor) {
379 if (linkid == DATALINK_INVALID_LINKID) {
380 err = EINVAL;
381 goto bail;
385 * Open the lower MAC and assign its initial bandwidth and
386 * MAC address. We do this here during VNIC creation and
387 * do not wait until the upper MAC client open so that we
388 * can validate the VNIC creation parameters (bandwidth,
389 * MAC address, etc) and reserve a factory MAC address if
390 * one was requested.
392 err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
393 if (err != 0)
394 goto bail;
397 * VNIC(vlan) over VNICs(vlans) is not supported.
399 if (mac_is_vnic(vnic->vn_lower_mh)) {
400 err = EINVAL;
401 goto bail;
404 /* only ethernet support for now */
405 minfop = mac_info(vnic->vn_lower_mh);
406 if (minfop->mi_nativemedia != DL_ETHER) {
407 err = ENOTSUP;
408 goto bail;
411 (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
412 NULL);
413 err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
414 vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
415 if (err != 0)
416 goto bail;
418 /* assign a MAC address to the VNIC */
420 err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
421 mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
422 req_hwgrp_flag);
423 if (err != 0) {
424 vnic->vn_muh = NULL;
425 if (diag != NULL && req_hwgrp_flag)
426 *diag = VNIC_IOC_DIAG_NO_HWRINGS;
427 goto bail;
430 /* register to receive notification from underlying MAC */
431 vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
432 vnic);
434 *vnic_addr_type = vnic->vn_addr_type;
435 vnic->vn_addr_len = *mac_len;
436 vnic->vn_vid = vid;
438 bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
440 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
441 vnic->vn_slot_id = *mac_slot;
444 * Set the initial VNIC capabilities. If the VNIC is created
445 * over MACs which does not support nactive vlan, disable
446 * VNIC's hardware checksum capability if its VID is not 0,
447 * since the underlying MAC would get the hardware checksum
448 * offset wrong in case of VLAN packets.
450 if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh,
451 MAC_CAPAB_NO_NATIVEVLAN, NULL)) {
452 if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
453 &vnic->vn_hcksum_txflags))
454 vnic->vn_hcksum_txflags = 0;
455 } else {
456 vnic->vn_hcksum_txflags = 0;
460 /* register with the MAC module */
461 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
462 goto bail;
464 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
465 mac->m_driver = vnic;
466 mac->m_dip = vnic_get_dip();
467 mac->m_instance = (uint_t)-1;
468 mac->m_src_addr = vnic->vn_addr;
469 mac->m_callbacks = &vnic_m_callbacks;
471 if (!is_anchor) {
473 * If this is a VNIC based VLAN, then we check for the
474 * margin unless it has been created with the force
475 * flag. If we are configuring a VLAN over an etherstub,
476 * we don't check the margin even if force is not set.
478 if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
479 if (vid != VLAN_ID_NONE)
480 vnic->vn_force = B_TRUE;
482 * As the current margin size of the underlying mac is
483 * used to determine the margin size of the VNIC
484 * itself, request the underlying mac not to change
485 * to a smaller margin size.
487 err = mac_margin_add(vnic->vn_lower_mh,
488 &vnic->vn_margin, B_TRUE);
489 ASSERT(err == 0);
490 } else {
491 vnic->vn_margin = VLAN_TAGSZ;
492 err = mac_margin_add(vnic->vn_lower_mh,
493 &vnic->vn_margin, B_FALSE);
494 if (err != 0) {
495 mac_free(mac);
496 if (diag != NULL)
497 *diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
498 goto bail;
502 mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
503 &mac->m_max_sdu);
504 err = mac_mtu_add(vnic->vn_lower_mh, &mac->m_max_sdu, B_FALSE);
505 if (err != 0) {
506 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
507 vnic->vn_margin) == 0);
508 mac_free(mac);
509 if (diag != NULL)
510 *diag = VNIC_IOC_DIAG_MACMTU_INVALID;
511 goto bail;
513 vnic->vn_mtu = mac->m_max_sdu;
514 } else {
515 vnic->vn_margin = VLAN_TAGSZ;
516 mac->m_min_sdu = 1;
517 mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
518 vnic->vn_mtu = ANCHOR_VNIC_MAX_MTU;
521 mac->m_margin = vnic->vn_margin;
523 err = mac_register(mac, &vnic->vn_mh);
524 mac_free(mac);
525 if (err != 0) {
526 if (!is_anchor) {
527 VERIFY(mac_mtu_remove(vnic->vn_lower_mh,
528 vnic->vn_mtu) == 0);
529 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
530 vnic->vn_margin) == 0);
532 goto bail;
535 /* Set the VNIC's MAC in the client */
536 if (!is_anchor) {
537 mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);
539 if (mrp != NULL) {
540 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
541 (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
542 req_hwgrp_flag = B_TRUE;
544 err = mac_client_set_resources(vnic->vn_mch, mrp);
545 if (err != 0) {
546 VERIFY(mac_mtu_remove(vnic->vn_lower_mh,
547 vnic->vn_mtu) == 0);
548 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
549 vnic->vn_margin) == 0);
550 (void) mac_unregister(vnic->vn_mh);
551 goto bail;
556 err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
557 if (err != 0) {
558 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
559 vnic->vn_margin) == 0);
560 if (!is_anchor) {
561 VERIFY(mac_mtu_remove(vnic->vn_lower_mh,
562 vnic->vn_mtu) == 0);
563 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
564 vnic->vn_margin) == 0);
566 (void) mac_unregister(vnic->vn_mh);
567 goto bail;
570 /* add new VNIC to hash table */
571 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
572 (mod_hash_val_t)vnic);
573 ASSERT(err == 0);
574 vnic_count++;
577 * Now that we've enabled this VNIC, we should go through and update the
578 * link state by setting it to our parents.
580 vnic->vn_enabled = B_TRUE;
582 if (is_anchor) {
583 mac_link_update(vnic->vn_mh, LINK_STATE_UP);
584 } else {
585 mac_link_update(vnic->vn_mh,
586 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
589 rw_exit(&vnic_lock);
591 return (0);
593 bail:
594 rw_exit(&vnic_lock);
595 if (!is_anchor) {
596 if (vnic->vn_mnh != NULL)
597 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
598 if (vnic->vn_muh != NULL)
599 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
600 if (vnic->vn_mch != NULL)
601 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
602 if (vnic->vn_lower_mh != NULL)
603 mac_close(vnic->vn_lower_mh);
606 kmem_cache_free(vnic_cache, vnic);
607 return (err);
611 * Modify the properties of an existing VNIC.
613 /* ARGSUSED */
615 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
616 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
617 uint_t mac_slot, mac_resource_props_t *mrp)
619 vnic_t *vnic = NULL;
621 rw_enter(&vnic_lock, RW_WRITER);
623 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
624 (mod_hash_val_t *)&vnic) != 0) {
625 rw_exit(&vnic_lock);
626 return (ENOENT);
629 rw_exit(&vnic_lock);
631 return (0);
634 /* ARGSUSED */
636 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
638 vnic_t *vnic = NULL;
639 mod_hash_val_t val;
640 datalink_id_t tmpid;
641 int rc;
643 rw_enter(&vnic_lock, RW_WRITER);
645 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
646 (mod_hash_val_t *)&vnic) != 0) {
647 rw_exit(&vnic_lock);
648 return (ENOENT);
651 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
652 rw_exit(&vnic_lock);
653 return (rc);
656 ASSERT(vnic_id == tmpid);
659 * We cannot unregister the MAC yet. Unregistering would
660 * free up mac_impl_t which should not happen at this time.
661 * So disable mac_impl_t by calling mac_disable(). This will prevent
662 * any new claims on mac_impl_t.
664 if ((rc = mac_disable(vnic->vn_mh)) != 0) {
665 (void) dls_devnet_create(vnic->vn_mh, vnic_id,
666 crgetzoneid(credp));
667 rw_exit(&vnic_lock);
668 return (rc);
671 vnic_cleanup_secondary_macs(vnic, vnic->vn_nhandles);
673 vnic->vn_enabled = B_FALSE;
674 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
675 ASSERT(vnic == (vnic_t *)val);
676 vnic_count--;
677 rw_exit(&vnic_lock);
680 * XXX-nicolas shouldn't have a void cast here, if it's
681 * expected that the function will never fail, then we should
682 * have an ASSERT().
684 (void) mac_unregister(vnic->vn_mh);
686 if (vnic->vn_lower_mh != NULL) {
688 * Check if MAC address for the vnic was obtained from the
689 * factory MAC addresses. If yes, release it.
691 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
692 (void) mac_addr_factory_release(vnic->vn_mch,
693 vnic->vn_slot_id);
695 (void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
696 (void) mac_mtu_remove(vnic->vn_lower_mh, vnic->vn_mtu);
697 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
698 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
699 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
700 mac_close(vnic->vn_lower_mh);
703 kmem_cache_free(vnic_cache, vnic);
704 return (0);
707 /* ARGSUSED */
708 mblk_t *
709 vnic_m_tx(void *arg, mblk_t *mp_chain)
712 * This function could be invoked for an anchor VNIC when sending
713 * broadcast and multicast packets, and unicast packets which did
714 * not match any local known destination.
716 freemsgchain(mp_chain);
717 return (NULL);
720 /*ARGSUSED*/
721 static void
722 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
724 miocnak(q, mp, 0, ENOTSUP);
728 * This entry point cannot be passed-through, since it is invoked
729 * for the per-VNIC kstats which must be exported independently
730 * of the existence of VNIC MAC clients.
732 static int
733 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
735 vnic_t *vnic = arg;
736 int rval = 0;
738 if (vnic->vn_lower_mh == NULL) {
740 * It's an anchor VNIC, which does not have any
741 * statistics in itself.
743 return (ENOTSUP);
747 * ENOTSUP must be reported for unsupported stats, the VNIC
748 * driver reports a subset of the stats that would
749 * be returned by a real piece of hardware.
752 switch (stat) {
753 case MAC_STAT_LINK_STATE:
754 case MAC_STAT_LINK_UP:
755 case MAC_STAT_PROMISC:
756 case MAC_STAT_IFSPEED:
757 case MAC_STAT_MULTIRCV:
758 case MAC_STAT_MULTIXMT:
759 case MAC_STAT_BRDCSTRCV:
760 case MAC_STAT_BRDCSTXMT:
761 case MAC_STAT_OPACKETS:
762 case MAC_STAT_OBYTES:
763 case MAC_STAT_IERRORS:
764 case MAC_STAT_OERRORS:
765 case MAC_STAT_RBYTES:
766 case MAC_STAT_IPACKETS:
767 *val = mac_client_stat_get(vnic->vn_mch, stat);
768 break;
769 default:
770 rval = ENOTSUP;
773 return (rval);
777 * Invoked by the upper MAC to retrieve the lower MAC client handle
778 * corresponding to a VNIC. A pointer to this function is obtained
779 * by the upper MAC via capability query.
781 * XXX-nicolas Note: this currently causes all VNIC MAC clients to
782 * receive the same MAC client handle for the same VNIC. This is ok
783 * as long as we have only one VNIC MAC client which sends and
784 * receives data, but we don't currently enforce this at the MAC layer.
786 static void *
787 vnic_mac_client_handle(void *vnic_arg)
789 vnic_t *vnic = vnic_arg;
791 return (vnic->vn_mch);
795 * Invoked when updating the primary MAC so that the secondary MACs are
796 * kept in sync.
798 static void
799 vnic_mac_secondary_update(void *vnic_arg)
801 vnic_t *vn = vnic_arg;
802 int i;
804 for (i = 1; i <= vn->vn_nhandles; i++) {
805 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]);
810 * Return information about the specified capability.
812 /* ARGSUSED */
813 static boolean_t
814 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
816 vnic_t *vnic = arg;
818 switch (cap) {
819 case MAC_CAPAB_HCKSUM: {
820 uint32_t *hcksum_txflags = cap_data;
822 *hcksum_txflags = vnic->vn_hcksum_txflags &
823 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
824 HCKSUM_INET_PARTIAL);
825 break;
827 case MAC_CAPAB_VNIC: {
828 mac_capab_vnic_t *vnic_capab = cap_data;
830 if (vnic->vn_lower_mh == NULL) {
832 * It's an anchor VNIC, we don't have an underlying
833 * NIC and MAC client handle.
835 return (B_FALSE);
838 if (vnic_capab != NULL) {
839 vnic_capab->mcv_arg = vnic;
840 vnic_capab->mcv_mac_client_handle =
841 vnic_mac_client_handle;
842 vnic_capab->mcv_mac_secondary_update =
843 vnic_mac_secondary_update;
845 break;
847 case MAC_CAPAB_ANCHOR_VNIC: {
848 /* since it's an anchor VNIC we don't have lower mac handle */
849 if (vnic->vn_lower_mh == NULL) {
850 ASSERT(vnic->vn_link_id == 0);
851 return (B_TRUE);
853 return (B_FALSE);
855 case MAC_CAPAB_NO_NATIVEVLAN:
856 return (B_FALSE);
857 case MAC_CAPAB_NO_ZCOPY:
858 return (B_TRUE);
859 case MAC_CAPAB_VRRP: {
860 mac_capab_vrrp_t *vrrp_capab = cap_data;
862 if (vnic->vn_vrid != 0) {
863 if (vrrp_capab != NULL)
864 vrrp_capab->mcv_af = vnic->vn_af;
865 return (B_TRUE);
867 return (B_FALSE);
869 default:
870 return (B_FALSE);
872 return (B_TRUE);
/*
 * Start entry point.  Does nothing and reports success.
 */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	return (0);
}
/*
 * Stop entry point.  Intentionally empty.
 */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
}
888 /* ARGSUSED */
889 static int
890 vnic_m_promisc(void *arg, boolean_t on)
892 return (0);
895 /* ARGSUSED */
896 static int
897 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
899 return (0);
902 static int
903 vnic_m_unicst(void *arg, const uint8_t *macaddr)
905 vnic_t *vnic = arg;
907 return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
910 static void
911 vnic_cleanup_secondary_macs(vnic_t *vn, int cnt)
913 int i;
915 /* Remove existing secondaries (primary is at 0) */
916 for (i = 1; i <= cnt; i++) {
917 mac_rx_clear(vn->vn_mc_handles[i]);
919 /* unicast handle might not have been set yet */
920 if (vn->vn_mu_handles[i] != NULL)
921 (void) mac_unicast_remove(vn->vn_mc_handles[i],
922 vn->vn_mu_handles[i]);
924 mac_secondary_cleanup(vn->vn_mc_handles[i]);
926 mac_client_close(vn->vn_mc_handles[i], MAC_CLOSE_FLAGS_IS_VNIC);
928 vn->vn_mu_handles[i] = NULL;
929 vn->vn_mc_handles[i] = NULL;
932 vn->vn_nhandles = 0;
936 * Setup secondary MAC addresses on the vnic. Due to limitations in the mac
937 * code, each mac address must be associated with a mac_client (and the
938 * flow that goes along with the client) so we need to create those clients
939 * here.
941 static int
942 vnic_set_secondary_macs(vnic_t *vn, mac_secondary_addr_t *msa)
944 int i, err;
945 char primary_name[MAXNAMELEN];
947 /* First, remove pre-existing secondaries */
948 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR);
949 vnic_cleanup_secondary_macs(vn, vn->vn_nhandles);
951 if (msa->ms_addrcnt == (uint32_t)-1)
952 msa->ms_addrcnt = 0;
954 vn->vn_nhandles = msa->ms_addrcnt;
956 (void) dls_mgmt_get_linkinfo(vn->vn_id, primary_name, NULL, NULL, NULL);
959 * Now add the new secondary MACs
960 * Recall that the primary MAC address is the first element.
961 * The secondary clients are named after the primary with their
962 * index to distinguish them.
964 for (i = 1; i <= vn->vn_nhandles; i++) {
965 uint8_t *addr;
966 mac_diag_t mac_diag;
967 char secondary_name[MAXNAMELEN];
969 (void) snprintf(secondary_name, sizeof (secondary_name),
970 "%s%02d", primary_name, i);
972 err = mac_client_open(vn->vn_lower_mh, &vn->vn_mc_handles[i],
973 secondary_name, MAC_OPEN_FLAGS_IS_VNIC);
974 if (err != 0) {
975 /* Remove any that we successfully added */
976 vnic_cleanup_secondary_macs(vn, --i);
977 return (err);
981 * Assign a MAC address to the VNIC
983 * Normally this would be done with vnic_unicast_add but since
984 * we know these are fixed adddresses, and since we need to
985 * save this in the proper array slot, we bypass that function
986 * and go direct.
988 addr = msa->ms_addrs[i - 1];
989 err = mac_unicast_add(vn->vn_mc_handles[i], addr, 0,
990 &vn->vn_mu_handles[i], vn->vn_vid, &mac_diag);
991 if (err != 0) {
992 /* Remove any that we successfully added */
993 vnic_cleanup_secondary_macs(vn, i);
994 return (err);
998 * Setup the secondary the same way as the primary (i.e.
999 * receiver function/argument (e.g. i_dls_link_rx, mac_pkt_drop,
1000 * etc.), the promisc list, and the resource controls).
1002 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]);
1005 return (0);
1008 static int
1009 vnic_get_secondary_macs(vnic_t *vn, uint_t pr_valsize, void *pr_val)
1011 int i;
1012 mac_secondary_addr_t msa;
1014 if (pr_valsize < sizeof (msa))
1015 return (EINVAL);
1017 /* Get existing addresses (primary is at 0) */
1018 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR);
1019 for (i = 1; i <= vn->vn_nhandles; i++) {
1020 ASSERT(vn->vn_mc_handles[i] != NULL);
1021 mac_unicast_secondary_get(vn->vn_mc_handles[i],
1022 msa.ms_addrs[i - 1]);
1024 msa.ms_addrcnt = vn->vn_nhandles;
1026 bcopy(&msa, pr_val, sizeof (msa));
1027 return (0);
1031 * Callback functions for set/get of properties
1033 /*ARGSUSED*/
1034 static int
1035 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
1036 uint_t pr_valsize, const void *pr_val)
1038 int err = 0;
1039 vnic_t *vn = m_driver;
1041 switch (pr_num) {
1042 case MAC_PROP_MTU: {
1043 uint32_t mtu;
1045 if (pr_valsize < sizeof (mtu)) {
1046 err = EINVAL;
1047 break;
1049 bcopy(pr_val, &mtu, sizeof (mtu));
1051 if (vn->vn_link_id == DATALINK_INVALID_LINKID) {
1052 if (mtu < ANCHOR_VNIC_MIN_MTU ||
1053 mtu > ANCHOR_VNIC_MAX_MTU) {
1054 err = EINVAL;
1055 break;
1057 } else {
1058 err = mac_mtu_add(vn->vn_lower_mh, &mtu, B_FALSE);
1060 * If it's not supported to set a value here, translate
1061 * that to EINVAL, so user land gets a better idea of
1062 * what went wrong. This realistically means that they
1063 * violated the output of prop info.
1065 if (err == ENOTSUP)
1066 err = EINVAL;
1067 if (err != 0)
1068 break;
1069 VERIFY(mac_mtu_remove(vn->vn_lower_mh,
1070 vn->vn_mtu) == 0);
1072 vn->vn_mtu = mtu;
1073 err = mac_maxsdu_update(vn->vn_mh, mtu);
1074 break;
1076 case MAC_PROP_VN_PROMISC_FILTERED: {
1077 boolean_t filtered;
1079 if (pr_valsize < sizeof (filtered)) {
1080 err = EINVAL;
1081 break;
1084 bcopy(pr_val, &filtered, sizeof (filtered));
1085 mac_set_promisc_filtered(vn->vn_mch, filtered);
1086 break;
1088 case MAC_PROP_SECONDARY_ADDRS: {
1089 mac_secondary_addr_t msa;
1091 bcopy(pr_val, &msa, sizeof (msa));
1092 err = vnic_set_secondary_macs(vn, &msa);
1093 break;
1095 default:
1096 err = ENOTSUP;
1097 break;
1099 return (err);
1102 /* ARGSUSED */
1103 static int
1104 vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1105 uint_t pr_valsize, void *pr_val)
1107 vnic_t *vn = arg;
1108 int ret = 0;
1109 boolean_t out;
1111 switch (pr_num) {
1112 case MAC_PROP_VN_PROMISC_FILTERED:
1113 out = mac_get_promisc_filtered(vn->vn_mch);
1114 ASSERT(pr_valsize >= sizeof (boolean_t));
1115 bcopy(&out, pr_val, sizeof (boolean_t));
1116 break;
1117 case MAC_PROP_SECONDARY_ADDRS:
1118 ret = vnic_get_secondary_macs(vn, pr_valsize, pr_val);
1119 break;
1120 default:
1121 ret = ENOTSUP;
1122 break;
1125 return (ret);
1128 /* ARGSUSED */
1129 static void vnic_m_propinfo(void *m_driver, const char *pr_name,
1130 mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
1132 vnic_t *vn = m_driver;
1134 switch (pr_num) {
1135 case MAC_PROP_MTU:
1136 if (vn->vn_link_id == DATALINK_INVALID_LINKID) {
1137 mac_prop_info_set_range_uint32(prh,
1138 ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
1139 } else {
1140 uint32_t max;
1141 mac_perim_handle_t mph;
1142 mac_propval_range_t range;
1145 * The valid range for a VNIC's MTU is the minimum that
1146 * the device supports and the current value of the
1147 * device. A VNIC cannot increase the current MTU of the
1148 * device. Therefore we need to get the range from the
1149 * propinfo endpoint and current mtu from the
1150 * traditional property endpoint.
1152 mac_perim_enter_by_mh(vn->vn_lower_mh, &mph);
1153 if (mac_get_prop(vn->vn_lower_mh, MAC_PROP_MTU, "mtu",
1154 &max, sizeof (uint32_t)) != 0) {
1155 mac_perim_exit(mph);
1156 return;
1159 range.mpr_count = 1;
1160 if (mac_prop_info(vn->vn_lower_mh, MAC_PROP_MTU, "mtu",
1161 NULL, 0, &range, NULL) != 0) {
1162 mac_perim_exit(mph);
1163 return;
1166 mac_prop_info_set_default_uint32(prh, max);
1167 mac_prop_info_set_range_uint32(prh,
1168 range.mpr_range_uint32[0].mpur_min, max);
1169 mac_perim_exit(mph);
1171 break;
1177 vnic_info(vnic_info_t *info, cred_t *credp)
1179 vnic_t *vnic;
1180 int err;
1182 /* Make sure that the VNIC link is visible from the caller's zone. */
1183 if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
1184 return (ENOENT);
1186 rw_enter(&vnic_lock, RW_WRITER);
1188 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
1189 (mod_hash_val_t *)&vnic);
1190 if (err != 0) {
1191 rw_exit(&vnic_lock);
1192 return (ENOENT);
1195 info->vn_link_id = vnic->vn_link_id;
1196 info->vn_mac_addr_type = vnic->vn_addr_type;
1197 info->vn_mac_len = vnic->vn_addr_len;
1198 bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
1199 info->vn_mac_slot = vnic->vn_slot_id;
1200 info->vn_mac_prefix_len = 0;
1201 info->vn_vid = vnic->vn_vid;
1202 info->vn_force = vnic->vn_force;
1203 info->vn_vrid = vnic->vn_vrid;
1204 info->vn_af = vnic->vn_af;
1206 bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
1207 if (vnic->vn_mch != NULL)
1208 mac_client_get_resources(vnic->vn_mch,
1209 &info->vn_resource_props);
1211 rw_exit(&vnic_lock);
1212 return (0);
1215 static void
1216 vnic_notify_cb(void *arg, mac_notify_type_t type)
1218 vnic_t *vnic = arg;
1221 * Do not deliver notifications if the vnic is not fully initialized
1222 * or is in process of being torn down.
1224 if (!vnic->vn_enabled)
1225 return;
1227 switch (type) {
1228 case MAC_NOTE_UNICST:
1230 * Only the VLAN VNIC needs to be notified with primary MAC
1231 * address change.
1233 if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
1234 return;
1236 /* the unicast MAC address value */
1237 mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
1239 /* notify its upper layer MAC about MAC address change */
1240 mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
1241 break;
1243 case MAC_NOTE_LINK:
1244 mac_link_update(vnic->vn_mh,
1245 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
1246 break;
1248 default:
1249 break;