8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / uts / common / xen / io / xnbo.c
blob9a513288964bc6e542968d4719795a428bcec6ad
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Xen network backend - mac client edition.
30 * A driver that sits above an existing GLDv3/Nemo MAC driver and
31 * relays packets to/from that driver from/to a guest domain.
/* Driver-local debug code is compiled in only for DEBUG kernels. */
#if defined(DEBUG)
#define	XNBO_DEBUG	1
#endif	/* DEBUG */
38 #include "xnb.h"
40 #include <sys/sunddi.h>
41 #include <sys/ddi.h>
42 #include <sys/modctl.h>
43 #include <sys/strsubr.h>
44 #include <sys/mac_client.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_client_priv.h>
47 #include <sys/mac.h>
48 #include <net/if.h>
49 #include <sys/dlpi.h>
50 #include <sys/pattr.h>
51 #include <xen/sys/xenbus_impl.h>
52 #include <xen/sys/xendev.h>
53 #include <sys/sdt.h>
54 #include <sys/note.h>
#ifdef XNBO_DEBUG
/*
 * Debug-only switches for checksum offload.
 *
 * xnbo_cksum_offload_to_peer:   when B_FALSE, xnbo_cksum_to_peer() reports
 *                               no checksum flags to the peer.
 * xnbo_cksum_offload_from_peer: when B_FALSE, xnbo_cksum_from_peer() acts
 *                               as if the MAC had no checksum capabilities.
 */
boolean_t	xnbo_cksum_offload_to_peer = B_TRUE;
boolean_t	xnbo_cksum_offload_from_peer = B_TRUE;
#endif	/* XNBO_DEBUG */
61 /* Track multicast addresses. */
62 typedef struct xmca {
63 struct xmca *next;
64 ether_addr_t addr;
65 } xmca_t;
67 /* State about this device instance. */
68 typedef struct xnbo {
69 mac_handle_t o_mh;
70 mac_client_handle_t o_mch;
71 mac_unicast_handle_t o_mah;
72 mac_promisc_handle_t o_mphp;
73 boolean_t o_running;
74 boolean_t o_promiscuous;
75 uint32_t o_hcksum_capab;
76 xmca_t *o_mca;
77 char o_link_name[LIFNAMSIZ];
78 boolean_t o_need_rx_filter;
79 boolean_t o_need_setphysaddr;
80 boolean_t o_multicast_control;
81 } xnbo_t;
83 static void xnbo_close_mac(xnb_t *);
84 static void i_xnbo_close_mac(xnb_t *, boolean_t);
87 * Packets from the peer come here. We pass them to the mac device.
89 static void
90 xnbo_to_mac(xnb_t *xnbp, mblk_t *mp)
92 xnbo_t *xnbop = xnbp->xnb_flavour_data;
94 ASSERT(mp != NULL);
96 if (!xnbop->o_running) {
97 xnbp->xnb_stat_tx_too_early++;
98 goto fail;
101 if (mac_tx(xnbop->o_mch, mp, 0,
102 MAC_DROP_ON_NO_DESC, NULL) != NULL) {
103 xnbp->xnb_stat_mac_full++;
106 return;
108 fail:
109 freemsgchain(mp);
113 * Process the checksum flags `flags' provided by the peer for the
114 * packet `mp'.
116 static mblk_t *
117 xnbo_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags)
119 xnbo_t *xnbop = xnbp->xnb_flavour_data;
121 ASSERT(mp->b_next == NULL);
123 if ((flags & NETTXF_csum_blank) != 0) {
124 uint32_t capab = xnbop->o_hcksum_capab;
126 #ifdef XNBO_DEBUG
127 if (!xnbo_cksum_offload_from_peer)
128 capab = 0;
129 #endif /* XNBO_DEBUG */
132 * The checksum in the packet is blank. Determine
133 * whether we can do hardware offload and, if so,
134 * update the flags on the mblk according. If not,
135 * calculate and insert the checksum using software.
137 mp = xnb_process_cksum_flags(xnbp, mp, capab);
140 return (mp);
144 * Calculate the checksum flags to be relayed to the peer for the
145 * packet `mp'.
147 static uint16_t
148 xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp)
150 _NOTE(ARGUNUSED(xnbp));
151 uint16_t r = 0;
152 uint32_t pflags, csum;
154 #ifdef XNBO_DEBUG
155 if (!xnbo_cksum_offload_to_peer)
156 return (0);
157 #endif /* XNBO_DEBUG */
160 * We might also check for HCK_PARTIALCKSUM here and,
161 * providing that the partial checksum covers the TCP/UDP
162 * payload, return NETRXF_data_validated.
164 * It seems that it's probably not worthwhile, as even MAC
165 * devices which advertise HCKSUM_INET_PARTIAL in their
166 * capabilities tend to use HCK_FULLCKSUM on the receive side
167 * - they are actually saying that in the output path the
168 * caller must use HCK_PARTIALCKSUM.
170 * Then again, if a NIC supports HCK_PARTIALCKSUM in its'
171 * output path, the host IP stack will use it. If such packets
172 * are destined for the peer (i.e. looped around) we would
173 * gain some advantage.
176 mac_hcksum_get(mp, NULL, NULL, NULL, &csum, &pflags);
179 * If the MAC driver has asserted that the checksum is
180 * good, let the peer know.
182 if (((pflags & HCK_FULLCKSUM) != 0) &&
183 (((pflags & HCK_FULLCKSUM_OK) != 0) ||
184 (csum == 0xffff)))
185 r |= NETRXF_data_validated;
187 return (r);
191 * Packets from the mac device come here. We pass them to the peer.
193 /*ARGSUSED*/
194 static void
195 xnbo_from_mac(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
196 boolean_t loopback)
198 xnb_t *xnbp = arg;
200 mp = xnb_copy_to_peer(xnbp, mp);
202 if (mp != NULL)
203 freemsgchain(mp);
207 * Packets from the mac device come here. We pass them to the peer if
208 * the destination mac address matches or it's a multicast/broadcast
209 * address.
211 static void
212 xnbo_from_mac_filter(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
213 boolean_t loopback)
215 _NOTE(ARGUNUSED(loopback));
216 xnb_t *xnbp = arg;
217 xnbo_t *xnbop = xnbp->xnb_flavour_data;
218 mblk_t *next, *keep, *keep_head, *free, *free_head;
220 keep = keep_head = free = free_head = NULL;
222 #define ADD(list, bp) \
223 if (list != NULL) \
224 list->b_next = bp; \
225 else \
226 list##_head = bp; \
227 list = bp;
229 for (; mp != NULL; mp = next) {
230 mac_header_info_t hdr_info;
232 next = mp->b_next;
233 mp->b_next = NULL;
235 if (mac_header_info(xnbop->o_mh, mp, &hdr_info) != 0) {
236 ADD(free, mp);
237 continue;
240 if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
241 (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) {
242 ADD(keep, mp);
243 continue;
246 if (bcmp(hdr_info.mhi_daddr, xnbp->xnb_mac_addr,
247 sizeof (xnbp->xnb_mac_addr)) == 0) {
248 ADD(keep, mp);
249 continue;
252 ADD(free, mp);
254 #undef ADD
256 if (keep_head != NULL)
257 xnbo_from_mac(xnbp, mrh, keep_head, B_FALSE);
259 if (free_head != NULL)
260 freemsgchain(free_head);
263 static boolean_t
264 xnbo_open_mac(xnb_t *xnbp, char *mac)
266 xnbo_t *xnbop = xnbp->xnb_flavour_data;
267 int err;
268 const mac_info_t *mi;
269 void (*rx_fn)(void *, mac_resource_handle_t, mblk_t *, boolean_t);
270 struct ether_addr ea;
271 uint_t max_sdu;
272 mac_diag_t diag;
274 if ((err = mac_open_by_linkname(mac, &xnbop->o_mh)) != 0) {
275 cmn_err(CE_WARN, "xnbo_open_mac: "
276 "cannot open mac for link %s (%d)", mac, err);
277 return (B_FALSE);
279 ASSERT(xnbop->o_mh != NULL);
281 mi = mac_info(xnbop->o_mh);
282 ASSERT(mi != NULL);
284 if (mi->mi_media != DL_ETHER) {
285 cmn_err(CE_WARN, "xnbo_open_mac: "
286 "device is not DL_ETHER (%d)", mi->mi_media);
287 i_xnbo_close_mac(xnbp, B_TRUE);
288 return (B_FALSE);
290 if (mi->mi_media != mi->mi_nativemedia) {
291 cmn_err(CE_WARN, "xnbo_open_mac: "
292 "device media and native media mismatch (%d != %d)",
293 mi->mi_media, mi->mi_nativemedia);
294 i_xnbo_close_mac(xnbp, B_TRUE);
295 return (B_FALSE);
298 mac_sdu_get(xnbop->o_mh, NULL, &max_sdu);
299 if (max_sdu > XNBMAXPKT) {
300 cmn_err(CE_WARN, "xnbo_open_mac: mac device SDU too big (%d)",
301 max_sdu);
302 i_xnbo_close_mac(xnbp, B_TRUE);
303 return (B_FALSE);
307 * MAC_OPEN_FLAGS_MULTI_PRIMARY is relevant when we are migrating a
308 * guest on the localhost itself. In this case we would have the MAC
309 * client open for the guest being migrated *and* also for the
310 * migrated guest (i.e. the former will be active till the migration
311 * is complete when the latter will be activated). This flag states
312 * that it is OK for mac_unicast_add to add the primary MAC unicast
313 * address multiple times.
315 if (mac_client_open(xnbop->o_mh, &xnbop->o_mch, NULL,
316 MAC_OPEN_FLAGS_USE_DATALINK_NAME |
317 MAC_OPEN_FLAGS_MULTI_PRIMARY) != 0) {
318 cmn_err(CE_WARN, "xnbo_open_mac: "
319 "error (%d) opening mac client", err);
320 i_xnbo_close_mac(xnbp, B_TRUE);
321 return (B_FALSE);
324 if (xnbop->o_need_rx_filter)
325 rx_fn = xnbo_from_mac_filter;
326 else
327 rx_fn = xnbo_from_mac;
329 err = mac_unicast_add_set_rx(xnbop->o_mch, NULL, MAC_UNICAST_PRIMARY,
330 &xnbop->o_mah, 0, &diag, xnbop->o_multicast_control ? rx_fn : NULL,
331 xnbp);
332 if (err != 0) {
333 cmn_err(CE_WARN, "xnbo_open_mac: failed to get the primary "
334 "MAC address of %s: %d", mac, err);
335 i_xnbo_close_mac(xnbp, B_TRUE);
336 return (B_FALSE);
338 if (!xnbop->o_multicast_control) {
339 err = mac_promisc_add(xnbop->o_mch, MAC_CLIENT_PROMISC_ALL,
340 rx_fn, xnbp, &xnbop->o_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP |
341 MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
342 if (err != 0) {
343 cmn_err(CE_WARN, "xnbo_open_mac: "
344 "cannot enable promiscuous mode of %s: %d",
345 mac, err);
346 i_xnbo_close_mac(xnbp, B_TRUE);
347 return (B_FALSE);
349 xnbop->o_promiscuous = B_TRUE;
352 if (xnbop->o_need_setphysaddr) {
353 err = mac_unicast_primary_set(xnbop->o_mh, xnbp->xnb_mac_addr);
354 /* Warn, but continue on. */
355 if (err != 0) {
356 bcopy(xnbp->xnb_mac_addr, ea.ether_addr_octet,
357 ETHERADDRL);
358 cmn_err(CE_WARN, "xnbo_open_mac: "
359 "cannot set MAC address of %s to "
360 "%s: %d", mac, ether_sprintf(&ea), err);
364 if (!mac_capab_get(xnbop->o_mh, MAC_CAPAB_HCKSUM,
365 &xnbop->o_hcksum_capab))
366 xnbop->o_hcksum_capab = 0;
368 xnbop->o_running = B_TRUE;
370 return (B_TRUE);
373 static void
374 xnbo_close_mac(xnb_t *xnbp)
376 i_xnbo_close_mac(xnbp, B_FALSE);
379 static void
380 i_xnbo_close_mac(xnb_t *xnbp, boolean_t locked)
382 xnbo_t *xnbop = xnbp->xnb_flavour_data;
383 xmca_t *loop;
385 ASSERT(!locked || MUTEX_HELD(&xnbp->xnb_state_lock));
387 if (xnbop->o_mh == NULL)
388 return;
390 if (xnbop->o_running)
391 xnbop->o_running = B_FALSE;
393 if (!locked)
394 mutex_enter(&xnbp->xnb_state_lock);
395 loop = xnbop->o_mca;
396 xnbop->o_mca = NULL;
397 if (!locked)
398 mutex_exit(&xnbp->xnb_state_lock);
400 while (loop != NULL) {
401 xmca_t *next = loop->next;
403 DTRACE_PROBE3(mcast_remove,
404 (char *), "close",
405 (void *), xnbp,
406 (etheraddr_t *), loop->addr);
407 (void) mac_multicast_remove(xnbop->o_mch, loop->addr);
408 kmem_free(loop, sizeof (*loop));
409 loop = next;
412 if (xnbop->o_promiscuous) {
413 if (xnbop->o_mphp != NULL) {
414 mac_promisc_remove(xnbop->o_mphp);
415 xnbop->o_mphp = NULL;
417 xnbop->o_promiscuous = B_FALSE;
418 } else {
419 if (xnbop->o_mch != NULL)
420 mac_rx_clear(xnbop->o_mch);
423 if (xnbop->o_mah != NULL) {
424 (void) mac_unicast_remove(xnbop->o_mch, xnbop->o_mah);
425 xnbop->o_mah = NULL;
428 if (xnbop->o_mch != NULL) {
429 mac_client_close(xnbop->o_mch, 0);
430 xnbop->o_mch = NULL;
433 mac_close(xnbop->o_mh);
434 xnbop->o_mh = NULL;
438 * Hotplug has completed and we are connected to the peer. We have all
439 * the information we need to exchange traffic, so open the MAC device
440 * and configure it appropriately.
442 static boolean_t
443 xnbo_start_connect(xnb_t *xnbp)
445 xnbo_t *xnbop = xnbp->xnb_flavour_data;
447 return (xnbo_open_mac(xnbp, xnbop->o_link_name));
451 * The guest has successfully synchronize with this instance. We read
452 * the configuration of the guest from xenstore to check whether the
453 * guest requests multicast control. If not (the default) we make a
454 * note that the MAC device needs to be used in promiscious mode.
456 static boolean_t
457 xnbo_peer_connected(xnb_t *xnbp)
459 char *oename;
460 int request;
461 xnbo_t *xnbop = xnbp->xnb_flavour_data;
463 oename = xvdi_get_oename(xnbp->xnb_devinfo);
465 if (xenbus_scanf(XBT_NULL, oename,
466 "request-multicast-control", "%d", &request) != 0)
467 request = 0;
468 xnbop->o_multicast_control = (request > 0);
470 return (B_TRUE);
474 * The guest domain has closed down the inter-domain connection. We
475 * close the underlying MAC device.
477 static void
478 xnbo_peer_disconnected(xnb_t *xnbp)
480 xnbo_close_mac(xnbp);
484 * The hotplug script has completed. We read information from xenstore
485 * about our configuration, most notably the name of the MAC device we
486 * should use.
488 static boolean_t
489 xnbo_hotplug_connected(xnb_t *xnbp)
491 char *xsname;
492 xnbo_t *xnbop = xnbp->xnb_flavour_data;
493 int need;
495 xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
497 if (xenbus_scanf(XBT_NULL, xsname,
498 "nic", "%s", xnbop->o_link_name) != 0) {
499 cmn_err(CE_WARN, "xnbo_connect: "
500 "cannot read nic name from %s", xsname);
501 return (B_FALSE);
504 if (xenbus_scanf(XBT_NULL, xsname,
505 "SUNW-need-rx-filter", "%d", &need) != 0)
506 need = 0;
507 xnbop->o_need_rx_filter = (need > 0);
509 if (xenbus_scanf(XBT_NULL, xsname,
510 "SUNW-need-set-physaddr", "%d", &need) != 0)
511 need = 0;
512 xnbop->o_need_setphysaddr = (need > 0);
514 return (B_TRUE);
518 * Find the multicast address `addr', return B_TRUE if it is one that
519 * we receive. If `remove', remove it from the set received.
521 static boolean_t
522 xnbo_mcast_find(xnb_t *xnbp, ether_addr_t *addr, boolean_t remove)
524 xnbo_t *xnbop = xnbp->xnb_flavour_data;
525 xmca_t *prev, *del, *this;
527 ASSERT(MUTEX_HELD(&xnbp->xnb_state_lock));
528 ASSERT(xnbop->o_promiscuous == B_FALSE);
530 prev = del = NULL;
532 this = xnbop->o_mca;
534 while (this != NULL) {
535 if (bcmp(&this->addr, addr, sizeof (this->addr)) == 0) {
536 del = this;
537 if (remove) {
538 if (prev == NULL)
539 xnbop->o_mca = this->next;
540 else
541 prev->next = this->next;
543 break;
546 prev = this;
547 this = this->next;
550 if (del == NULL)
551 return (B_FALSE);
553 if (remove) {
554 DTRACE_PROBE3(mcast_remove,
555 (char *), "remove",
556 (void *), xnbp,
557 (etheraddr_t *), del->addr);
558 mac_multicast_remove(xnbop->o_mch, del->addr);
559 kmem_free(del, sizeof (*del));
562 return (B_TRUE);
566 * Add the multicast address `addr' to the set received.
568 static boolean_t
569 xnbo_mcast_add(xnb_t *xnbp, ether_addr_t *addr)
571 xnbo_t *xnbop = xnbp->xnb_flavour_data;
572 boolean_t r = B_FALSE;
574 ASSERT(xnbop->o_promiscuous == B_FALSE);
576 mutex_enter(&xnbp->xnb_state_lock);
578 if (xnbo_mcast_find(xnbp, addr, B_FALSE)) {
579 r = B_TRUE;
580 } else if (mac_multicast_add(xnbop->o_mch,
581 (const uint8_t *)addr) == 0) {
582 xmca_t *mca;
584 DTRACE_PROBE3(mcast_add,
585 (char *), "add",
586 (void *), xnbp,
587 (etheraddr_t *), addr);
589 mca = kmem_alloc(sizeof (*mca), KM_SLEEP);
590 bcopy(addr, &mca->addr, sizeof (mca->addr));
592 mca->next = xnbop->o_mca;
593 xnbop->o_mca = mca;
595 r = B_TRUE;
598 mutex_exit(&xnbp->xnb_state_lock);
600 return (r);
604 * Remove the multicast address `addr' from the set received.
606 static boolean_t
607 xnbo_mcast_del(xnb_t *xnbp, ether_addr_t *addr)
609 boolean_t r;
611 mutex_enter(&xnbp->xnb_state_lock);
612 r = xnbo_mcast_find(xnbp, addr, B_TRUE);
613 mutex_exit(&xnbp->xnb_state_lock);
615 return (r);
618 static int
619 xnbo_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
621 static xnb_flavour_t flavour = {
622 xnbo_to_mac, xnbo_peer_connected, xnbo_peer_disconnected,
623 xnbo_hotplug_connected, xnbo_start_connect,
624 xnbo_cksum_from_peer, xnbo_cksum_to_peer,
625 xnbo_mcast_add, xnbo_mcast_del,
627 xnbo_t *xnbop;
629 switch (cmd) {
630 case DDI_ATTACH:
631 break;
632 case DDI_RESUME:
633 return (DDI_SUCCESS);
634 default:
635 return (DDI_FAILURE);
638 xnbop = kmem_zalloc(sizeof (*xnbop), KM_SLEEP);
640 if (xnb_attach(dip, &flavour, xnbop) != DDI_SUCCESS) {
641 kmem_free(xnbop, sizeof (*xnbop));
642 return (DDI_FAILURE);
645 return (DDI_SUCCESS);
648 static int
649 xnbo_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
651 xnb_t *xnbp = ddi_get_driver_private(dip);
652 xnbo_t *xnbop = xnbp->xnb_flavour_data;
654 switch (cmd) {
655 case DDI_DETACH:
656 break;
657 case DDI_SUSPEND:
658 return (DDI_SUCCESS);
659 default:
660 return (DDI_FAILURE);
663 mutex_enter(&xnbp->xnb_tx_lock);
664 mutex_enter(&xnbp->xnb_rx_lock);
666 if (!xnbp->xnb_detachable || xnbp->xnb_connected ||
667 (xnbp->xnb_tx_buf_count > 0)) {
668 mutex_exit(&xnbp->xnb_rx_lock);
669 mutex_exit(&xnbp->xnb_tx_lock);
671 return (DDI_FAILURE);
674 mutex_exit(&xnbp->xnb_rx_lock);
675 mutex_exit(&xnbp->xnb_tx_lock);
677 xnbo_close_mac(xnbp);
678 kmem_free(xnbop, sizeof (*xnbop));
680 xnb_detach(dip);
682 return (DDI_SUCCESS);
685 static struct cb_ops cb_ops = {
686 nulldev, /* open */
687 nulldev, /* close */
688 nodev, /* strategy */
689 nodev, /* print */
690 nodev, /* dump */
691 nodev, /* read */
692 nodev, /* write */
693 nodev, /* ioctl */
694 nodev, /* devmap */
695 nodev, /* mmap */
696 nodev, /* segmap */
697 nochpoll, /* poll */
698 ddi_prop_op, /* cb_prop_op */
699 0, /* streamtab */
700 D_NEW | D_MP | D_64BIT /* Driver compatibility flag */
703 static struct dev_ops ops = {
704 DEVO_REV, /* devo_rev */
705 0, /* devo_refcnt */
706 nulldev, /* devo_getinfo */
707 nulldev, /* devo_identify */
708 nulldev, /* devo_probe */
709 xnbo_attach, /* devo_attach */
710 xnbo_detach, /* devo_detach */
711 nodev, /* devo_reset */
712 &cb_ops, /* devo_cb_ops */
713 (struct bus_ops *)0, /* devo_bus_ops */
714 NULL, /* devo_power */
715 ddi_quiesce_not_needed, /* devo_quiesce */
718 static struct modldrv modldrv = {
719 &mod_driverops, "xnbo driver", &ops,
722 static struct modlinkage modlinkage = {
723 MODREV_1, &modldrv, NULL
727 _init(void)
729 return (mod_install(&modlinkage));
733 _info(struct modinfo *modinfop)
735 return (mod_info(&modlinkage, modinfop));
739 _fini(void)
741 return (mod_remove(&modlinkage));