4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
29 * Copyright (c) 2004 Christian Limpach.
30 * All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. This section intentionally left blank.
41 * 4. The name of the author may not be used to endorse or promote products
42 * derived from this software without specific prior written permission.
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 * Section 3 of the above license was updated in response to bug 6379571.
60 * xnf.c - GLDv3 network driver for domU.
64 * This driver uses four per-instance locks:
68 * Protects access to the grant reference list stored in
69 * xnf_gref_head. Grant references should be acquired and released
70 * using gref_get() and gref_put() respectively.
75 * xnf_need_sched - used to record that a previous transmit attempt
76 * failed (and consequently it will be necessary to call
77 * mac_tx_update() when transmit resources are available).
78 * xnf_pending_multicast - the number of multicast requests that
79 * have been submitted to the backend for which we have not
80 * processed responses.
84 * Protects the transmit ring (xnf_tx_ring) and associated
85 * structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head).
89 * Protects the receive ring (xnf_rx_ring) and associated
90 * structures (notably xnf_rx_pkt_info).
92 * If driver-global state that affects both the transmit and receive
93 * rings is manipulated, both xnf_rxlock and xnf_txlock should be
94 * held, in that order.
96 * xnf_schedlock is acquired both whilst holding xnf_txlock and
97 * without. It should always be acquired after xnf_txlock if both are to be held.
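 *
 * Illustrative sketch only (not code from this driver): the acquisition
 * order described above, as used by xnf_start(), xnf_stop() and
 * xnf_detach() below --
 *
 *	mutex_enter(&xnfp->xnf_rxlock);		receive ring first
 *	mutex_enter(&xnfp->xnf_txlock);		then transmit ring
 *	... manipulate driver-global state ...
 *	mutex_exit(&xnfp->xnf_txlock);
 *	mutex_exit(&xnfp->xnf_rxlock);
 *
 * and, when the scheduling state must also be touched (as in
 * xnf_set_multicast()), xnf_schedlock is taken only after xnf_txlock:
 *
 *	mutex_enter(&xnfp->xnf_txlock);
 *	mutex_enter(&xnfp->xnf_schedlock);
 *	...
 *	mutex_exit(&xnfp->xnf_schedlock);
 *	mutex_exit(&xnfp->xnf_txlock);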
101 * - atomic_add_64() is used to manipulate counters where we require
102 * accuracy. For counters intended only for observation by humans,
103 * post increment/decrement are used instead.
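 *
 * For illustration, both styles appear in the code below:
 *
 *	atomic_inc_64(&xnfp->xnf_stat_gref_outstanding);   accuracy required
 *	xnfp->xnf_stat_tx_pullup++;                        observation only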
106 #include <sys/types.h>
107 #include <sys/errno.h>
108 #include <sys/param.h>
109 #include <sys/sysmacros.h>
110 #include <sys/systm.h>
111 #include <sys/stream.h>
112 #include <sys/strsubr.h>
113 #include <sys/strsun.h>
114 #include <sys/conf.h>
116 #include <sys/devops.h>
117 #include <sys/sunddi.h>
118 #include <sys/sunndi.h>
119 #include <sys/dlpi.h>
120 #include <sys/ethernet.h>
121 #include <sys/strsun.h>
122 #include <sys/pattr.h>
124 #include <inet/ip_impl.h>
126 #include <sys/modctl.h>
127 #include <sys/mac_provider.h>
128 #include <sys/mac_ether.h>
129 #include <sys/bootinfo.h>
130 #include <sys/mach_mmu.h>
131 #ifdef XPV_HVM_DRIVER
132 #include <sys/xpv_support.h>
133 #include <sys/hypervisor.h>
135 #include <sys/hypervisor.h>
136 #include <sys/evtchn_impl.h>
137 #include <sys/balloon_impl.h>
139 #include <xen/public/io/netif.h>
140 #include <sys/gnttab.h>
141 #include <xen/sys/xendev.h>
143 #include <sys/note.h>
144 #include <sys/debug.h>
148 #if defined(DEBUG) || defined(__lint)
154 xnf_t *xnf_debug_instance = NULL;
158 * On a 32 bit PAE system physical and machine addresses are larger
159 * than 32 bits. ddi_btop() on such systems takes an unsigned long
160 * argument, and so addresses above 4G are truncated before ddi_btop()
161 * gets to see them. To avoid this, code the shift operation here.
163 #define xnf_btop(addr) ((addr) >> PAGESHIFT)
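/*
 * Illustrative example (not used by the driver): for a machine address
 * just above 4G, e.g. 0x100000000ULL, a 32-bit PAE kernel's ddi_btop()
 * would only see the truncated low 32 bits (i.e. 0), whereas
 * xnf_btop(0x100000000ULL) shifts the full 64-bit value and yields
 * 0x100000 with 4K pages.
 */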
165 unsigned int xnf_max_tx_frags = 1;
168 * Should we use the multicast control feature if the backend provides it?
171 boolean_t xnf_multicast_control = B_TRUE;
174 * Received packets below this size are copied to a new streams buffer
175 * rather than being desballoc'ed.
177 * This value is chosen to accommodate traffic where there are a large
178 * number of small packets. For data showing a typical distribution,
182 * Rishi Sinha, Christos Papadopoulos, and John
183 * Heidemann. Internet Packet Size Distributions: Some
184 * Observations. Technical Report ISI-TR-2007-643,
185 * USC/Information Sciences Institute, May, 2007. Originally
186 * released October 2005 as web page
187 * http://netweb.usc.edu/~sinha/pkt-sizes/.
188 * <http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>.
190 size_t xnf_rx_copy_limit = 64;
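/*
 * Illustrative sketch (not additional driver code) of how the limit is
 * applied in xnf_rx_collect() below: a replacement buffer is sought only
 * for packets larger than the limit, so that small packets are copied
 * into a fresh mblk and the original buffer is immediately re-hung on
 * the receive ring --
 *
 *	if (len > xnf_rx_copy_limit)
 *		nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE);
 *	if (nbuf != NULL) {
 *		... desballoc() the existing buffer ...
 *	} else {
 *		... allocb() + bcopy(), then xnf_rxbuf_hang() ...
 *	}
 */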
192 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
193 #define INVALID_GRANT_REF ((grant_ref_t)-1)
194 #define INVALID_TX_ID ((uint16_t)-1)
196 #define TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)]))
197 #define TX_ID_VALID(i) (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE))
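/*
 * Illustrative example (not additional driver code) of how the macros
 * above are used by txid_get()/txid_put() and xnf_tx_clean_ring() below:
 *
 *	ASSERT(TX_ID_VALID(trp->id));
 *	tidp = TX_ID_TO_TXID(xnfp, trp->id);
 */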
199 /* Required system entry points */
200 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t);
201 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t);
203 /* Required driver entry points for Nemo */
204 static int xnf_start(void *);
205 static void xnf_stop(void *);
206 static int xnf_set_mac_addr(void *, const uint8_t *);
207 static int xnf_set_multicast(void *, boolean_t, const uint8_t *);
208 static int xnf_set_promiscuous(void *, boolean_t);
209 static mblk_t *xnf_send(void *, mblk_t *);
210 static uint_t xnf_intr(caddr_t);
211 static int xnf_stat(void *, uint_t, uint64_t *);
212 static boolean_t xnf_getcapab(void *, mac_capab_t, void *);
214 /* Driver private functions */
215 static int xnf_alloc_dma_resources(xnf_t *);
216 static void xnf_release_dma_resources(xnf_t *);
217 static void xnf_release_mblks(xnf_t *);
219 static int xnf_buf_constructor(void *, void *, int);
220 static void xnf_buf_destructor(void *, void *);
221 static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t);
222 #pragma inline(xnf_buf_get)
223 static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t);
224 #pragma inline(xnf_buf_put)
225 static void xnf_buf_refresh(xnf_buf_t *);
226 #pragma inline(xnf_buf_refresh)
227 static void xnf_buf_recycle(xnf_buf_t *);
229 static int xnf_tx_buf_constructor(void *, void *, int);
230 static void xnf_tx_buf_destructor(void *, void *);
232 static grant_ref_t gref_get(xnf_t *);
233 #pragma inline(gref_get)
234 static void gref_put(xnf_t *, grant_ref_t);
235 #pragma inline(gref_put)
237 static xnf_txid_t *txid_get(xnf_t *);
238 #pragma inline(txid_get)
239 static void txid_put(xnf_t *, xnf_txid_t *);
240 #pragma inline(txid_put)
242 void xnf_send_driver_status(int, int);
243 static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *);
244 static int xnf_tx_clean_ring(xnf_t *);
245 static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
247 static boolean_t xnf_kstat_init(xnf_t *);
248 static void xnf_rx_collect(xnf_t *);
250 static mac_callbacks_t xnf_callbacks = {
264 /* DMA attributes for network ring buffer */
265 static ddi_dma_attr_t ringbuf_dma_attr = {
266 	DMA_ATTR_V0,		/* version of this structure */
267 	0,			/* lowest usable address */
268 	0xffffffffffffffffULL,	/* highest usable address */
269 	0x7fffffff,		/* maximum DMAable byte count */
270 	MMU_PAGESIZE,		/* alignment in bytes */
271 	0x7ff,			/* bitmap of burst sizes */
272 	1,			/* minimum transfer */
273 	0xffffffffU,		/* maximum transfer */
274 	0xffffffffffffffffULL,	/* maximum segment length */
275 	1,			/* maximum number of segments */
277 	0,			/* flags (reserved) */
280 /* DMA attributes for transmit and receive data */
281 static ddi_dma_attr_t buf_dma_attr = {
282 	DMA_ATTR_V0,		/* version of this structure */
283 	0,			/* lowest usable address */
284 	0xffffffffffffffffULL,	/* highest usable address */
285 	0x7fffffff,		/* maximum DMAable byte count */
286 	MMU_PAGESIZE,		/* alignment in bytes */
287 	0x7ff,			/* bitmap of burst sizes */
288 	1,			/* minimum transfer */
289 	0xffffffffU,		/* maximum transfer */
290 	0xffffffffffffffffULL,	/* maximum segment length */
291 	1,			/* maximum number of segments */
293 	0,			/* flags (reserved) */
296 /* DMA access attributes for registers and descriptors */
297 static ddi_device_acc_attr_t accattr = {
299 	DDI_STRUCTURE_LE_ACC,	/* This is a little-endian device */
303 /* DMA access attributes for data: NOT to be byte swapped. */
304 static ddi_device_acc_attr_t data_accattr = {
310 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach,
311     nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported);
313 static struct modldrv xnf_modldrv = {
315 	"Virtual Ethernet driver",
319 static struct modlinkage modlinkage = {
320 	MODREV_1, &xnf_modldrv, NULL
328 	mac_init_ops(&xnf_dev_ops, "xnf");
329 	r = mod_install(&modlinkage);
330 	if (r != DDI_SUCCESS)
331 		mac_fini_ops(&xnf_dev_ops);
339 	return (EBUSY); /* XXPV should be removable */
343 _info(struct modinfo *modinfop)
345 	return (mod_info(&modlinkage, modinfop));
349 * Acquire a grant reference.
352 gref_get(xnf_t *xnfp)
356 	mutex_enter(&xnfp->xnf_gref_lock);
359 		gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head);
361 	} while ((gref == INVALID_GRANT_REF) &&
362 	    (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0));
364 	mutex_exit(&xnfp->xnf_gref_lock);
366 	if (gref == INVALID_GRANT_REF) {
367 		xnfp->xnf_stat_gref_failure++;
369 		atomic_inc_64(&xnfp->xnf_stat_gref_outstanding);
370 		if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak)
371 			xnfp->xnf_stat_gref_peak =
372 			    xnfp->xnf_stat_gref_outstanding;
379 * Release a grant reference.
382 gref_put(xnf_t *xnfp, grant_ref_t gref)
384 	ASSERT(gref != INVALID_GRANT_REF);
386 	mutex_enter(&xnfp->xnf_gref_lock);
387 	gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref);
388 	mutex_exit(&xnfp->xnf_gref_lock);
390 	atomic_dec_64(&xnfp->xnf_stat_gref_outstanding);
394 * Acquire a transmit id.
397 txid_get(xnf_t *xnfp)
401 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
403 	if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID)
406 	ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head));
408 	tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head);
409 	xnfp->xnf_tx_pkt_id_head = tidp->next;
410 	tidp->next = INVALID_TX_ID;
412 	ASSERT(tidp->txbuf == NULL);
418 * Release a transmit id.
421 txid_put(xnf_t *xnfp, xnf_txid_t *tidp)
423 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
424 	ASSERT(TX_ID_VALID(tidp->id));
425 	ASSERT(tidp->next == INVALID_TX_ID);
428 	tidp->next = xnfp->xnf_tx_pkt_id_head;
429 	xnfp->xnf_tx_pkt_id_head = tidp->id;
433 * Get `wanted' slots in the transmit ring, waiting for at least that
434 * number if `wait' is B_TRUE. Force the ring to be cleaned by setting `wanted' to zero.
437 * Return the number of slots available.
440 tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait)
443 	boolean_t forced_clean = (wanted == 0);
445 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
447 	/* LINTED: constant in conditional context */
449 		slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring);
451 		if ((slotsfree < wanted) || forced_clean)
452 			slotsfree = xnf_tx_clean_ring(xnfp);
455 		 * If there are more than we need free, tell other
456 		 * people to come looking again. We hold txlock, so we
457 		 * are able to take our slots before anyone else runs.
459 		if (slotsfree > wanted)
460 			cv_broadcast(&xnfp->xnf_cv_tx_slots);
462 		if (slotsfree >= wanted)
468 		cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock);
471 	ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring)));
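/*
 * Illustrative usage sketch (not additional driver code), mirroring the
 * callers below: tx_slots_get() must be invoked with xnf_txlock held.
 * A non-zero `wanted' asks for that many slots (waiting only if `wait'
 * is B_TRUE), while `wanted' == 0 simply forces a clean of the ring --
 *
 *	mutex_enter(&xnfp->xnf_txlock);
 *	slots_free = tx_slots_get(xnfp, 1, B_FALSE);	as in tx_push_packets()
 *	free_slots = tx_slots_get(xnfp, 0, B_FALSE);	as in xnf_intr()
 *	mutex_exit(&xnfp->xnf_txlock);
 */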
477 xnf_setup_rings(xnf_t
*xnfp
)
480 struct xenbus_device
*xsd
;
486 oeid
= xvdi_get_oeid(xnfp
->xnf_devinfo
);
487 xsd
= xvdi_get_xsd(xnfp
->xnf_devinfo
);
489 if (xnfp
->xnf_tx_ring_ref
!= INVALID_GRANT_REF
)
490 gnttab_end_foreign_access(xnfp
->xnf_tx_ring_ref
, 0, 0);
492 err
= gnttab_grant_foreign_access(oeid
,
493 xnf_btop(pa_to_ma(xnfp
->xnf_tx_ring_phys_addr
)), 0);
496 xenbus_dev_error(xsd
, err
, "granting access to tx ring page");
499 xnfp
->xnf_tx_ring_ref
= (grant_ref_t
)err
;
501 if (xnfp
->xnf_rx_ring_ref
!= INVALID_GRANT_REF
)
502 gnttab_end_foreign_access(xnfp
->xnf_rx_ring_ref
, 0, 0);
504 err
= gnttab_grant_foreign_access(oeid
,
505 xnf_btop(pa_to_ma(xnfp
->xnf_rx_ring_phys_addr
)), 0);
508 xenbus_dev_error(xsd
, err
, "granting access to rx ring page");
511 xnfp
->xnf_rx_ring_ref
= (grant_ref_t
)err
;
513 mutex_enter(&xnfp
->xnf_txlock
);
516 * Setup/cleanup the TX ring. Note that this can lose packets
517 * after a resume, but we expect to stagger on.
519 	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID;	/* I.e. empty list. */
520 for (i
= 0, tidp
= &xnfp
->xnf_tx_pkt_id
[0];
521 i
< NET_TX_RING_SIZE
;
529 tidp
->next
= INVALID_TX_ID
; /* Appease txid_put(). */
530 txid_put(xnfp
, tidp
);
534 ASSERT(txp
->tx_txreq
.gref
!= INVALID_GRANT_REF
);
535 ASSERT(txp
->tx_mp
!= NULL
);
537 switch (txp
->tx_type
) {
539 VERIFY(gnttab_query_foreign_access(txp
->tx_txreq
.gref
)
542 if (txp
->tx_bdesc
== NULL
) {
543 (void) gnttab_end_foreign_access_ref(
544 txp
->tx_txreq
.gref
, 1);
545 gref_put(xnfp
, txp
->tx_txreq
.gref
);
546 (void) ddi_dma_unbind_handle(
549 xnf_buf_put(xnfp
, txp
->tx_bdesc
, B_TRUE
);
553 txid_put(xnfp
, tidp
);
554 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, txp
);
559 txp
->tx_type
= TX_MCAST_RSP
;
560 txp
->tx_status
= NETIF_RSP_DROPPED
;
561 cv_broadcast(&xnfp
->xnf_cv_multicast
);
564 * The request consumed two slots in the ring,
565 * yet only a single xnf_txid_t is used. Step
566 * over the empty slot.
569 ASSERT(i
< NET_TX_RING_SIZE
);
578 /* LINTED: constant in conditional context */
579 SHARED_RING_INIT(xnfp
->xnf_tx_ring
.sring
);
580 /* LINTED: constant in conditional context */
581 FRONT_RING_INIT(&xnfp
->xnf_tx_ring
,
582 xnfp
->xnf_tx_ring
.sring
, PAGESIZE
);
584 mutex_exit(&xnfp
->xnf_txlock
);
586 mutex_enter(&xnfp
->xnf_rxlock
);
589 * Clean out any buffers currently posted to the receive ring
590 * before we reset it.
592 for (i
= 0, bdescp
= &xnfp
->xnf_rx_pkt_info
[0];
593 i
< NET_RX_RING_SIZE
;
595 if (*bdescp
!= NULL
) {
596 xnf_buf_put(xnfp
, *bdescp
, B_FALSE
);
601 /* LINTED: constant in conditional context */
602 SHARED_RING_INIT(xnfp
->xnf_rx_ring
.sring
);
603 /* LINTED: constant in conditional context */
604 FRONT_RING_INIT(&xnfp
->xnf_rx_ring
,
605 xnfp
->xnf_rx_ring
.sring
, PAGESIZE
);
608 * Fill the ring with buffers.
610 for (i
= 0; i
< NET_RX_RING_SIZE
; i
++) {
613 bdesc
= xnf_buf_get(xnfp
, KM_SLEEP
, B_FALSE
);
614 VERIFY(bdesc
!= NULL
);
615 xnf_rxbuf_hang(xnfp
, bdesc
);
618 /* LINTED: constant in conditional context */
619 RING_PUSH_REQUESTS(&xnfp
->xnf_rx_ring
);
621 mutex_exit(&xnfp
->xnf_rxlock
);
626 if (xnfp
->xnf_tx_ring_ref
!= INVALID_GRANT_REF
)
627 gnttab_end_foreign_access(xnfp
->xnf_tx_ring_ref
, 0, 0);
628 xnfp
->xnf_tx_ring_ref
= INVALID_GRANT_REF
;
630 if (xnfp
->xnf_rx_ring_ref
!= INVALID_GRANT_REF
)
631 gnttab_end_foreign_access(xnfp
->xnf_rx_ring_ref
, 0, 0);
632 xnfp
->xnf_rx_ring_ref
= INVALID_GRANT_REF
;
638 * Connect driver to back end, called to set up communication with
639 * back end driver both initially and on resume after restore/migrate.
642 xnf_be_connect(xnf_t
*xnfp
)
645 xenbus_transaction_t xbt
;
646 struct xenbus_device
*xsd
;
650 ASSERT(!xnfp
->xnf_connected
);
652 xsd
= xvdi_get_xsd(xnfp
->xnf_devinfo
);
653 xsname
= xvdi_get_xsname(xnfp
->xnf_devinfo
);
655 err
= xnf_setup_rings(xnfp
);
657 cmn_err(CE_WARN
, "failed to set up tx/rx rings");
658 xenbus_dev_error(xsd
, err
, "setting up ring");
663 err
= xenbus_transaction_start(&xbt
);
665 xenbus_dev_error(xsd
, EIO
, "starting transaction");
669 err
= xenbus_printf(xbt
, xsname
, "tx-ring-ref", "%u",
670 xnfp
->xnf_tx_ring_ref
);
672 message
= "writing tx ring-ref";
673 goto abort_transaction
;
676 err
= xenbus_printf(xbt
, xsname
, "rx-ring-ref", "%u",
677 xnfp
->xnf_rx_ring_ref
);
679 message
= "writing rx ring-ref";
680 goto abort_transaction
;
683 err
= xenbus_printf(xbt
, xsname
, "event-channel", "%u",
686 message
= "writing event-channel";
687 goto abort_transaction
;
690 err
= xenbus_printf(xbt
, xsname
, "feature-rx-notify", "%d", 1);
692 message
= "writing feature-rx-notify";
693 goto abort_transaction
;
696 err
= xenbus_printf(xbt
, xsname
, "request-rx-copy", "%d", 1);
698 message
= "writing request-rx-copy";
699 goto abort_transaction
;
702 if (xnfp
->xnf_be_mcast_control
) {
703 err
= xenbus_printf(xbt
, xsname
, "request-multicast-control",
706 message
= "writing request-multicast-control";
707 goto abort_transaction
;
711 err
= xvdi_switch_state(xnfp
->xnf_devinfo
, xbt
, XenbusStateConnected
);
713 message
= "switching state to XenbusStateConnected";
714 goto abort_transaction
;
717 err
= xenbus_transaction_end(xbt
, 0);
721 xenbus_dev_error(xsd
, err
, "completing transaction");
727 (void) xenbus_transaction_end(xbt
, 1);
728 xenbus_dev_error(xsd
, err
, "%s", message
);
732 * Read configuration information from xenstore.
735 xnf_read_config(xnf_t
*xnfp
)
738 char mac
[ETHERADDRL
* 3];
739 char *oename
= xvdi_get_oename(xnfp
->xnf_devinfo
);
741 err
= xenbus_scanf(XBT_NULL
, oename
, "mac",
742 "%s", (char *)&mac
[0]);
745 * bad: we're supposed to have been set up with a proper
746 * MAC address at this point
748 cmn_err(CE_WARN
, "%s%d: no mac address",
749 ddi_driver_name(xnfp
->xnf_devinfo
),
750 ddi_get_instance(xnfp
->xnf_devinfo
));
753 if (ether_aton(mac
, xnfp
->xnf_mac_addr
) != ETHERADDRL
) {
755 xenbus_dev_error(xvdi_get_xsd(xnfp
->xnf_devinfo
), ENOENT
,
756 "parsing %s/mac", xvdi_get_xsname(xnfp
->xnf_devinfo
));
760 err
= xenbus_scanf(XBT_NULL
, oename
,
761 "feature-rx-copy", "%d", &be_cap
);
763 * If we fail to read the store we assume that the key is
764 * absent, implying an older domain at the far end. Older
765 * domains cannot do HV copy.
769 xnfp
->xnf_be_rx_copy
= (be_cap
!= 0);
771 err
= xenbus_scanf(XBT_NULL
, oename
,
772 "feature-multicast-control", "%d", &be_cap
);
774 * If we fail to read the store we assume that the key is
775 * absent, implying an older domain at the far end. Older
776 * domains do not support multicast control.
780 xnfp
->xnf_be_mcast_control
= (be_cap
!= 0) && xnf_multicast_control
;
784 * attach(9E) -- Attach a device to the system
787 xnf_attach(dev_info_t
*devinfo
, ddi_attach_cmd_t cmd
)
789 mac_register_t
*macp
;
795 if (xnf_debug
& XNF_DEBUG_DDI
)
796 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo
),
802 xnfp
= ddi_get_driver_private(devinfo
);
805 (void) xvdi_resume(devinfo
);
806 (void) xvdi_alloc_evtchn(devinfo
);
807 xnfp
->xnf_evtchn
= xvdi_get_evtchn(devinfo
);
808 #ifdef XPV_HVM_DRIVER
809 ec_bind_evtchn_to_handler(xnfp
->xnf_evtchn
, IPL_VIF
, xnf_intr
,
812 (void) ddi_add_intr(devinfo
, 0, NULL
, NULL
, xnf_intr
,
815 return (DDI_SUCCESS
);
821 return (DDI_FAILURE
);
825 * Allocate gld_mac_info_t and xnf_instance structures
827 macp
= mac_alloc(MAC_VERSION
);
829 return (DDI_FAILURE
);
830 xnfp
= kmem_zalloc(sizeof (*xnfp
), KM_SLEEP
);
832 macp
->m_dip
= devinfo
;
833 macp
->m_driver
= xnfp
;
834 xnfp
->xnf_devinfo
= devinfo
;
836 macp
->m_type_ident
= MAC_PLUGIN_IDENT_ETHER
;
837 macp
->m_src_addr
= xnfp
->xnf_mac_addr
;
838 macp
->m_callbacks
= &xnf_callbacks
;
840 macp
->m_max_sdu
= XNF_MAXPKT
;
842 xnfp
->xnf_running
= B_FALSE
;
843 xnfp
->xnf_connected
= B_FALSE
;
844 xnfp
->xnf_be_rx_copy
= B_FALSE
;
845 xnfp
->xnf_be_mcast_control
= B_FALSE
;
846 xnfp
->xnf_need_sched
= B_FALSE
;
848 xnfp
->xnf_rx_head
= NULL
;
849 xnfp
->xnf_rx_tail
= NULL
;
850 xnfp
->xnf_rx_new_buffers_posted
= B_FALSE
;
852 #ifdef XPV_HVM_DRIVER
854 * Report our version to dom0.
856 if (xenbus_printf(XBT_NULL
, "guest/xnf", "version", "%d",
858 cmn_err(CE_WARN
, "xnf: couldn't write version\n");
862 * Get the iblock cookie with which to initialize the mutexes.
864 if (ddi_get_iblock_cookie(devinfo
, 0, &xnfp
->xnf_icookie
)
868 mutex_init(&xnfp
->xnf_txlock
,
869 NULL
, MUTEX_DRIVER
, xnfp
->xnf_icookie
);
870 mutex_init(&xnfp
->xnf_rxlock
,
871 NULL
, MUTEX_DRIVER
, xnfp
->xnf_icookie
);
872 mutex_init(&xnfp
->xnf_schedlock
,
873 NULL
, MUTEX_DRIVER
, xnfp
->xnf_icookie
);
874 mutex_init(&xnfp
->xnf_gref_lock
,
875 NULL
, MUTEX_DRIVER
, xnfp
->xnf_icookie
);
877 cv_init(&xnfp
->xnf_cv_state
, NULL
, CV_DEFAULT
, NULL
);
878 cv_init(&xnfp
->xnf_cv_multicast
, NULL
, CV_DEFAULT
, NULL
);
879 cv_init(&xnfp
->xnf_cv_tx_slots
, NULL
, CV_DEFAULT
, NULL
);
881 (void) sprintf(cachename
, "xnf_buf_cache_%d",
882 ddi_get_instance(devinfo
));
883 xnfp
->xnf_buf_cache
= kmem_cache_create(cachename
,
884 sizeof (xnf_buf_t
), 0,
885 xnf_buf_constructor
, xnf_buf_destructor
,
886 NULL
, xnfp
, NULL
, 0);
887 if (xnfp
->xnf_buf_cache
== NULL
)
890 (void) sprintf(cachename
, "xnf_tx_buf_cache_%d",
891 ddi_get_instance(devinfo
));
892 xnfp
->xnf_tx_buf_cache
= kmem_cache_create(cachename
,
893 sizeof (xnf_txbuf_t
), 0,
894 xnf_tx_buf_constructor
, xnf_tx_buf_destructor
,
895 NULL
, xnfp
, NULL
, 0);
896 if (xnfp
->xnf_tx_buf_cache
== NULL
)
899 xnfp
->xnf_gref_head
= INVALID_GRANT_REF
;
901 if (xnf_alloc_dma_resources(xnfp
) == DDI_FAILURE
) {
902 cmn_err(CE_WARN
, "xnf%d: failed to allocate and initialize "
903 "driver data structures",
904 ddi_get_instance(xnfp
->xnf_devinfo
));
908 xnfp
->xnf_rx_ring
.sring
->rsp_event
=
909 xnfp
->xnf_tx_ring
.sring
->rsp_event
= 1;
911 xnfp
->xnf_tx_ring_ref
= INVALID_GRANT_REF
;
912 xnfp
->xnf_rx_ring_ref
= INVALID_GRANT_REF
;
914 /* set driver private pointer now */
915 ddi_set_driver_private(devinfo
, xnfp
);
917 if (!xnf_kstat_init(xnfp
))
921 * Allocate an event channel, add the interrupt handler and
922 * bind it to the event channel.
924 (void) xvdi_alloc_evtchn(devinfo
);
925 xnfp
->xnf_evtchn
= xvdi_get_evtchn(devinfo
);
926 #ifdef XPV_HVM_DRIVER
927 ec_bind_evtchn_to_handler(xnfp
->xnf_evtchn
, IPL_VIF
, xnf_intr
, xnfp
);
929 (void) ddi_add_intr(devinfo
, 0, NULL
, NULL
, xnf_intr
, (caddr_t
)xnfp
);
932 err
= mac_register(macp
, &xnfp
->xnf_mh
);
938 if (xvdi_add_event_handler(devinfo
, XS_OE_STATE
, oe_state_change
, NULL
)
942 #ifdef XPV_HVM_DRIVER
944 * In the HVM case, this driver essentially replaces a driver for
945 * a 'real' PCI NIC. Without the "model" property set to
946 * "Ethernet controller", like the PCI code does, netbooting does
947 * not work correctly, as strplumb_get_netdev_path() will not find this node.
950 (void) ndi_prop_update_string(DDI_DEV_T_NONE
, devinfo
, "model",
951 "Ethernet controller");
955 if (xnf_debug_instance
== NULL
)
956 xnf_debug_instance
= xnfp
;
959 return (DDI_SUCCESS
);
962 (void) mac_unregister(xnfp
->xnf_mh
);
965 #ifdef XPV_HVM_DRIVER
966 ec_unbind_evtchn(xnfp
->xnf_evtchn
);
967 xvdi_free_evtchn(devinfo
);
969 ddi_remove_intr(devinfo
, 0, xnfp
->xnf_icookie
);
971 xnfp
->xnf_evtchn
= INVALID_EVTCHN
;
972 kstat_delete(xnfp
->xnf_kstat_aux
);
975 xnf_release_dma_resources(xnfp
);
978 kmem_cache_destroy(xnfp
->xnf_tx_buf_cache
);
981 kmem_cache_destroy(xnfp
->xnf_buf_cache
);
984 cv_destroy(&xnfp
->xnf_cv_tx_slots
);
985 cv_destroy(&xnfp
->xnf_cv_multicast
);
986 cv_destroy(&xnfp
->xnf_cv_state
);
988 mutex_destroy(&xnfp
->xnf_gref_lock
);
989 mutex_destroy(&xnfp
->xnf_schedlock
);
990 mutex_destroy(&xnfp
->xnf_rxlock
);
991 mutex_destroy(&xnfp
->xnf_txlock
);
994 kmem_free(xnfp
, sizeof (*xnfp
));
998 return (DDI_FAILURE
);
1001 /* detach(9E) -- Detach a device from the system */
1003 xnf_detach(dev_info_t
*devinfo
, ddi_detach_cmd_t cmd
)
1005 xnf_t
*xnfp
; /* Our private device info */
1008 if (xnf_debug
& XNF_DEBUG_DDI
)
1009 printf("xnf_detach(0x%p)\n", (void *)devinfo
);
1012 xnfp
= ddi_get_driver_private(devinfo
);
1016 #ifdef XPV_HVM_DRIVER
1017 ec_unbind_evtchn(xnfp
->xnf_evtchn
);
1018 xvdi_free_evtchn(devinfo
);
1020 ddi_remove_intr(devinfo
, 0, xnfp
->xnf_icookie
);
1023 xvdi_suspend(devinfo
);
1025 mutex_enter(&xnfp
->xnf_rxlock
);
1026 mutex_enter(&xnfp
->xnf_txlock
);
1028 xnfp
->xnf_evtchn
= INVALID_EVTCHN
;
1029 xnfp
->xnf_connected
= B_FALSE
;
1030 mutex_exit(&xnfp
->xnf_txlock
);
1031 mutex_exit(&xnfp
->xnf_rxlock
);
1033 /* claim link to be down after disconnect */
1034 mac_link_update(xnfp
->xnf_mh
, LINK_STATE_DOWN
);
1035 return (DDI_SUCCESS
);
1041 return (DDI_FAILURE
);
1044 if (xnfp
->xnf_connected
)
1045 return (DDI_FAILURE
);
1048 * Cannot detach if we have xnf_buf_t outstanding.
1050 if (xnfp
->xnf_stat_buf_allocated
> 0)
1051 return (DDI_FAILURE
);
1053 if (mac_unregister(xnfp
->xnf_mh
) != 0)
1054 return (DDI_FAILURE
);
1056 kstat_delete(xnfp
->xnf_kstat_aux
);
1058 /* Stop the receiver */
1061 xvdi_remove_event_handler(devinfo
, XS_OE_STATE
);
1063 /* Remove the interrupt */
1064 #ifdef XPV_HVM_DRIVER
1065 ec_unbind_evtchn(xnfp
->xnf_evtchn
);
1066 xvdi_free_evtchn(devinfo
);
1068 ddi_remove_intr(devinfo
, 0, xnfp
->xnf_icookie
);
1071 /* Release any pending xmit mblks */
1072 xnf_release_mblks(xnfp
);
1074 /* Release all DMA resources */
1075 xnf_release_dma_resources(xnfp
);
1077 cv_destroy(&xnfp
->xnf_cv_tx_slots
);
1078 cv_destroy(&xnfp
->xnf_cv_multicast
);
1079 cv_destroy(&xnfp
->xnf_cv_state
);
1081 kmem_cache_destroy(xnfp
->xnf_tx_buf_cache
);
1082 kmem_cache_destroy(xnfp
->xnf_buf_cache
);
1084 mutex_destroy(&xnfp
->xnf_gref_lock
);
1085 mutex_destroy(&xnfp
->xnf_schedlock
);
1086 mutex_destroy(&xnfp
->xnf_rxlock
);
1087 mutex_destroy(&xnfp
->xnf_txlock
);
1089 kmem_free(xnfp
, sizeof (*xnfp
));
1091 return (DDI_SUCCESS
);
1095 * xnf_set_mac_addr() -- set the physical network address on the board.
1098 xnf_set_mac_addr(void *arg
, const uint8_t *macaddr
)
1100 _NOTE(ARGUNUSED(arg
, macaddr
));
1103 * We can't set our macaddr.
1109 * xnf_set_multicast() -- set (enable) or disable a multicast address.
1111 * Program the hardware to enable/disable the multicast address
1112 * in "mca". Enable if "add" is true, disable if false.
1115 xnf_set_multicast(void *arg
, boolean_t add
, const uint8_t *mca
)
1122 netif_tx_request_t
*txrp
;
1123 struct netif_extra_info
*erp
;
1124 boolean_t notify
, result
;
1127 * If the backend does not support multicast control then we
1128 * must assume that the right packets will just arrive.
1130 if (!xnfp
->xnf_be_mcast_control
)
1133 txp
= kmem_cache_alloc(xnfp
->xnf_tx_buf_cache
, KM_SLEEP
);
1135 mutex_enter(&xnfp
->xnf_txlock
);
1138 * If we're not yet connected then claim success. This is
1139 * acceptable because we refresh the entire set of multicast
1140 * addresses when we get connected.
1142 * We can't wait around here because the MAC layer expects
1143 * this to be a non-blocking operation - waiting ends up
1144 * causing a deadlock during resume.
1146 if (!xnfp
->xnf_connected
) {
1147 mutex_exit(&xnfp
->xnf_txlock
);
1152 * 1. Acquire two slots in the ring.
1153 * 2. Fill in the slots.
1154 * 3. Request notification when the operation is done.
1156 * 5. Wait for the response via xnf_tx_clean_ring().
1159 n_slots
= tx_slots_get(xnfp
, 2, B_TRUE
);
1160 ASSERT(n_slots
>= 2);
1162 slot
= xnfp
->xnf_tx_ring
.req_prod_pvt
;
1163 tidp
= txid_get(xnfp
);
1164 VERIFY(tidp
!= NULL
);
1166 txp
->tx_type
= TX_MCAST_REQ
;
1167 txp
->tx_slot
= slot
;
1169 txrp
= RING_GET_REQUEST(&xnfp
->xnf_tx_ring
, slot
);
1170 erp
= (struct netif_extra_info
*)
1171 RING_GET_REQUEST(&xnfp
->xnf_tx_ring
, slot
+ 1);
1176 /* Set tx_txreq.id to appease xnf_tx_clean_ring(). */
1177 txrp
->id
= txp
->tx_txreq
.id
= tidp
->id
;
1178 txrp
->flags
= NETTXF_extra_info
;
1180 erp
->type
= add
? XEN_NETIF_EXTRA_TYPE_MCAST_ADD
:
1181 XEN_NETIF_EXTRA_TYPE_MCAST_DEL
;
1182 bcopy((void *)mca
, &erp
->u
.mcast
.addr
, ETHERADDRL
);
1186 xnfp
->xnf_tx_ring
.req_prod_pvt
= slot
+ 2;
1188 mutex_enter(&xnfp
->xnf_schedlock
);
1189 xnfp
->xnf_pending_multicast
++;
1190 mutex_exit(&xnfp
->xnf_schedlock
);
1192 /* LINTED: constant in conditional context */
1193 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp
->xnf_tx_ring
,
1196 ec_notify_via_evtchn(xnfp
->xnf_evtchn
);
1198 while (txp
->tx_type
== TX_MCAST_REQ
)
1199 cv_wait(&xnfp
->xnf_cv_multicast
,
1202 ASSERT(txp
->tx_type
== TX_MCAST_RSP
);
1204 mutex_enter(&xnfp
->xnf_schedlock
);
1205 xnfp
->xnf_pending_multicast
--;
1206 mutex_exit(&xnfp
->xnf_schedlock
);
1208 result
= (txp
->tx_status
== NETIF_RSP_OKAY
);
1210 txid_put(xnfp
, tidp
);
1212 mutex_exit(&xnfp
->xnf_txlock
);
1214 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, txp
);
1216 return (result
? 0 : 1);
1220 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board
1222 * Program the hardware to enable/disable promiscuous mode.
1225 xnf_set_promiscuous(void *arg
, boolean_t on
)
1227 _NOTE(ARGUNUSED(arg
, on
));
1230 * We can't really do this, but we pretend that we can in
1231 * order that snoop will work.
1237 * Clean buffers that we have responses for from the transmit ring.
1240 xnf_tx_clean_ring(xnf_t
*xnfp
)
1242 boolean_t work_to_do
;
1244 ASSERT(MUTEX_HELD(&xnfp
->xnf_txlock
));
1247 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp
->xnf_tx_ring
)) {
1248 RING_IDX cons
, prod
, i
;
1250 cons
= xnfp
->xnf_tx_ring
.rsp_cons
;
1251 prod
= xnfp
->xnf_tx_ring
.sring
->rsp_prod
;
1254 * Clean tx requests from ring that we have responses
1257 DTRACE_PROBE2(xnf_tx_clean_range
, int, cons
, int, prod
);
1258 for (i
= cons
; i
!= prod
; i
++) {
1259 netif_tx_response_t
*trp
;
1263 trp
= RING_GET_RESPONSE(&xnfp
->xnf_tx_ring
, i
);
1264 ASSERT(TX_ID_VALID(trp
->id
));
1266 tidp
= TX_ID_TO_TXID(xnfp
, trp
->id
);
1267 ASSERT(tidp
->id
== trp
->id
);
1268 ASSERT(tidp
->next
== INVALID_TX_ID
);
1271 ASSERT(txp
!= NULL
);
1272 ASSERT(txp
->tx_txreq
.id
== trp
->id
);
1274 switch (txp
->tx_type
) {
1276 if (gnttab_query_foreign_access(
1277 txp
->tx_txreq
.gref
) != 0)
1279 "tx grant %d still in use by "
1281 txp
->tx_txreq
.gref
);
1283 if (txp
->tx_bdesc
== NULL
) {
1284 (void) gnttab_end_foreign_access_ref(
1285 txp
->tx_txreq
.gref
, 1);
1286 gref_put(xnfp
, txp
->tx_txreq
.gref
);
1287 (void) ddi_dma_unbind_handle(
1288 txp
->tx_dma_handle
);
1290 xnf_buf_put(xnfp
, txp
->tx_bdesc
,
1294 freemsg(txp
->tx_mp
);
1295 txid_put(xnfp
, tidp
);
1296 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, txp
);
1301 txp
->tx_type
= TX_MCAST_RSP
;
1302 txp
->tx_status
= trp
->status
;
1303 cv_broadcast(&xnfp
->xnf_cv_multicast
);
1311 cmn_err(CE_PANIC
, "xnf_tx_clean_ring: "
1312 "invalid xnf_txbuf_t type: %d",
1318 * Record the last response we dealt with so that we
1319 * know where to start next time around.
1321 xnfp
->xnf_tx_ring
.rsp_cons
= prod
;
1325 /* LINTED: constant in conditional context */
1326 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp
->xnf_tx_ring
, work_to_do
);
1330 return (RING_FREE_REQUESTS(&xnfp
->xnf_tx_ring
));
1334 * Allocate and fill in a look-aside buffer for the packet `mp'. Used
1335 * to ensure that the packet is physically contiguous and contained
1336 * within a single page.
1339 xnf_tx_pullup(xnf_t
*xnfp
, mblk_t
*mp
)
1344 bd
= xnf_buf_get(xnfp
, KM_SLEEP
, B_TRUE
);
1349 while (mp
!= NULL
) {
1350 size_t len
= MBLKL(mp
);
1352 bcopy(mp
->b_rptr
, bp
, len
);
1358 ASSERT((bp
- bd
->buf
) <= PAGESIZE
);
1360 xnfp
->xnf_stat_tx_pullup
++;
1366 * Insert the pseudo-header checksum into the packet `buf'.
1369 xnf_pseudo_cksum(caddr_t buf
, int length
)
1371 struct ether_header
*ehp
;
1372 uint16_t sap
, len
, *stuff
;
1378 ASSERT(length
>= sizeof (*ehp
));
1379 ehp
= (struct ether_header
*)buf
;
1381 if (ntohs(ehp
->ether_type
) == VLAN_TPID
) {
1382 struct ether_vlan_header
*evhp
;
1384 ASSERT(length
>= sizeof (*evhp
));
1385 evhp
= (struct ether_vlan_header
*)buf
;
1386 sap
= ntohs(evhp
->ether_type
);
1387 offset
= sizeof (*evhp
);
1389 sap
= ntohs(ehp
->ether_type
);
1390 offset
= sizeof (*ehp
);
1393 ASSERT(sap
== ETHERTYPE_IP
);
1395 /* Packet should have been pulled up by the caller. */
1396 if ((offset
+ sizeof (ipha_t
)) > length
) {
1397 cmn_err(CE_WARN
, "xnf_pseudo_cksum: no room for checksum");
1401 ipha
= (ipha_t
*)(buf
+ offset
);
1403 ASSERT(IPH_HDR_LENGTH(ipha
) == IP_SIMPLE_HDR_LENGTH
);
1405 len
= ntohs(ipha
->ipha_length
) - IP_SIMPLE_HDR_LENGTH
;
1407 switch (ipha
->ipha_protocol
) {
1409 stuff
= IPH_TCPH_CHECKSUMP(ipha
, IP_SIMPLE_HDR_LENGTH
);
1410 cksum
= IP_TCP_CSUM_COMP
;
1413 stuff
= IPH_UDPH_CHECKSUMP(ipha
, IP_SIMPLE_HDR_LENGTH
);
1414 cksum
= IP_UDP_CSUM_COMP
;
1417 cmn_err(CE_WARN
, "xnf_pseudo_cksum: unexpected protocol %d",
1418 ipha
->ipha_protocol
);
1422 src
= ipha
->ipha_src
;
1423 dst
= ipha
->ipha_dst
;
1425 cksum
+= (dst
>> 16) + (dst
& 0xFFFF);
1426 cksum
+= (src
>> 16) + (src
& 0xFFFF);
1427 cksum
+= htons(len
);
1429 cksum
= (cksum
>> 16) + (cksum
& 0xFFFF);
1430 cksum
= (cksum
>> 16) + (cksum
& 0xFFFF);
1432 ASSERT(cksum
<= 0xFFFF);
1434 *stuff
= (uint16_t)(cksum
? cksum
: ~cksum
);
1438 * Push a list of prepared packets (`txp') into the transmit ring.
1440 static xnf_txbuf_t
*
1441 tx_push_packets(xnf_t
*xnfp
, xnf_txbuf_t
*txp
)
1447 mutex_enter(&xnfp
->xnf_txlock
);
1449 ASSERT(xnfp
->xnf_running
);
1452 * Wait until we are connected to the backend.
1454 while (!xnfp
->xnf_connected
)
1455 cv_wait(&xnfp
->xnf_cv_state
, &xnfp
->xnf_txlock
);
1457 slots_free
= tx_slots_get(xnfp
, 1, B_FALSE
);
1458 DTRACE_PROBE1(xnf_send_slotsfree
, int, slots_free
);
1460 slot
= xnfp
->xnf_tx_ring
.req_prod_pvt
;
1462 while ((txp
!= NULL
) && (slots_free
> 0)) {
1464 netif_tx_request_t
*txrp
;
1466 tidp
= txid_get(xnfp
);
1467 VERIFY(tidp
!= NULL
);
1469 txrp
= RING_GET_REQUEST(&xnfp
->xnf_tx_ring
, slot
);
1471 txp
->tx_slot
= slot
;
1472 txp
->tx_txreq
.id
= tidp
->id
;
1473 *txrp
= txp
->tx_txreq
;
1477 xnfp
->xnf_stat_opackets
++;
1478 xnfp
->xnf_stat_obytes
+= txp
->tx_txreq
.size
;
1486 xnfp
->xnf_tx_ring
.req_prod_pvt
= slot
;
1489 * Tell the peer that we sent something, if it cares.
1491 /* LINTED: constant in conditional context */
1492 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp
->xnf_tx_ring
,
1495 ec_notify_via_evtchn(xnfp
->xnf_evtchn
);
1497 mutex_exit(&xnfp
->xnf_txlock
);
1503 * Send the chain of packets `mp'. Called by the MAC framework.
1506 xnf_send(void *arg
, mblk_t
*mp
)
1510 xnf_txbuf_t
*head
, *tail
;
1514 oeid
= xvdi_get_oeid(xnfp
->xnf_devinfo
);
1517 * Prepare packets for transmission.
1521 while (mp
!= NULL
) {
1523 int n_chunks
, length
;
1524 boolean_t page_oops
;
1527 for (ml
= mp
, n_chunks
= length
= 0, page_oops
= B_FALSE
;
1529 ml
= ml
->b_cont
, n_chunks
++) {
1532 * Test if this buffer includes a page
1533 * boundary. The test assumes that the range
1534 * b_rptr...b_wptr can include only a single boundary.
1537 if (xnf_btop((size_t)ml
->b_rptr
) !=
1538 xnf_btop((size_t)ml
->b_wptr
)) {
1539 xnfp
->xnf_stat_tx_pagebndry
++;
1543 length
+= MBLKL(ml
);
1545 DTRACE_PROBE1(xnf_send_b_cont
, int, n_chunks
);
1548 * Make sure packet isn't too large.
1550 if (length
> XNF_FRAMESIZE
) {
1552 "xnf%d: oversized packet (%d bytes) dropped",
1553 ddi_get_instance(xnfp
->xnf_devinfo
), length
);
1558 txp
= kmem_cache_alloc(xnfp
->xnf_tx_buf_cache
, KM_SLEEP
);
1560 txp
->tx_type
= TX_DATA
;
1562 if ((n_chunks
> xnf_max_tx_frags
) || page_oops
) {
1564 * Loan a side buffer rather than the mblk
1567 txp
->tx_bdesc
= xnf_tx_pullup(xnfp
, mp
);
1568 if (txp
->tx_bdesc
== NULL
) {
1569 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, txp
);
1573 txp
->tx_bufp
= txp
->tx_bdesc
->buf
;
1574 txp
->tx_mfn
= txp
->tx_bdesc
->buf_mfn
;
1575 txp
->tx_txreq
.gref
= txp
->tx_bdesc
->grant_ref
;
1579 ddi_dma_cookie_t dma_cookie
;
1582 rc
= ddi_dma_addr_bind_handle(txp
->tx_dma_handle
,
1583 NULL
, (char *)mp
->b_rptr
, length
,
1584 DDI_DMA_WRITE
| DDI_DMA_STREAMING
,
1585 DDI_DMA_DONTWAIT
, 0, &dma_cookie
,
1587 if (rc
!= DDI_DMA_MAPPED
) {
1588 ASSERT(rc
!= DDI_DMA_INUSE
);
1589 ASSERT(rc
!= DDI_DMA_PARTIAL_MAP
);
1592 if (rc
!= DDI_DMA_NORESOURCES
)
1594 "xnf%d: bind_handle failed (%x)",
1595 ddi_get_instance(xnfp
->xnf_devinfo
),
1598 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, txp
);
1601 ASSERT(ncookies
== 1);
1603 txp
->tx_bdesc
= NULL
;
1604 txp
->tx_bufp
= (caddr_t
)mp
->b_rptr
;
1606 xnf_btop(pa_to_ma(dma_cookie
.dmac_laddress
));
1607 txp
->tx_txreq
.gref
= gref_get(xnfp
);
1608 if (txp
->tx_txreq
.gref
== INVALID_GRANT_REF
) {
1609 (void) ddi_dma_unbind_handle(
1610 txp
->tx_dma_handle
);
1611 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, txp
);
1614 gnttab_grant_foreign_access_ref(txp
->tx_txreq
.gref
,
1615 oeid
, txp
->tx_mfn
, 1);
1618 txp
->tx_next
= NULL
;
1620 txp
->tx_txreq
.size
= length
;
1621 txp
->tx_txreq
.offset
= (uintptr_t)txp
->tx_bufp
& PAGEOFFSET
;
1622 txp
->tx_txreq
.flags
= 0;
1623 mac_hcksum_get(mp
, NULL
, NULL
, NULL
, NULL
, &pflags
);
1626 * If the local protocol stack requests checksum
1627 * offload we set the 'checksum blank' flag,
1628 * indicating to the peer that we need the checksum
1629 * calculated for us.
1631 * We _don't_ set the validated flag, because we haven't
1632 * validated that the data and the checksum match.
1634 xnf_pseudo_cksum(txp
->tx_bufp
, length
);
1635 txp
->tx_txreq
.flags
|= NETTXF_csum_blank
;
1637 xnfp
->xnf_stat_tx_cksum_deferred
++;
1641 ASSERT(tail
== NULL
);
1645 ASSERT(tail
!= NULL
);
1647 tail
->tx_next
= txp
;
1655 * There is no point in preparing more than
1656 * NET_TX_RING_SIZE, as we won't be able to push them
1657 * into the ring in one go and would hence have to
1658 * un-prepare the extra.
1660 if (prepared
== NET_TX_RING_SIZE
)
1664 DTRACE_PROBE1(xnf_send_prepared
, int, prepared
);
1668 int notprepared
= 0;
1676 DTRACE_PROBE1(xnf_send_notprepared
, int, notprepared
);
1677 #else /* !XNF_DEBUG */
1678 DTRACE_PROBE1(xnf_send_notprepared
, int, -1);
1679 #endif /* XNF_DEBUG */
1683 * Push the packets we have prepared into the ring. They may
1687 head
= tx_push_packets(xnfp
, head
);
1690 * If some packets that we prepared were not sent, unprepare
1691 * them and add them back to the head of those we didn't
1696 mblk_t
*mp_head
, *mp_tail
;
1699 mp_head
= mp_tail
= NULL
;
1702 while (loop
!= NULL
) {
1703 xnf_txbuf_t
*next
= loop
->tx_next
;
1705 if (loop
->tx_bdesc
== NULL
) {
1706 (void) gnttab_end_foreign_access_ref(
1707 loop
->tx_txreq
.gref
, 1);
1708 gref_put(xnfp
, loop
->tx_txreq
.gref
);
1709 (void) ddi_dma_unbind_handle(
1710 loop
->tx_dma_handle
);
1712 xnf_buf_put(xnfp
, loop
->tx_bdesc
, B_TRUE
);
1715 ASSERT(loop
->tx_mp
!= NULL
);
1716 if (mp_head
== NULL
)
1717 mp_head
= loop
->tx_mp
;
1718 mp_tail
= loop
->tx_mp
;
1720 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, loop
);
1725 if (mp_tail
== NULL
) {
1726 ASSERT(mp_head
== NULL
);
1728 ASSERT(mp_head
!= NULL
);
1730 mp_tail
->b_next
= mp
;
1734 DTRACE_PROBE1(xnf_send_unprepared
, int, unprepared
);
1738 * If any mblks are left then we have deferred for some reason
1739 * and need to ask for a re-schedule later. This is typically
1740 * due to the ring filling.
1743 mutex_enter(&xnfp
->xnf_schedlock
);
1744 xnfp
->xnf_need_sched
= B_TRUE
;
1745 mutex_exit(&xnfp
->xnf_schedlock
);
1747 xnfp
->xnf_stat_tx_defer
++;
1754 * Notification of RX packets. Currently no TX-complete interrupt is
1755 * used, as we clean the TX ring lazily.
1758 xnf_intr(caddr_t arg
)
1760 xnf_t
*xnfp
= (xnf_t
*)arg
;
1762 boolean_t need_sched
, clean_ring
;
1764 mutex_enter(&xnfp
->xnf_rxlock
);
1767 * Interrupts before we are connected are spurious.
1769 if (!xnfp
->xnf_connected
) {
1770 mutex_exit(&xnfp
->xnf_rxlock
);
1771 xnfp
->xnf_stat_unclaimed_interrupts
++;
1772 return (DDI_INTR_UNCLAIMED
);
1776 * Receive side processing.
1780 * Collect buffers from the ring.
1782 xnf_rx_collect(xnfp
);
1785 * Interrupt me when the next receive buffer is consumed.
1787 xnfp
->xnf_rx_ring
.sring
->rsp_event
=
1788 xnfp
->xnf_rx_ring
.rsp_cons
+ 1;
1791 } while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp
->xnf_rx_ring
));
1793 if (xnfp
->xnf_rx_new_buffers_posted
) {
1797 * Indicate to the peer that we have re-filled the
1798 * receive ring, if it cares.
1800 /* LINTED: constant in conditional context */
1801 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp
->xnf_rx_ring
, notify
);
1803 ec_notify_via_evtchn(xnfp
->xnf_evtchn
);
1804 xnfp
->xnf_rx_new_buffers_posted
= B_FALSE
;
1807 mp
= xnfp
->xnf_rx_head
;
1808 xnfp
->xnf_rx_head
= xnfp
->xnf_rx_tail
= NULL
;
1810 xnfp
->xnf_stat_interrupts
++;
1811 mutex_exit(&xnfp
->xnf_rxlock
);
1814 mac_rx(xnfp
->xnf_mh
, NULL
, mp
);
1817 * Transmit side processing.
1819 * If a previous transmit attempt failed or we have pending
1820 * multicast requests, clean the ring.
1822 * If we previously stalled transmission and cleaning produces
1823 * some free slots, tell upstream to attempt sending again.
1825 * The odd style is to avoid acquiring xnf_txlock unless we
1826 * will actually look inside the tx machinery.
1828 mutex_enter(&xnfp
->xnf_schedlock
);
1829 need_sched
= xnfp
->xnf_need_sched
;
1830 clean_ring
= need_sched
|| (xnfp
->xnf_pending_multicast
> 0);
1831 mutex_exit(&xnfp
->xnf_schedlock
);
1836 mutex_enter(&xnfp
->xnf_txlock
);
1837 free_slots
= tx_slots_get(xnfp
, 0, B_FALSE
);
1839 if (need_sched
&& (free_slots
> 0)) {
1840 mutex_enter(&xnfp
->xnf_schedlock
);
1841 xnfp
->xnf_need_sched
= B_FALSE
;
1842 mutex_exit(&xnfp
->xnf_schedlock
);
1844 mac_tx_update(xnfp
->xnf_mh
);
1846 mutex_exit(&xnfp
->xnf_txlock
);
1849 return (DDI_INTR_CLAIMED
);
1853 * xnf_start() -- start the board receiving and enable interrupts.
1856 xnf_start(void *arg
)
1861 if (xnf_debug
& XNF_DEBUG_TRACE
)
1862 printf("xnf%d start(0x%p)\n",
1863 ddi_get_instance(xnfp
->xnf_devinfo
), (void *)xnfp
);
1866 mutex_enter(&xnfp
->xnf_rxlock
);
1867 mutex_enter(&xnfp
->xnf_txlock
);
1869 /* Accept packets from above. */
1870 xnfp
->xnf_running
= B_TRUE
;
1872 mutex_exit(&xnfp
->xnf_txlock
);
1873 mutex_exit(&xnfp
->xnf_rxlock
);
1878 /* xnf_stop() - disable hardware */
1885 if (xnf_debug
& XNF_DEBUG_TRACE
)
1886 printf("xnf%d stop(0x%p)\n",
1887 ddi_get_instance(xnfp
->xnf_devinfo
), (void *)xnfp
);
1890 mutex_enter(&xnfp
->xnf_rxlock
);
1891 mutex_enter(&xnfp
->xnf_txlock
);
1893 xnfp
->xnf_running
= B_FALSE
;
1895 mutex_exit(&xnfp
->xnf_txlock
);
1896 mutex_exit(&xnfp
->xnf_rxlock
);
1900 * Hang buffer `bdesc' on the RX ring.
1903 xnf_rxbuf_hang(xnf_t
*xnfp
, xnf_buf_t
*bdesc
)
1905 netif_rx_request_t
*reqp
;
1908 ASSERT(MUTEX_HELD(&xnfp
->xnf_rxlock
));
1910 reqp
= RING_GET_REQUEST(&xnfp
->xnf_rx_ring
,
1911 xnfp
->xnf_rx_ring
.req_prod_pvt
);
1912 hang_ix
= (RING_IDX
) (reqp
- RING_GET_REQUEST(&xnfp
->xnf_rx_ring
, 0));
1913 ASSERT(xnfp
->xnf_rx_pkt_info
[hang_ix
] == NULL
);
1915 reqp
->id
= bdesc
->id
= hang_ix
;
1916 reqp
->gref
= bdesc
->grant_ref
;
1918 xnfp
->xnf_rx_pkt_info
[hang_ix
] = bdesc
;
1919 xnfp
->xnf_rx_ring
.req_prod_pvt
++;
1921 xnfp
->xnf_rx_new_buffers_posted
= B_TRUE
;
1925 * Collect packets from the RX ring, storing them in `xnfp' for later
1929 xnf_rx_collect(xnf_t
*xnfp
)
1931 mblk_t
*head
, *tail
;
1933 ASSERT(MUTEX_HELD(&xnfp
->xnf_rxlock
));
1936 * Loop over unconsumed responses:
1938 * 2. take corresponding buffer off recv. ring
1939 * 3. indicate this by setting slot to NULL
1940 * 4. create a new message and
1941 * 5. copy data in, adjust ptr
1946 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp
->xnf_rx_ring
)) {
1947 netif_rx_response_t
*rxpkt
;
1952 boolean_t hwcsum
= B_FALSE
;
1956 rxpkt
= RING_GET_RESPONSE(&xnfp
->xnf_rx_ring
,
1957 xnfp
->xnf_rx_ring
.rsp_cons
);
1959 DTRACE_PROBE4(xnf_rx_got_rsp
, int, (int)rxpkt
->id
,
1960 int, (int)rxpkt
->offset
,
1961 int, (int)rxpkt
->flags
,
1962 int, (int)rxpkt
->status
);
1967 bdesc
= xnfp
->xnf_rx_pkt_info
[rxpkt
->id
];
1972 xnfp
->xnf_rx_pkt_info
[rxpkt
->id
] = NULL
;
1973 ASSERT(bdesc
->id
== rxpkt
->id
);
1975 ref
= bdesc
->grant_ref
;
1976 off
= rxpkt
->offset
;
1977 len
= rxpkt
->status
;
1979 if (!xnfp
->xnf_running
) {
1980 DTRACE_PROBE4(xnf_rx_not_running
,
1982 char *, bdesc
->buf
, int, rxpkt
->offset
,
1983 char *, ((char *)bdesc
->buf
) + rxpkt
->offset
);
1985 xnfp
->xnf_stat_drop
++;
1987 } else if (len
<= 0) {
1988 DTRACE_PROBE4(xnf_rx_pkt_status_negative
,
1990 char *, bdesc
->buf
, int, rxpkt
->offset
,
1991 char *, ((char *)bdesc
->buf
) + rxpkt
->offset
);
1993 xnfp
->xnf_stat_errrx
++;
1997 xnfp
->xnf_stat_runt
++;
1999 case NETIF_RSP_ERROR
:
2000 xnfp
->xnf_stat_mac_rcv_error
++;
2002 case NETIF_RSP_DROPPED
:
2003 xnfp
->xnf_stat_norxbuf
++;
2007 } else if (bdesc
->grant_ref
== INVALID_GRANT_REF
) {
2008 cmn_err(CE_WARN
, "Bad rx grant reference %d "
2009 "from domain %d", ref
,
2010 xvdi_get_oeid(xnfp
->xnf_devinfo
));
2012 } else if ((off
+ len
) > PAGESIZE
) {
2013 cmn_err(CE_WARN
, "Rx packet overflows page "
2014 "(offset %ld, length %ld) from domain %d",
2015 off
, len
, xvdi_get_oeid(xnfp
->xnf_devinfo
));
2017 xnf_buf_t
*nbuf
= NULL
;
2019 DTRACE_PROBE4(xnf_rx_packet
, int, len
,
2020 char *, bdesc
->buf
, int, off
,
2021 char *, ((char *)bdesc
->buf
) + off
);
2023 ASSERT(off
+ len
<= PAGEOFFSET
);
2025 if (rxpkt
->flags
& NETRXF_data_validated
)
2029 * If the packet is below a pre-determined
2030 * size we will copy data out rather than
2033 if (len
> xnf_rx_copy_limit
)
2034 nbuf
= xnf_buf_get(xnfp
, KM_NOSLEEP
, B_FALSE
);
2037 * If we have a replacement buffer, attempt to
2038 * wrap the existing one with an mblk_t in
2039 * order that the upper layers of the stack
2040 * might use it directly.
2043 mp
= desballoc((unsigned char *)bdesc
->buf
,
2044 bdesc
->len
, 0, &bdesc
->free_rtn
);
2046 xnfp
->xnf_stat_rx_desballoc_fail
++;
2047 xnfp
->xnf_stat_norxbuf
++;
2049 xnf_buf_put(xnfp
, nbuf
, B_FALSE
);
2052 mp
->b_rptr
= mp
->b_rptr
+ off
;
2053 mp
->b_wptr
= mp
->b_rptr
+ len
;
2056 * Release the grant reference
2057 * associated with this buffer
2058 * - they are scarce and the
2059 * upper layers of the stack
2062 (void) gnttab_end_foreign_access_ref(
2063 bdesc
->grant_ref
, 0);
2064 gref_put(xnfp
, bdesc
->grant_ref
);
2065 bdesc
->grant_ref
= INVALID_GRANT_REF
;
2073 * No replacement buffer allocated -
2074 * attempt to copy the data out and
2075 * re-hang the existing buffer.
2079 mp
= allocb(len
, BPRI_MED
);
2081 xnfp
->xnf_stat_rx_allocb_fail
++;
2082 xnfp
->xnf_stat_norxbuf
++;
2085 bcopy(bdesc
->buf
+ off
, mp
->b_wptr
,
2092 /* Re-hang the buffer. */
2093 xnf_rxbuf_hang(xnfp
, bdesc
);
2098 * If the peer says that the data has
2099 * been validated then we declare that
2100 * the full checksum has been
2103 * We don't look at the "checksum
2104 * blank" flag, and hence could have a
2105 * packet here that we are asserting
2106 * is good with a blank checksum.
2108 mac_hcksum_set(mp
, 0, 0, 0, 0,
2110 xnfp
->xnf_stat_rx_cksum_no_need
++;
2113 ASSERT(tail
== NULL
);
2117 ASSERT(tail
!= NULL
);
2123 ASSERT(mp
->b_next
== NULL
);
2125 xnfp
->xnf_stat_ipackets
++;
2126 xnfp
->xnf_stat_rbytes
+= len
;
2129 xnfp
->xnf_rx_ring
.rsp_cons
++;
2133 * Store the mblks we have collected.
2136 ASSERT(tail
!= NULL
);
2138 if (xnfp
->xnf_rx_head
== NULL
) {
2139 ASSERT(xnfp
->xnf_rx_tail
== NULL
);
2141 xnfp
->xnf_rx_head
= head
;
2143 ASSERT(xnfp
->xnf_rx_tail
!= NULL
);
2145 xnfp
->xnf_rx_tail
->b_next
= head
;
2147 xnfp
->xnf_rx_tail
= tail
;
2152 * xnf_alloc_dma_resources() -- initialize the driver's structures
2155 xnf_alloc_dma_resources(xnf_t
*xnfp
)
2157 dev_info_t
*devinfo
= xnfp
->xnf_devinfo
;
2159 ddi_dma_cookie_t dma_cookie
;
2165 * The code below allocates all the DMA data structures that
2166 * need to be released when the driver is detached.
2168 * Allocate page for the transmit descriptor ring.
2170 if (ddi_dma_alloc_handle(devinfo
, &ringbuf_dma_attr
,
2171 DDI_DMA_SLEEP
, 0, &xnfp
->xnf_tx_ring_dma_handle
) != DDI_SUCCESS
)
2174 if (ddi_dma_mem_alloc(xnfp
->xnf_tx_ring_dma_handle
,
2175 PAGESIZE
, &accattr
, DDI_DMA_CONSISTENT
,
2176 DDI_DMA_SLEEP
, 0, &rptr
, &len
,
2177 &xnfp
->xnf_tx_ring_dma_acchandle
) != DDI_SUCCESS
) {
2178 ddi_dma_free_handle(&xnfp
->xnf_tx_ring_dma_handle
);
2179 xnfp
->xnf_tx_ring_dma_handle
= NULL
;
2183 if ((rc
= ddi_dma_addr_bind_handle(xnfp
->xnf_tx_ring_dma_handle
, NULL
,
2184 rptr
, PAGESIZE
, DDI_DMA_RDWR
| DDI_DMA_CONSISTENT
,
2185 DDI_DMA_SLEEP
, 0, &dma_cookie
, &ncookies
)) != DDI_DMA_MAPPED
) {
2186 ddi_dma_mem_free(&xnfp
->xnf_tx_ring_dma_acchandle
);
2187 ddi_dma_free_handle(&xnfp
->xnf_tx_ring_dma_handle
);
2188 xnfp
->xnf_tx_ring_dma_handle
= NULL
;
2189 xnfp
->xnf_tx_ring_dma_acchandle
= NULL
;
2190 if (rc
== DDI_DMA_NORESOURCES
)
2196 ASSERT(ncookies
== 1);
2197 bzero(rptr
, PAGESIZE
);
2198 /* LINTED: constant in conditional context */
2199 SHARED_RING_INIT((netif_tx_sring_t
*)rptr
);
2200 /* LINTED: constant in conditional context */
2201 FRONT_RING_INIT(&xnfp
->xnf_tx_ring
, (netif_tx_sring_t
*)rptr
, PAGESIZE
);
2202 xnfp
->xnf_tx_ring_phys_addr
= dma_cookie
.dmac_laddress
;
2205 * Allocate page for the receive descriptor ring.
2207 if (ddi_dma_alloc_handle(devinfo
, &ringbuf_dma_attr
,
2208 DDI_DMA_SLEEP
, 0, &xnfp
->xnf_rx_ring_dma_handle
) != DDI_SUCCESS
)
2211 if (ddi_dma_mem_alloc(xnfp
->xnf_rx_ring_dma_handle
,
2212 PAGESIZE
, &accattr
, DDI_DMA_CONSISTENT
,
2213 DDI_DMA_SLEEP
, 0, &rptr
, &len
,
2214 &xnfp
->xnf_rx_ring_dma_acchandle
) != DDI_SUCCESS
) {
2215 ddi_dma_free_handle(&xnfp
->xnf_rx_ring_dma_handle
);
2216 xnfp
->xnf_rx_ring_dma_handle
= NULL
;
2220 if ((rc
= ddi_dma_addr_bind_handle(xnfp
->xnf_rx_ring_dma_handle
, NULL
,
2221 rptr
, PAGESIZE
, DDI_DMA_RDWR
| DDI_DMA_CONSISTENT
,
2222 DDI_DMA_SLEEP
, 0, &dma_cookie
, &ncookies
)) != DDI_DMA_MAPPED
) {
2223 ddi_dma_mem_free(&xnfp
->xnf_rx_ring_dma_acchandle
);
2224 ddi_dma_free_handle(&xnfp
->xnf_rx_ring_dma_handle
);
2225 xnfp
->xnf_rx_ring_dma_handle
= NULL
;
2226 xnfp
->xnf_rx_ring_dma_acchandle
= NULL
;
2227 if (rc
== DDI_DMA_NORESOURCES
)
2233 ASSERT(ncookies
== 1);
2234 bzero(rptr
, PAGESIZE
);
2235 /* LINTED: constant in conditional context */
2236 SHARED_RING_INIT((netif_rx_sring_t
*)rptr
);
2237 /* LINTED: constant in conditional context */
2238 FRONT_RING_INIT(&xnfp
->xnf_rx_ring
, (netif_rx_sring_t
*)rptr
, PAGESIZE
);
2239 xnfp
->xnf_rx_ring_phys_addr
= dma_cookie
.dmac_laddress
;
2241 return (DDI_SUCCESS
);
2244 cmn_err(CE_WARN
, "xnf%d: could not allocate enough DMA memory",
2245 ddi_get_instance(xnfp
->xnf_devinfo
));
2247 xnf_release_dma_resources(xnfp
);
2248 return (DDI_FAILURE
);
2252 * Release all DMA resources in the opposite order from acquisition
2255 xnf_release_dma_resources(xnf_t
*xnfp
)
2260 * Free receive buffers which are currently associated with
2263 mutex_enter(&xnfp
->xnf_rxlock
);
2264 for (i
= 0; i
< NET_RX_RING_SIZE
; i
++) {
2267 if ((bp
= xnfp
->xnf_rx_pkt_info
[i
]) == NULL
)
2269 xnfp
->xnf_rx_pkt_info
[i
] = NULL
;
2270 xnf_buf_put(xnfp
, bp
, B_FALSE
);
2272 mutex_exit(&xnfp
->xnf_rxlock
);
2274 /* Free the receive ring buffer. */
2275 if (xnfp
->xnf_rx_ring_dma_acchandle
!= NULL
) {
2276 (void) ddi_dma_unbind_handle(xnfp
->xnf_rx_ring_dma_handle
);
2277 ddi_dma_mem_free(&xnfp
->xnf_rx_ring_dma_acchandle
);
2278 ddi_dma_free_handle(&xnfp
->xnf_rx_ring_dma_handle
);
2279 xnfp
->xnf_rx_ring_dma_acchandle
= NULL
;
2281 /* Free the transmit ring buffer. */
2282 if (xnfp
->xnf_tx_ring_dma_acchandle
!= NULL
) {
2283 (void) ddi_dma_unbind_handle(xnfp
->xnf_tx_ring_dma_handle
);
2284 ddi_dma_mem_free(&xnfp
->xnf_tx_ring_dma_acchandle
);
2285 ddi_dma_free_handle(&xnfp
->xnf_tx_ring_dma_handle
);
2286 xnfp
->xnf_tx_ring_dma_acchandle
= NULL
;
2292 * Release any packets and associated structures used by the TX ring.
2295 xnf_release_mblks(xnf_t
*xnfp
)
2300 for (i
= 0, tidp
= &xnfp
->xnf_tx_pkt_id
[0];
2301 i
< NET_TX_RING_SIZE
;
2303 xnf_txbuf_t
*txp
= tidp
->txbuf
;
2306 ASSERT(txp
->tx_mp
!= NULL
);
2307 freemsg(txp
->tx_mp
);
2309 txid_put(xnfp
, tidp
);
2310 kmem_cache_free(xnfp
->xnf_tx_buf_cache
, txp
);
2316 xnf_buf_constructor(void *buf
, void *arg
, int kmflag
)
2318 int (*ddiflags
)(caddr_t
) = DDI_DMA_SLEEP
;
2319 xnf_buf_t
*bdesc
= buf
;
2321 ddi_dma_cookie_t dma_cookie
;
2325 if (kmflag
& KM_NOSLEEP
)
2326 ddiflags
= DDI_DMA_DONTWAIT
;
2328 /* Allocate a DMA access handle for the buffer. */
2329 if (ddi_dma_alloc_handle(xnfp
->xnf_devinfo
, &buf_dma_attr
,
2330 ddiflags
, 0, &bdesc
->dma_handle
) != DDI_SUCCESS
)
2333 /* Allocate DMA-able memory for buffer. */
2334 if (ddi_dma_mem_alloc(bdesc
->dma_handle
,
2335 PAGESIZE
, &data_accattr
, DDI_DMA_STREAMING
, ddiflags
, 0,
2336 &bdesc
->buf
, &len
, &bdesc
->acc_handle
) != DDI_SUCCESS
)
2339 /* Bind to virtual address of buffer to get physical address. */
2340 if (ddi_dma_addr_bind_handle(bdesc
->dma_handle
, NULL
,
2341 bdesc
->buf
, len
, DDI_DMA_RDWR
| DDI_DMA_STREAMING
,
2342 ddiflags
, 0, &dma_cookie
, &ncookies
) != DDI_DMA_MAPPED
)
2344 ASSERT(ncookies
== 1);
2346 bdesc
->free_rtn
.free_func
= xnf_buf_recycle
;
2347 bdesc
->free_rtn
.free_arg
= (caddr_t
)bdesc
;
2349 bdesc
->buf_phys
= dma_cookie
.dmac_laddress
;
2350 bdesc
->buf_mfn
= pfn_to_mfn(xnf_btop(bdesc
->buf_phys
));
2351 bdesc
->len
= dma_cookie
.dmac_size
;
2352 bdesc
->grant_ref
= INVALID_GRANT_REF
;
2353 bdesc
->gen
= xnfp
->xnf_gen
;
2355 atomic_inc_64(&xnfp
->xnf_stat_buf_allocated
);
2360 ddi_dma_mem_free(&bdesc
->acc_handle
);
2363 ddi_dma_free_handle(&bdesc
->dma_handle
);
2367 ASSERT(kmflag
& KM_NOSLEEP
); /* Cannot fail for KM_SLEEP. */
2372 xnf_buf_destructor(void *buf
, void *arg
)
2374 xnf_buf_t
*bdesc
= buf
;
2377 (void) ddi_dma_unbind_handle(bdesc
->dma_handle
);
2378 ddi_dma_mem_free(&bdesc
->acc_handle
);
2379 ddi_dma_free_handle(&bdesc
->dma_handle
);
2381 atomic_dec_64(&xnfp
->xnf_stat_buf_allocated
);
2385 xnf_buf_get(xnf_t
*xnfp
, int flags
, boolean_t readonly
)
2391 * Usually grant references are more scarce than memory, so we
2392 * attempt to acquire a grant reference first.
2394 gref
= gref_get(xnfp
);
2395 if (gref
== INVALID_GRANT_REF
)
2398 bufp
= kmem_cache_alloc(xnfp
->xnf_buf_cache
, flags
);
2400 gref_put(xnfp
, gref
);
2404 ASSERT(bufp
->grant_ref
== INVALID_GRANT_REF
);
2406 bufp
->grant_ref
= gref
;
2408 if (bufp
->gen
!= xnfp
->xnf_gen
)
2409 xnf_buf_refresh(bufp
);
2411 gnttab_grant_foreign_access_ref(bufp
->grant_ref
,
2412 xvdi_get_oeid(bufp
->xnfp
->xnf_devinfo
),
2413 bufp
->buf_mfn
, readonly
? 1 : 0);
2415 atomic_inc_64(&xnfp
->xnf_stat_buf_outstanding
);
2421 xnf_buf_put(xnf_t
*xnfp
, xnf_buf_t
*bufp
, boolean_t readonly
)
2423 if (bufp
->grant_ref
!= INVALID_GRANT_REF
) {
2424 (void) gnttab_end_foreign_access_ref(
2425 bufp
->grant_ref
, readonly
? 1 : 0);
2426 gref_put(xnfp
, bufp
->grant_ref
);
2427 bufp
->grant_ref
= INVALID_GRANT_REF
;
2430 kmem_cache_free(xnfp
->xnf_buf_cache
, bufp
);
2432 atomic_dec_64(&xnfp
->xnf_stat_buf_outstanding
);
2436 * Refresh any cached data about a buffer after resume.
2439 xnf_buf_refresh(xnf_buf_t
*bdesc
)
2441 bdesc
->buf_mfn
= pfn_to_mfn(xnf_btop(bdesc
->buf_phys
));
2442 bdesc
->gen
= bdesc
->xnfp
->xnf_gen
;
2446 * Streams `freeb' routine for `xnf_buf_t' when used as transmit
2447 * look-aside buffers.
2450 xnf_buf_recycle(xnf_buf_t
*bdesc
)
2452 xnf_t
*xnfp
= bdesc
->xnfp
;
2454 xnf_buf_put(xnfp
, bdesc
, B_TRUE
);
2458 xnf_tx_buf_constructor(void *buf
, void *arg
, int kmflag
)
2460 int (*ddiflags
)(caddr_t
) = DDI_DMA_SLEEP
;
2461 xnf_txbuf_t
*txp
= buf
;
2464 if (kmflag
& KM_NOSLEEP
)
2465 ddiflags
= DDI_DMA_DONTWAIT
;
2467 if (ddi_dma_alloc_handle(xnfp
->xnf_devinfo
, &buf_dma_attr
,
2468 ddiflags
, 0, &txp
->tx_dma_handle
) != DDI_SUCCESS
) {
2469 ASSERT(kmflag
& KM_NOSLEEP
); /* Cannot fail for KM_SLEEP. */
2477 xnf_tx_buf_destructor(void *buf
, void *arg
)
2479 _NOTE(ARGUNUSED(arg
));
2480 xnf_txbuf_t
*txp
= buf
;
2482 ddi_dma_free_handle(&txp
->tx_dma_handle
);
2488 static char *xnf_aux_statistics
[] = {
2489 "tx_cksum_deferred",
2492 "unclaimed_interrupts",
2502 "rx_desballoc_fail",
2506 xnf_kstat_aux_update(kstat_t
*ksp
, int flag
)
2511 if (flag
!= KSTAT_READ
)
2514 xnfp
= ksp
->ks_private
;
2518 * Assignment order must match that of the names in
2519 * xnf_aux_statistics.
2521 (knp
++)->value
.ui64
= xnfp
->xnf_stat_tx_cksum_deferred
;
2522 (knp
++)->value
.ui64
= xnfp
->xnf_stat_rx_cksum_no_need
;
2524 (knp
++)->value
.ui64
= xnfp
->xnf_stat_interrupts
;
2525 (knp
++)->value
.ui64
= xnfp
->xnf_stat_unclaimed_interrupts
;
2526 (knp
++)->value
.ui64
= xnfp
->xnf_stat_tx_pullup
;
2527 (knp
++)->value
.ui64
= xnfp
->xnf_stat_tx_pagebndry
;
2528 (knp
++)->value
.ui64
= xnfp
->xnf_stat_tx_attempt
;
2530 (knp
++)->value
.ui64
= xnfp
->xnf_stat_buf_allocated
;
2531 (knp
++)->value
.ui64
= xnfp
->xnf_stat_buf_outstanding
;
2532 (knp
++)->value
.ui64
= xnfp
->xnf_stat_gref_outstanding
;
2533 (knp
++)->value
.ui64
= xnfp
->xnf_stat_gref_failure
;
2534 (knp
++)->value
.ui64
= xnfp
->xnf_stat_gref_peak
;
2535 (knp
++)->value
.ui64
= xnfp
->xnf_stat_rx_allocb_fail
;
2536 (knp
++)->value
.ui64
= xnfp
->xnf_stat_rx_desballoc_fail
;
2542 xnf_kstat_init(xnf_t
*xnfp
)
2544 int nstat
= sizeof (xnf_aux_statistics
) /
2545 sizeof (xnf_aux_statistics
[0]);
2546 char **cp
= xnf_aux_statistics
;
2550 * Create and initialise kstats.
2552 if ((xnfp
->xnf_kstat_aux
= kstat_create("xnf",
2553 ddi_get_instance(xnfp
->xnf_devinfo
),
2554 "aux_statistics", "net", KSTAT_TYPE_NAMED
,
2558 xnfp
->xnf_kstat_aux
->ks_private
= xnfp
;
2559 xnfp
->xnf_kstat_aux
->ks_update
= xnf_kstat_aux_update
;
2561 knp
= xnfp
->xnf_kstat_aux
->ks_data
;
2563 kstat_named_init(knp
, *cp
, KSTAT_DATA_UINT64
);
2570 kstat_install(xnfp
->xnf_kstat_aux
);
2576 xnf_stat(void *arg
, uint_t stat
, uint64_t *val
)
2580 mutex_enter(&xnfp
->xnf_rxlock
);
2581 mutex_enter(&xnfp
->xnf_txlock
);
2583 #define mac_stat(q, r) \
2584 case (MAC_STAT_##q): \
2585 *val = xnfp->xnf_stat_##r; \
2588 #define ether_stat(q, r) \
2589 case (ETHER_STAT_##q): \
2590 *val = xnfp->xnf_stat_##r; \
2595 mac_stat(IPACKETS
, ipackets
);
2596 mac_stat(OPACKETS
, opackets
);
2597 mac_stat(RBYTES
, rbytes
);
2598 mac_stat(OBYTES
, obytes
);
2599 mac_stat(NORCVBUF
, norxbuf
);
2600 mac_stat(IERRORS
, errrx
);
2601 mac_stat(NOXMTBUF
, tx_defer
);
2603 ether_stat(MACRCV_ERRORS
, mac_rcv_error
);
2604 ether_stat(TOOSHORT_ERRORS
, runt
);
2606 /* always claim to be in full duplex mode */
2607 case ETHER_STAT_LINK_DUPLEX
:
2608 *val
= LINK_DUPLEX_FULL
;
2611 /* always claim to be at 1Gb/s link speed */
2612 case MAC_STAT_IFSPEED
:
2613 *val
= 1000000000ull;
2617 mutex_exit(&xnfp
->xnf_txlock
);
2618 mutex_exit(&xnfp
->xnf_rxlock
);
2626 mutex_exit(&xnfp
->xnf_txlock
);
2627 mutex_exit(&xnfp
->xnf_rxlock
);
2633 xnf_getcapab(void *arg
, mac_capab_t cap
, void *cap_data
)
2635 _NOTE(ARGUNUSED(arg
));
2638 case MAC_CAPAB_HCKSUM
: {
2639 uint32_t *capab
= cap_data
;
2642 * Whilst the flag used to communicate with the IO
2643 * domain is called "NETTXF_csum_blank", the checksum
2644 * in the packet must contain the pseudo-header
2645 * checksum and not zero.
2647 * To help out the IO domain, we might use
2648 * HCKSUM_INET_PARTIAL. Unfortunately our stack will
2649 * then use checksum offload for IPv6 packets, which
2650 * the IO domain can't handle.
2652 * As a result, we declare ourselves capable of
2653 * HCKSUM_INET_FULL_V4. This means that we receive
2654 * IPv4 packets from the stack with a blank checksum
2655 * field and must insert the pseudo-header checksum
2656 * before passing the packet to the IO domain.
2658 *capab
= HCKSUM_INET_FULL_V4
;
2669 * The state of the peer has changed - react accordingly.
2672 oe_state_change(dev_info_t
*dip
, ddi_eventcookie_t id
,
2673 void *arg
, void *impl_data
)
2675 _NOTE(ARGUNUSED(id
, arg
));
2676 xnf_t
*xnfp
= ddi_get_driver_private(dip
);
2677 XenbusState new_state
= *(XenbusState
*)impl_data
;
2679 ASSERT(xnfp
!= NULL
);
2681 switch (new_state
) {
2682 case XenbusStateUnknown
:
2683 case XenbusStateInitialising
:
2684 case XenbusStateInitialised
:
2685 case XenbusStateClosing
:
2686 case XenbusStateClosed
:
2687 case XenbusStateReconfiguring
:
2688 case XenbusStateReconfigured
:
2691 case XenbusStateInitWait
:
2692 xnf_read_config(xnfp
);
2694 if (!xnfp
->xnf_be_rx_copy
) {
2696 "The xnf driver requires a dom0 that "
2697 "supports 'feature-rx-copy'.");
2698 (void) xvdi_switch_state(xnfp
->xnf_devinfo
,
2699 XBT_NULL
, XenbusStateClosed
);
2704 * Connect to the backend.
2706 xnf_be_connect(xnfp
);
2709 * Our MAC address as discovered by xnf_read_config().
2711 mac_unicst_update(xnfp
->xnf_mh
, xnfp
->xnf_mac_addr
);
2715 case XenbusStateConnected
:
2716 mutex_enter(&xnfp
->xnf_rxlock
);
2717 mutex_enter(&xnfp
->xnf_txlock
);
2719 xnfp
->xnf_connected
= B_TRUE
;
2721 * Wake up any threads waiting to send data to
2724 cv_broadcast(&xnfp
->xnf_cv_state
);
2726 mutex_exit(&xnfp
->xnf_txlock
);
2727 mutex_exit(&xnfp
->xnf_rxlock
);
2730 * Kick the peer in case it missed any transmit
2731 * requests in the TX ring.
2733 ec_notify_via_evtchn(xnfp
->xnf_evtchn
);
2736 * There may already be completed receive requests in
2737 * the ring sent by backend after it gets connected
2738 * but before we see its state change here, so we call
2739 * xnf_intr() to handle them, if any.
2741 (void) xnf_intr((caddr_t
)xnfp
);
2744 * Mark the link up now that we are connected.
2746 mac_link_update(xnfp
->xnf_mh
, LINK_STATE_UP
);
2749 * Tell the backend about the multicast addresses in
2750 * which we are interested.
2752 mac_multicast_refresh(xnfp
->xnf_mh
, NULL
, xnfp
, B_TRUE
);