/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
module_param(separate_tx_rx_irq, bool, 0644);
/* The time that packets can stay on the guest Rx internal queue
 * before they are dropped.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);

/* The length of time before the frontend is considered unresponsive
 * because it isn't providing Rx slots.
 */
unsigned int rx_stall_timeout_msecs = 60000;
module_param(rx_stall_timeout_msecs, uint, 0444);

unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");
/*
 * This is the maximum number of slots a skb can have. If a guest sends
 * a skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
/* The amount to copy out of the first guest Tx slot into the skb's
 * linear area. If the first slot has more data, it will be mapped
 * and put into the first frag.
 *
 * This is sized to avoid pulling headers from the frags for most
 * TCP/IP packets.
 */
#define XEN_NETBACK_TX_COPY_LEN 128
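/* Sizing note (illustrative): 128 bytes covers a worst-case common
 * header stack, e.g. Ethernet (14) + IPv6 (40) + TCP with a full
 * options block (60) = 114 bytes, so protocol headers normally land in
 * the linear area and the stack rarely has to pull from the frags.
 */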
/* This is the maximum number of flows in the hash cache. */
#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     unsigned int extra_count,
			     s8 st);
static void push_tx_responses(struct xenvif_queue *queue);

static inline int tx_work_todo(struct xenvif_queue *queue);

static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
						      u16 id,
						      s8 st,
						      u16 offset,
						      u16 size,
						      u16 flags);
static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
				       u16 idx)
{
	return page_to_pfn(queue->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
}

#define callback_param(vif, pending_idx) \
	(vif->pending_tx_info[pending_idx].callback_struct)

/* Find the containing VIF's structure from a pointer in pending_tx_info array
 */
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
{
	u16 pending_idx = ubuf->desc;
	struct pending_tx_info *temp =
		container_of(ubuf, struct pending_tx_info, callback_struct);
	return container_of(temp - pending_idx,
			    struct xenvif_queue,
			    pending_tx_info[0]);
}

static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}
static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}
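/* Example: MAX_PENDING_REQS is a power of two, so the mask above maps a
 * free-running counter onto a ring slot without a modulo; e.g. if
 * MAX_PENDING_REQS == 256 then pending_index(255) == 255 and
 * pending_index(256) == 0. The producer/consumer counters themselves
 * are never wrapped explicitly.
 */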
static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;
	struct sk_buff *skb;
	int needed;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return false;

	needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
	if (skb_is_gso(skb))
		needed++;
	if (skb->sw_hash)
		needed++;

	do {
		prod = queue->rx.sring->req_prod;
		cons = queue->rx.req_cons;

		if (prod - cons >= needed)
			return true;

		queue->rx.sring->req_event = prod + 1;

		/* Make sure event is visible before we check prod
		 * again.
		 */
		mb();
	} while (queue->rx.sring->req_prod != prod);

	return false;
}
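/* The loop above is the usual Xen shared-ring event batching pattern:
 * when too few slots are free, req_event is set to prod + 1 so the
 * frontend raises an event as soon as it posts new requests, and
 * req_prod is then re-read to catch a frontend that raced with us
 * before we go to sleep.
 */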
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned long flags;

	spin_lock_irqsave(&queue->rx_queue.lock, flags);

	__skb_queue_tail(&queue->rx_queue, skb);

	queue->rx_queue_len += skb->len;
	if (queue->rx_queue_len > queue->rx_queue_max)
		netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));

	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
}

static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	spin_lock_irq(&queue->rx_queue.lock);

	skb = __skb_dequeue(&queue->rx_queue);
	if (skb)
		queue->rx_queue_len -= skb->len;

	spin_unlock_irq(&queue->rx_queue.lock);

	return skb;
}

static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
{
	spin_lock_irq(&queue->rx_queue.lock);

	if (queue->rx_queue_len < queue->rx_queue_max)
		netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));

	spin_unlock_irq(&queue->rx_queue.lock);
}

static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	while ((skb = xenvif_rx_dequeue(queue)) != NULL)
		kfree_skb(skb);
}

static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	for (;;) {
		skb = skb_peek(&queue->rx_queue);
		if (!skb)
			break;
		if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
			break;
		xenvif_rx_dequeue(queue);
		kfree_skb(skb);
	}
}

struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct xenvif_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};
static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
						 struct netrx_pending_operations *npo)
{
	struct xenvif_rx_meta *meta;
	struct xen_netif_rx_request req;

	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req.id;

	npo->copy_off = 0;
	npo->copy_gref = req.gref;

	return meta;
}

struct gop_frag_copy {
	struct xenvif_queue *queue;
	struct netrx_pending_operations *npo;
	struct xenvif_rx_meta *meta;
	int head;
	int gso_type;
	int hash_present;

	struct page *page;
};
static void xenvif_setup_copy_gop(unsigned long gfn,
				  unsigned int offset,
				  unsigned int *len,
				  struct gop_frag_copy *info)
{
	struct gnttab_copy *copy_gop;
	struct xen_page_foreign *foreign;
	/* Convenient aliases */
	struct xenvif_queue *queue = info->queue;
	struct netrx_pending_operations *npo = info->npo;
	struct page *page = info->page;

	BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

	if (npo->copy_off == MAX_BUFFER_OFFSET)
		info->meta = get_next_rx_buffer(queue, npo);

	if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
		*len = MAX_BUFFER_OFFSET - npo->copy_off;

	copy_gop = npo->copy + npo->copy_prod++;
	copy_gop->flags = GNTCOPY_dest_gref;
	copy_gop->len = *len;

	foreign = xen_page_foreign(page);
	if (foreign) {
		copy_gop->source.domid = foreign->domid;
		copy_gop->source.u.ref = foreign->gref;
		copy_gop->flags |= GNTCOPY_source_gref;
	} else {
		copy_gop->source.domid = DOMID_SELF;
		copy_gop->source.u.gmfn = gfn;
	}
	copy_gop->source.offset = offset;

	copy_gop->dest.domid = queue->vif->domid;
	copy_gop->dest.offset = npo->copy_off;
	copy_gop->dest.u.ref = npo->copy_gref;

	npo->copy_off += *len;
	info->meta->size += *len;

	if (!info->head)
		return;

	/* Leave a gap for the GSO descriptor. */
	if ((1 << info->gso_type) & queue->vif->gso_mask)
		queue->rx.req_cons++;

	/* Leave a gap for the hash extra segment. */
	if (info->hash_present)
		queue->rx.req_cons++;

	info->head = 0; /* There must be something in this buffer now */
}
static void xenvif_gop_frag_copy_grant(unsigned long gfn,
				       unsigned offset,
				       unsigned int len,
				       void *data)
{
	unsigned int bytes;

	while (len) {
		bytes = len;
		xenvif_setup_copy_gop(gfn, offset, &bytes, data);
		offset += bytes;
		len -= bytes;
	}
}
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
				 struct netrx_pending_operations *npo,
				 struct page *page, unsigned long size,
				 unsigned long offset, int *head)
{
	struct gop_frag_copy info = {
		.queue = queue,
		.npo = npo,
		.head = *head,
		.gso_type = XEN_NETIF_GSO_TYPE_NONE,
		/* xenvif_set_skb_hash() will have either set a s/w
		 * hash or cleared the hash depending on
		 * whether the frontend wants a hash for this skb.
		 */
		.hash_present = skb->sw_hash,
	};
	unsigned long bytes;

	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
	}

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

	info.meta = npo->meta + npo->meta_prod - 1;

	/* Skip unused frames from start of page */
	page += offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	while (size > 0) {
		BUG_ON(offset >= PAGE_SIZE);

		bytes = PAGE_SIZE - offset;
		if (bytes > size)
			bytes = size;

		info.page = page;
		gnttab_foreach_grant_in_range(page, offset, bytes,
					      xenvif_gop_frag_copy_grant,
					      &info);
		size -= bytes;
		offset = 0;

		/* Next page */
		if (size) {
			BUG_ON(!PageCompound(page));
			page++;
		}
	}

	*head = info.head;
}
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
			  struct netrx_pending_operations *npo,
			  struct xenvif_queue *queue)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request req;
	struct xenvif_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;
	int gso_type;

	old_meta_prod = npo->meta_prod;

	gso_type = XEN_NETIF_GSO_TYPE_NONE;
	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
	}

	/* Set up a GSO prefix descriptor, if necessary */
	if ((1 << gso_type) & vif->gso_prefix_mask) {
		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req.id;
	}

	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
	meta = npo->meta + npo->meta_prod++;

	if ((1 << gso_type) & vif->gso_mask) {
		meta->gso_type = gso_type;
		meta->gso_size = skb_shinfo(skb)->gso_size;
	} else {
		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
		meta->gso_size = 0;
	}

	meta->size = 0;
	meta->id = req.id;
	npo->copy_off = 0;
	npo->copy_gref = req.gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		xenvif_gop_frag_copy(queue, skb, npo,
				     virt_to_page(data), len, offset, &head);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		xenvif_gop_frag_copy(queue, skb, npo,
				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
				     skb_shinfo(skb)->frags[i].page_offset,
				     &head);
	}

	return npo->meta_prod - old_meta_prod;
}
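/* Worked example (assuming MAX_BUFFER_OFFSET == XEN_PAGE_SIZE == 4096):
 * a 3000-byte non-GSO skb fits in one guest buffer, consuming one ring
 * request and one meta slot; a 9000-byte skb overflows copy_off twice,
 * so get_next_rx_buffer() pulls two further requests, for three meta
 * slots in total, matching the function's return value.
 */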
/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
			    struct netrx_pending_operations *npo)
{
	struct gnttab_copy *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}

static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
				      struct xenvif_rx_meta *meta,
				      int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(queue, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}

void xenvif_kick_thread(struct xenvif_queue *queue)
{
	wake_up(&queue->wq);
}
static void xenvif_rx_action(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	int ret;
	unsigned long offset;
	bool need_to_notify = false;

	struct netrx_pending_operations npo = {
		.copy  = queue->grant_copy_op,
		.meta  = queue->meta,
	};

	skb_queue_head_init(&rxq);

	while (xenvif_rx_ring_slots_available(queue)
	       && (skb = xenvif_rx_dequeue(queue)) != NULL) {
		queue->last_rx_time = jiffies;

		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);

		__skb_queue_tail(&rxq, skb);
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));

	if (!npo.copy_prod)
		goto done;

	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		struct xen_netif_extra_info *extra = NULL;

		if ((1 << queue->meta[npo.meta_cons].gso_type) &
		    vif->gso_prefix_mask) {
			resp = RING_GET_RESPONSE(&queue->rx,
						 queue->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = queue->meta[npo.meta_cons].gso_size;
			resp->id = queue->meta[npo.meta_cons].id;
			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;

			npo.meta_cons++;
			XENVIF_RX_CB(skb)->meta_slots_used--;
		}

		queue->stats.tx_bytes += skb->len;
		queue->stats.tx_packets++;

		status = xenvif_check_gop(vif,
					  XENVIF_RX_CB(skb)->meta_slots_used,
					  &npo);

		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
					status, offset,
					queue->meta[npo.meta_cons].size,
					flags);

		if ((1 << queue->meta[npo.meta_cons].gso_type) &
		    vif->gso_mask) {
			extra = (struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&queue->rx,
						  queue->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			extra->u.gso.type = queue->meta[npo.meta_cons].gso_type;
			extra->u.gso.size = queue->meta[npo.meta_cons].gso_size;
			extra->u.gso.pad = 0;
			extra->u.gso.features = 0;

			extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
			extra->flags = 0;
		}

		if (skb->sw_hash) {
			/* Since the skb got here via xenvif_select_queue()
			 * we know that the hash has been re-calculated
			 * according to a configuration set by the frontend
			 * and therefore we know that it is legitimate to
			 * pass it to the frontend.
			 */
			if (resp->flags & XEN_NETRXF_extra_info)
				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
			else
				resp->flags |= XEN_NETRXF_extra_info;

			extra = (struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&queue->rx,
						  queue->rx.rsp_prod_pvt++);

			extra->u.hash.algorithm =
				XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;

			if (skb->l4_hash)
				extra->u.hash.type =
					skb->protocol == htons(ETH_P_IP) ?
					_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
					_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
			else
				extra->u.hash.type =
					skb->protocol == htons(ETH_P_IP) ?
					_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
					_XEN_NETIF_CTRL_HASH_TYPE_IPV6;

			*(uint32_t *)extra->u.hash.value =
				skb_get_hash_raw(skb);

			extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
			extra->flags = 0;
		}

		xenvif_add_frag_responses(queue, status,
					  queue->meta + npo.meta_cons + 1,
					  XENVIF_RX_CB(skb)->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);

		need_to_notify |= !!ret;

		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
		dev_kfree_skb(skb);
	}

done:
	if (need_to_notify)
		notify_remote_via_irq(queue->rx_irq);
}
void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&queue->napi);
}

static void tx_add_credit(struct xenvif_queue *queue)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = max(131072UL, queue->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = queue->remaining_credit + queue->credit_bytes;
	if (max_credit < queue->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	queue->remaining_credit = min(max_credit, max_burst);
}
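/* This is effectively a token bucket: each replenish adds credit_bytes
 * of credit, saturating rather than wrapping. E.g. with credit_bytes =
 * 1000000, remaining_credit can never exceed max(128kB, 1000000) =
 * 1000000 bytes, no matter how long the interface stays idle.
 */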
void xenvif_tx_credit_callback(unsigned long data)
{
	struct xenvif_queue *queue = (struct xenvif_queue *)data;
	tx_add_credit(queue);
	xenvif_napi_schedule_or_enable_events(queue);
}

static void xenvif_tx_err(struct xenvif_queue *queue,
			  struct xen_netif_tx_request *txp,
			  unsigned int extra_count, RING_IDX end)
{
	RING_IDX cons = queue->tx.req_cons;
	unsigned long flags;

	do {
		spin_lock_irqsave(&queue->response_lock, flags);
		make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
		push_tx_responses(queue);
		spin_unlock_irqrestore(&queue->response_lock, flags);
		if (cons == end)
			break;
		RING_COPY_REQUEST(&queue->tx, cons++, txp);
		extra_count = 0; /* only the first frag can have extras */
	} while (1);
	queue->tx.req_cons = cons;
}

static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	vif->disabled = true;
	/* Disable the vif from queue 0's kthread */
	if (vif->queues)
		xenvif_kick_thread(&vif->queues[0]);
}
static int xenvif_count_requests(struct xenvif_queue *queue,
				 struct xen_netif_tx_request *first,
				 unsigned int extra_count,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = queue->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(queue->vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(queue->vif);
			return -ENODATA;
		}

		/* This guest is really using too many slots and is
		 * considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(queue->vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(queue->vif);
			return -E2BIG;
		}

		/* Xen network protocol had implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		RING_COPY_REQUEST(&queue->tx, cons + slots, txp);

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
			netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n",
				   txp->offset, txp->size);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(queue, first, extra_count, cons + slots);
		return drop_err;
	}

	return slots;
}
struct xenvif_tx_cb {
	u16 pending_idx;
};

#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
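/* The struct above is stored in skb->cb, the 48-byte scratch area that
 * the current owner of an skb may use freely. Only the head slot's
 * pending_idx needs to live there; the frag slots' indices are stashed
 * in the frags' page_offset fields via frag_set_pending_idx() above.
 */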
static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
					   u16 pending_idx,
					   struct xen_netif_tx_request *txp,
					   unsigned int extra_count,
					   struct gnttab_map_grant_ref *mop)
{
	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
	gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
			  GNTMAP_host_map | GNTMAP_readonly,
			  txp->gref, queue->vif->domid);

	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
	       sizeof(*txp));
	queue->pending_tx_info[pending_idx].extra_count = extra_count;
}
static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
{
	struct sk_buff *skb =
		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
			  GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(skb == NULL))
		return NULL;

	/* Packets passed to netif_rx() must have some headroom. */
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

	/* Initialize it here to avoid later surprises */
	skb_shinfo(skb)->destructor_arg = NULL;

	return skb;
}
static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
							struct sk_buff *skb,
							struct xen_netif_tx_request *txp,
							struct gnttab_map_grant_ref *gop,
							unsigned int frag_overflow,
							struct sk_buff *nskb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	int start;
	pending_ring_idx_t index;
	unsigned int nr_slots;

	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
	     shinfo->nr_frags++, txp++, gop++) {
		index = pending_index(queue->pending_cons++);
		pending_idx = queue->pending_ring[index];
		xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
	}

	if (frag_overflow) {

		shinfo = skb_shinfo(nskb);
		frags = shinfo->frags;

		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
		     shinfo->nr_frags++, txp++, gop++) {
			index = pending_index(queue->pending_cons++);
			pending_idx = queue->pending_ring[index];
			xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
						gop);
			frag_set_pending_idx(&frags[shinfo->nr_frags],
					     pending_idx);
		}

		skb_shinfo(skb)->frag_list = nskb;
	}

	return gop;
}
static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
					   u16 pending_idx,
					   grant_handle_t handle)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] !=
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to overwrite active handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = handle;
}

static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
					     u16 pending_idx)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] ==
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to unmap invalid handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}
static int xenvif_tx_check_gop(struct xenvif_queue *queue,
			       struct sk_buff *skb,
			       struct gnttab_map_grant_ref **gopp_map,
			       struct gnttab_copy **gopp_copy)
{
	struct gnttab_map_grant_ref *gop_map = *gopp_map;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	/* This always points to the shinfo of the skb being checked, which
	 * could be either the first or the one on the frag_list
	 */
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	/* If this is non-NULL, we are currently checking the frag_list skb, and
	 * this points to the shinfo of the first one
	 */
	struct skb_shared_info *first_shinfo = NULL;
	int nr_frags = shinfo->nr_frags;
	const bool sharedslot = nr_frags &&
				frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
	int i, err;

	/* Check status of header. */
	err = (*gopp_copy)->status;
	if (unlikely(err)) {
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
				   (*gopp_copy)->status,
				   pending_idx,
				   (*gopp_copy)->source.u.ref);
		/* The first frag might still have this slot mapped */
		if (!sharedslot)
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_ERROR);
	}
	(*gopp_copy)++;

check_frags:
	for (i = 0; i < nr_frags; i++, gop_map++) {
		int j, newerr;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);

		/* Check error status: if okay then remember grant handle. */
		newerr = gop_map->status;

		if (likely(!newerr)) {
			xenvif_grant_handle_set(queue,
						pending_idx,
						gop_map->handle);
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err)) {
				xenvif_idx_unmap(queue, pending_idx);
				/* If the mapping of the first frag was OK, but
				 * the header's copy failed, and they are
				 * sharing a slot, send an error
				 */
				if (i == 0 && sharedslot)
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_ERROR);
				else
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_OKAY);
			}
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
				   i,
				   gop_map->status,
				   pending_idx,
				   gop_map->ref);
		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: if the header hasn't shared a slot with the
		 * first frag, release it as well.
		 */
		if (!sharedslot)
			xenvif_idx_release(queue,
					   XENVIF_TX_CB(skb)->pending_idx,
					   XEN_NETIF_RSP_OKAY);

		/* Invalidate preceding fragments of this skb. */
		for (j = 0; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_unmap(queue, pending_idx);
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* And if we found the error while checking the frag_list, unmap
		 * the first skb's frags
		 */
		if (first_shinfo) {
			for (j = 0; j < first_shinfo->nr_frags; j++) {
				pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
				xenvif_idx_unmap(queue, pending_idx);
				xenvif_idx_release(queue, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			}
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	if (skb_has_frag_list(skb) && !first_shinfo) {
		first_shinfo = skb_shinfo(skb);
		shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
		nr_frags = shinfo->nr_frags;

		goto check_frags;
	}

	*gopp_map = gop_map;

	return err;
}
static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;
	u16 prev_pending_idx = INVALID_PENDING_IDX;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		/* If this is not the first frag, chain it to the previous */
		if (prev_pending_idx == INVALID_PENDING_IDX)
			skb_shinfo(skb)->destructor_arg =
				&callback_param(queue, pending_idx);
		else
			callback_param(queue, prev_pending_idx).ctx =
				&callback_param(queue, pending_idx);

		callback_param(queue, pending_idx).ctx = NULL;
		prev_pending_idx = pending_idx;

		txp = &queue->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(queue, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset network stack's put_page */
		get_page(queue->mmap_pages[pending_idx]);
	}
}
static int xenvif_get_extras(struct xenvif_queue *queue,
			     struct xen_netif_extra_info *extras,
			     unsigned int *extra_count,
			     int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = queue->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(queue->vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(queue->vif);
			return -EBADR;
		}

		RING_COPY_REQUEST(&queue->tx, cons, &extra);

		queue->tx.req_cons = ++cons;
		(*extra_count)++;

		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			netdev_err(queue->vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	switch (gso->u.gso.type) {
	case XEN_NETIF_GSO_TYPE_TCPV4:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		break;
	case XEN_NETIF_GSO_TYPE_TCPV6:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	/* gso_segs will be calculated later */

	return 0;
}

static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
{
	bool recalculate_partial_csum = false;

	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		queue->stats.rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = true;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	return skb_checksum_setup(skb, recalculate_partial_csum);
}
static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
{
	u64 now = get_jiffies_64();
	u64 next_credit = queue->credit_window_start +
		msecs_to_jiffies(queue->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&queue->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq64(now, next_credit)) {
		queue->credit_window_start = now;
		tx_add_credit(queue);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > queue->remaining_credit) {
		queue->credit_timeout.data =
			(unsigned long)queue;
		mod_timer(&queue->credit_timeout,
			  next_credit);
		queue->credit_window_start = next_credit;

		return true;
	}

	return false;
}
/* No locking is required in xenvif_mcast_add/del() as they are
 * only ever invoked from NAPI poll. An RCU list is used because
 * xenvif_mcast_match() is called asynchronously, during start_xmit.
 */

static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Too many multicast addresses\n");
		return -ENOSPC;
	}

	mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC);
	if (!mcast)
		return -ENOMEM;

	ether_addr_copy(mcast->addr, addr);
	list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr);
	vif->fe_mcast_count++;

	return 0;
}

static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
		if (ether_addr_equal(addr, mcast->addr)) {
			--vif->fe_mcast_count;
			list_del_rcu(&mcast->entry);
			kfree_rcu(mcast, rcu);
			break;
		}
	}
}
bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	rcu_read_lock();
	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
		if (ether_addr_equal(addr, mcast->addr)) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}
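/* Reader-side sketch of the scheme above: xenvif_mcast_match() walks
 * the list under rcu_read_lock() from the transmit path, while
 * xenvif_mcast_del() (NAPI poll only) unlinks entries with
 * list_del_rcu() and frees them via kfree_rcu(), i.e. only after every
 * reader that might still hold a pointer to the entry has finished.
 */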
void xenvif_mcast_addr_list_free(struct xenvif *vif)
{
	/* No need for locking or RCU here. NAPI poll and TX queue
	 * are stopped.
	 */
	while (!list_empty(&vif->fe_mcast_addr)) {
		struct xenvif_mcast_addr *mcast;

		mcast = list_first_entry(&vif->fe_mcast_addr,
					 struct xenvif_mcast_addr,
					 entry);
		--vif->fe_mcast_count;
		list_del(&mcast->entry);
		kfree(mcast);
	}
}
static void xenvif_tx_build_gops(struct xenvif_queue *queue,
				 int budget,
				 unsigned *copy_ops,
				 unsigned *map_ops)
{
	struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
	struct sk_buff *skb, *nskb;
	int ret;
	unsigned int frag_overflow;

	while (skb_queue_len(&queue->tx_queue) < budget) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		unsigned int extra_count;
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (queue->tx.sring->req_prod - queue->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(queue->vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   queue->tx.sring->req_prod, queue->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
		if (!work_to_do)
			break;

		idx = queue->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		RING_COPY_REQUEST(&queue->tx, idx, &txreq);

		/* Credit-based scheduling. */
		if (txreq.size > queue->remaining_credit &&
		    tx_credit_exceeded(queue, txreq.size))
			break;

		queue->remaining_credit -= txreq.size;

		work_to_do--;
		queue->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		extra_count = 0;
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(queue, extras,
						       &extra_count,
						       work_to_do);
			idx = queue->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1];
			ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq, extra_count,
					 (ret == 0) ?
					 XEN_NETIF_RSP_OKAY :
					 XEN_NETIF_RSP_ERROR);
			push_tx_responses(queue);
			continue;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1];
			xenvif_mcast_del(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq, extra_count,
					 XEN_NETIF_RSP_OKAY);
			push_tx_responses(queue);
			continue;
		}

		ret = xenvif_count_requests(queue, &txreq, extra_count,
					    txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(queue->vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(queue, &txreq, extra_count, idx);
			break;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
			netdev_err(queue->vif->dev,
				   "txreq.offset: %u, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		index = pending_index(queue->pending_cons);
		pending_idx = queue->pending_ring[index];

		data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			XEN_NETBACK_TX_COPY_LEN : txreq.size;

		skb = xenvif_alloc_skb(data_len);
		if (unlikely(skb == NULL)) {
			netdev_dbg(queue->vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(queue, &txreq, extra_count, idx);
			break;
		}

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size)
			skb_shinfo(skb)->nr_frags++;
		/* At this point shinfo->nr_frags is in fact the number of
		 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
		 */
		frag_overflow = 0;
		nskb = NULL;
		if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
			frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
			BUG_ON(frag_overflow > MAX_SKB_FRAGS);
			skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
			nskb = xenvif_alloc_skb(0);
			if (unlikely(nskb == NULL)) {
				kfree_skb(skb);
				xenvif_tx_err(queue, &txreq, extra_count, idx);
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Can't allocate the frag_list skb.\n");
				break;
			}
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				kfree_skb(nskb);
				break;
			}
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) {
			struct xen_netif_extra_info *extra;
			enum pkt_hash_types type = PKT_HASH_TYPE_NONE;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];

			switch (extra->u.hash.type) {
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
				type = PKT_HASH_TYPE_L3;
				break;

			case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
				type = PKT_HASH_TYPE_L4;
				break;

			default:
				break;
			}

			if (type != PKT_HASH_TYPE_NONE)
				skb_set_hash(skb,
					     *(u32 *)extra->u.hash.value,
					     type);
		}

		XENVIF_TX_CB(skb)->pending_idx = pending_idx;

		__skb_put(skb, data_len);
		queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
		queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
		queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;

		queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
			virt_to_gfn(skb->data);
		queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
		queue->tx_copy_ops[*copy_ops].dest.offset =
			offset_in_page(skb->data) & ~XEN_PAGE_MASK;

		queue->tx_copy_ops[*copy_ops].len = data_len;
		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;

		(*copy_ops)++;

		if (data_len < txreq.size) {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
			xenvif_tx_create_map_op(queue, pending_idx, &txreq,
						extra_count, gop);
			gop++;
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
			memcpy(&queue->pending_tx_info[pending_idx].req,
			       &txreq, sizeof(txreq));
			queue->pending_tx_info[pending_idx].extra_count =
				extra_count;
		}

		queue->pending_cons++;

		gop = xenvif_get_requests(queue, skb, txfrags, gop,
					  frag_overflow, nskb);

		__skb_queue_tail(&queue->tx_queue, skb);

		queue->tx.req_cons = idx;

		if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
		    (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
			break;
	}

	(*map_ops) = gop - queue->tx_map_ops;
	return;
}
/* Consolidate skb with a frag_list into a brand new one with local pages on
 * frags. Returns 0 or -ENOMEM if can't allocate new pages.
 */
static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned int offset = skb_headlen(skb);
	skb_frag_t frags[MAX_SKB_FRAGS];
	int i, f;
	struct ubuf_info *uarg;
	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

	queue->stats.tx_zerocopy_sent += 2;
	queue->stats.tx_frag_overflow++;

	xenvif_fill_frags(queue, nskb);
	/* Subtract frags size, we will correct it later */
	skb->truesize -= skb->data_len;
	skb->len += nskb->len;
	skb->data_len += nskb->len;

	/* create a brand new frags array and coalesce there */
	for (i = 0; offset < skb->len; i++) {
		struct page *page;
		unsigned int len;

		BUG_ON(i >= MAX_SKB_FRAGS);
		page = alloc_page(GFP_ATOMIC);
		if (!page) {
			int j;
			skb->truesize += skb->data_len;
			for (j = 0; j < i; j++)
				put_page(frags[j].page.p);
			return -ENOMEM;
		}

		if (offset + PAGE_SIZE < skb->len)
			len = PAGE_SIZE;
		else
			len = skb->len - offset;
		if (skb_copy_bits(skb, offset, page_address(page), len))
			BUG();

		offset += len;
		frags[i].page.p = page;
		frags[i].page_offset = 0;
		skb_frag_size_set(&frags[i], len);
	}

	/* Copied all the bits from the frag list -- free it. */
	skb_frag_list_init(skb);
	xenvif_skb_zerocopy_prepare(queue, nskb);
	kfree_skb(nskb);

	/* Release all the original (foreign) frags. */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		skb_frag_unref(skb, f);
	uarg = skb_shinfo(skb)->destructor_arg;
	/* increase inflight counter to offset decrement in callback */
	atomic_inc(&queue->inflight_packets);
	uarg->callback(uarg, true);
	skb_shinfo(skb)->destructor_arg = NULL;

	/* Fill the skb with the new (local) frags. */
	memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
	skb_shinfo(skb)->nr_frags = i;
	skb->truesize += i * PAGE_SIZE;

	return 0;
}
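/* Cost note: the consolidation above allocates up to MAX_SKB_FRAGS
 * fresh pages and copies the entire payload with skb_copy_bits(), so it
 * is deliberately a slow path, taken only when a packet needed more
 * slots than MAX_SKB_FRAGS; the tx_frag_overflow counter tracks how
 * often that happens.
 */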
static int xenvif_tx_submit(struct xenvif_queue *queue)
{
	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
		txp = &queue->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
			/* If there was an error, xenvif_tx_check_gop is
			 * expected to release all the frags which were mapped,
			 * so kfree_skb shouldn't do it again
			 */
			skb_shinfo(skb)->nr_frags = 0;
			if (skb_has_frag_list(skb)) {
				struct sk_buff *nskb =
						skb_shinfo(skb)->frag_list;
				skb_shinfo(nskb)->nr_frags = 0;
			}
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		callback_param(queue, pending_idx).ctx = NULL;
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(queue, skb);

		if (unlikely(skb_has_frag_list(skb))) {
			if (xenvif_handle_frag_list(queue, skb)) {
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Not enough memory to consolidate frag_list!\n");
				xenvif_skb_zerocopy_prepare(queue, skb);
				kfree_skb(skb);
				continue;
			}
		}

		skb->dev      = queue->vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(queue, skb)) {
			netdev_dbg(queue->vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			/* We have to set this flag to trigger the callback */
			if (skb_shinfo(skb)->destructor_arg)
				xenvif_skb_zerocopy_prepare(queue, skb);
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);
		}

		queue->stats.rx_bytes += skb->len;
		queue->stats.rx_packets++;

		work_done++;

		/* Set this flag right before netif_receive_skb, otherwise
		 * someone might think this packet already left netback, and
		 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
		 */
		if (skb_shinfo(skb)->destructor_arg) {
			xenvif_skb_zerocopy_prepare(queue, skb);
			queue->stats.tx_zerocopy_sent++;
		}

		netif_receive_skb(skb);
	}

	return work_done;
}
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
	unsigned long flags;
	pending_ring_idx_t index;
	struct xenvif_queue *queue = ubuf_to_queue(ubuf);

	/* This is the only place where we grab this lock, to protect callbacks
	 * from each other.
	 */
	spin_lock_irqsave(&queue->callback_lock, flags);
	do {
		u16 pending_idx = ubuf->desc;
		ubuf = (struct ubuf_info *) ubuf->ctx;
		BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
			MAX_PENDING_REQS);
		index = pending_index(queue->dealloc_prod);
		queue->dealloc_ring[index] = pending_idx;
		/* Sync with xenvif_tx_dealloc_action:
		 * insert idx then incr producer.
		 */
		smp_wmb();
		queue->dealloc_prod++;
	} while (ubuf);
	spin_unlock_irqrestore(&queue->callback_lock, flags);

	if (likely(zerocopy_success))
		queue->stats.tx_zerocopy_success++;
	else
		queue->stats.tx_zerocopy_fail++;
	xenvif_skb_zerocopy_complete(queue);
}
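/* The smp_wmb() above (store the ring entry, then advance dealloc_prod)
 * pairs with the smp_rmb() in xenvif_tx_dealloc_action() (read
 * dealloc_prod, then read the entries), so the dealloc thread never
 * observes the producer index ahead of the data it guards.
 */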
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
{
	struct gnttab_unmap_grant_ref *gop;
	pending_ring_idx_t dc, dp;
	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
	unsigned int i = 0;

	dc = queue->dealloc_cons;
	gop = queue->tx_unmap_ops;

	/* Free up any grants we have finished using */
	do {
		dp = queue->dealloc_prod;

		/* Ensure we see all indices enqueued by all
		 * xenvif_zerocopy_callback().
		 */
		smp_rmb();

		while (dc != dp) {
			BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
			pending_idx =
				queue->dealloc_ring[pending_index(dc++)];

			pending_idx_release[gop - queue->tx_unmap_ops] =
				pending_idx;
			queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
				queue->mmap_pages[pending_idx];
			gnttab_set_unmap_op(gop,
					    idx_to_kaddr(queue, pending_idx),
					    GNTMAP_host_map,
					    queue->grant_tx_handle[pending_idx]);
			xenvif_grant_handle_reset(queue, pending_idx);
			++gop;
		}

	} while (dp != queue->dealloc_prod);

	queue->dealloc_cons = dc;

	if (gop - queue->tx_unmap_ops > 0) {
		int ret;
		ret = gnttab_unmap_refs(queue->tx_unmap_ops,
					NULL,
					queue->pages_to_unmap,
					gop - queue->tx_unmap_ops);
		if (ret) {
			netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
				   gop - queue->tx_unmap_ops, ret);
			for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
				if (gop[i].status != GNTST_okay)
					netdev_err(queue->vif->dev,
						   " host_addr: 0x%llx handle: 0x%x status: %d\n",
						   gop[i].host_addr,
						   gop[i].handle,
						   gop[i].status);
			}
			BUG();
		}
	}

	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
		xenvif_idx_release(queue, pending_idx_release[i],
				   XEN_NETIF_RSP_OKAY);
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
	unsigned nr_mops, nr_cops = 0;
	int work_done, ret;

	if (unlikely(!tx_work_todo(queue)))
		return 0;

	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);

	if (nr_cops == 0)
		return 0;

	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
	if (nr_mops != 0) {
		ret = gnttab_map_refs(queue->tx_map_ops,
				      NULL,
				      queue->pages_to_map,
				      nr_mops);
		BUG_ON(ret);
	}

	work_done = xenvif_tx_submit(queue);

	return work_done;
}
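/* Usage sketch: this is the Tx half driven from NAPI poll. The grant
 * copy ops (packet headers) and grant map ops (frags) prepared by
 * xenvif_tx_build_gops() are each issued as a single batched hypercall
 * before xenvif_tx_submit() walks the queued skbs and checks every
 * operation's status.
 */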
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status)
{
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t index;
	unsigned long flags;

	pending_tx_info = &queue->pending_tx_info[pending_idx];

	spin_lock_irqsave(&queue->response_lock, flags);

	make_tx_response(queue, &pending_tx_info->req,
			 pending_tx_info->extra_count, status);

	/* Release the pending index before pushing the Tx response so
	 * it's available before a new Tx request is pushed by the
	 * frontend.
	 */
	index = pending_index(queue->pending_prod++);
	queue->pending_ring[index] = pending_idx;

	push_tx_responses(queue);

	spin_unlock_irqrestore(&queue->response_lock, flags);
}
static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     unsigned int extra_count,
			     s8 st)
{
	RING_IDX i = queue->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;

	resp = RING_GET_RESPONSE(&queue->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	while (extra_count-- != 0)
		RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	queue->tx.rsp_prod_pvt = ++i;
}

static void push_tx_responses(struct xenvif_queue *queue)
{
	int notify;

	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
	if (notify)
		notify_remote_via_irq(queue->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
						      u16 id,
						      s8 st,
						      u16 offset,
						      u16 size,
						      u16 flags)
{
	RING_IDX i = queue->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&queue->rx, i);
	resp->offset = offset;
	resp->flags  = flags;
	resp->id     = id;
	resp->status = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	queue->rx.rsp_prod_pvt = ++i;

	return resp;
}
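/* Note: the rx response status field is overloaded. A non-negative
 * value is the byte count placed in the buffer, while a negative value
 * carries a XEN_NETIF_RSP_* error code, which is why st overrides size
 * above only when st < 0.
 */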
void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
{
	int ret;
	struct gnttab_unmap_grant_ref tx_unmap_op;

	gnttab_set_unmap_op(&tx_unmap_op,
			    idx_to_kaddr(queue, pending_idx),
			    GNTMAP_host_map,
			    queue->grant_tx_handle[pending_idx]);
	xenvif_grant_handle_reset(queue, pending_idx);

	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
				&queue->mmap_pages[pending_idx], 1);
	if (ret) {
		netdev_err(queue->vif->dev,
			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
			   ret,
			   pending_idx,
			   tx_unmap_op.host_addr,
			   tx_unmap_op.handle,
			   tx_unmap_op.status);
		BUG();
	}
}
static inline int tx_work_todo(struct xenvif_queue *queue)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
		return 1;

	return 0;
}

static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
{
	return queue->dealloc_cons != queue->dealloc_prod;
}

void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
{
	if (queue->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->tx.sring);
	if (queue->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->rx.sring);
}

int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
				   grant_ref_t tx_ring_ref,
				   grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;
	int err;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     &tx_ring_ref, 1, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     &rx_ring_ref, 1, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);

	return 0;

err:
	xenvif_unmap_frontend_data_rings(queue);
	return err;
}
static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->stalled = true;

	/* At least one queue has stalled? Disable the carrier. */
	spin_lock(&vif->lock);
	if (vif->stalled_queues++ == 0) {
		netdev_info(vif->dev, "Guest Rx stalled");
		netif_carrier_off(vif->dev);
	}
	spin_unlock(&vif->lock);
}

static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
	queue->stalled = false;

	/* All queues are ready? Enable the carrier. */
	spin_lock(&vif->lock);
	if (--vif->stalled_queues == 0) {
		netdev_info(vif->dev, "Guest Rx ready");
		netif_carrier_on(vif->dev);
	}
	spin_unlock(&vif->lock);
}

static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return !queue->stalled && prod - cons < 1
		&& time_after(jiffies,
			      queue->last_rx_time + queue->vif->stall_timeout);
}

static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return queue->stalled && prod - cons >= 1;
}

static bool xenvif_have_rx_work(struct xenvif_queue *queue)
{
	return xenvif_rx_ring_slots_available(queue)
		|| (queue->vif->stall_timeout &&
		    (xenvif_rx_queue_stalled(queue)
		     || xenvif_rx_queue_ready(queue)))
		|| kthread_should_stop()
		|| queue->vif->disabled;
}

static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
{
	struct sk_buff *skb;
	long timeout;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return MAX_SCHEDULE_TIMEOUT;

	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
	return timeout < 0 ? 0 : timeout;
}
/* Wait until the guest Rx thread has work.
 *
 * The timeout needs to be adjusted based on the current head of the
 * queue (and not just the head at the beginning). In particular, if
 * the queue is initially empty an infinite timeout is used and this
 * needs to be reduced when a skb is queued.
 *
 * This cannot be done with wait_event_timeout() because it only
 * calculates the timeout once.
 */
static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
	DEFINE_WAIT(wait);

	if (xenvif_have_rx_work(queue))
		return;

	for (;;) {
		long ret;

		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
		if (xenvif_have_rx_work(queue))
			break;
		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
		if (!ret)
			break;
	}
	finish_wait(&queue->wq, &wait);
}
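/* The open-coded wait above follows the canonical prepare_to_wait()/
 * finish_wait() pattern: the task is queued and marked
 * TASK_INTERRUPTIBLE *before* the condition is re-checked, so a wakeup
 * arriving between the check and schedule_timeout() is not lost; it
 * simply makes schedule_timeout() return immediately.
 */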
int xenvif_kthread_guest_rx(void *data)
{
	struct xenvif_queue *queue = data;
	struct xenvif *vif = queue->vif;

	if (!vif->stall_timeout)
		xenvif_queue_carrier_on(queue);

	for (;;) {
		xenvif_wait_for_rx_work(queue);

		if (kthread_should_stop())
			break;

		/* This frontend is found to be rogue, disable it in
		 * kthread context. Currently this is only set when
		 * netback finds out that the frontend sent a malformed
		 * packet, but we cannot disable the interface in
		 * softirq context so we defer it here, if this thread
		 * is associated with queue 0.
		 */
		if (unlikely(vif->disabled && queue->id == 0)) {
			xenvif_carrier_off(vif);
			break;
		}

		if (!skb_queue_empty(&queue->rx_queue))
			xenvif_rx_action(queue);

		/* If the guest hasn't provided any Rx slots for a
		 * while it's probably not responsive, drop the
		 * carrier so packets are dropped earlier.
		 */
		if (vif->stall_timeout) {
			if (xenvif_rx_queue_stalled(queue))
				xenvif_queue_carrier_off(queue);
			else if (xenvif_rx_queue_ready(queue))
				xenvif_queue_carrier_on(queue);
		}

		/* Queued packets may have foreign pages from other
		 * domains. These cannot be queued indefinitely as
		 * this would starve guests of grant refs and transmit
		 * slots.
		 */
		xenvif_rx_queue_drop_expired(queue);

		xenvif_rx_queue_maybe_wake(queue);

		cond_resched();
	}

	/* Bin any remaining skbs */
	xenvif_rx_queue_purge(queue);

	return 0;
}
static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
{
	/* Dealloc thread must remain running until all inflight
	 * packets complete.
	 */
	return kthread_should_stop() &&
		!atomic_read(&queue->inflight_packets);
}

int xenvif_dealloc_kthread(void *data)
{
	struct xenvif_queue *queue = data;

	for (;;) {
		wait_event_interruptible(queue->dealloc_wq,
					 tx_dealloc_work_todo(queue) ||
					 xenvif_dealloc_kthread_should_stop(queue));
		if (xenvif_dealloc_kthread_should_stop(queue))
			break;

		xenvif_tx_dealloc_action(queue);
		cond_resched();
	}

	/* Unmap anything remaining */
	if (tx_dealloc_work_todo(queue))
		xenvif_tx_dealloc_action(queue);

	return 0;
}
static void make_ctrl_response(struct xenvif *vif,
			       const struct xen_netif_ctrl_request *req,
			       u32 status, u32 data)
{
	RING_IDX idx = vif->ctrl.rsp_prod_pvt;
	struct xen_netif_ctrl_response rsp = {
		.id = req->id,
		.type = req->type,
		.status = status,
		.data = data,
	};

	*RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
	vif->ctrl.rsp_prod_pvt = ++idx;
}

static void push_ctrl_response(struct xenvif *vif)
{
	int notify;

	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
	if (notify)
		notify_remote_via_irq(vif->ctrl_irq);
}

static void process_ctrl_request(struct xenvif *vif,
				 const struct xen_netif_ctrl_request *req)
{
	u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
	u32 data = 0;

	switch (req->type) {
	case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
		status = xenvif_set_hash_alg(vif, req->data[0]);
		break;

	case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
		status = xenvif_get_hash_flags(vif, &data);
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
		status = xenvif_set_hash_flags(vif, req->data[0]);
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
		status = xenvif_set_hash_key(vif, req->data[0],
					     req->data[1]);
		break;

	case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
		status = XEN_NETIF_CTRL_STATUS_SUCCESS;
		data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
		status = xenvif_set_hash_mapping_size(vif,
						      req->data[0]);
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
		status = xenvif_set_hash_mapping(vif, req->data[0],
						 req->data[1],
						 req->data[2]);
		break;

	default:
		break;
	}

	make_ctrl_response(vif, req, status, data);
	push_ctrl_response(vif);
}

static void xenvif_ctrl_action(struct xenvif *vif)
{
	for (;;) {
		RING_IDX req_prod, req_cons;

		req_prod = vif->ctrl.sring->req_prod;
		req_cons = vif->ctrl.req_cons;

		/* Make sure we can see requests before we process them. */
		rmb();

		if (req_cons == req_prod)
			break;

		while (req_cons != req_prod) {
			struct xen_netif_ctrl_request req;

			RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
			req_cons++;

			process_ctrl_request(vif, &req);
		}

		vif->ctrl.req_cons = req_cons;
		vif->ctrl.sring->req_event = req_cons + 1;
	}
}

static bool xenvif_ctrl_work_todo(struct xenvif *vif)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
		return 1;

	return 0;
}

int xenvif_ctrl_kthread(void *data)
{
	struct xenvif *vif = data;

	for (;;) {
		wait_event_interruptible(vif->ctrl_wq,
					 xenvif_ctrl_work_todo(vif) ||
					 kthread_should_stop());
		if (kthread_should_stop())
			break;

		while (xenvif_ctrl_work_todo(vif))
			xenvif_ctrl_action(vif);
	}

	return 0;
}
static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	/* Allow as many queues as there are CPUs if user has not
	 * specified a value.
	 */
	if (xenvif_max_queues == 0)
		xenvif_max_queues = num_online_cpus();

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

#ifdef CONFIG_DEBUG_FS
	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
		pr_warn("Init of debugfs returned %ld!\n",
			PTR_ERR(xen_netback_dbg_root));
#endif /* CONFIG_DEBUG_FS */

	return 0;

failed_init:
	return rc;
}

module_init(netback_init);
static void __exit netback_fini(void)
{
#ifdef CONFIG_DEBUG_FS
	if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
		debugfs_remove_recursive(xen_netback_dbg_root);
#endif /* CONFIG_DEBUG_FS */
	xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");