/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>

#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
struct pending_tx_info {
	struct xen_netif_tx_request req;
	struct xenvif *vif;
};
typedef unsigned int pending_ring_idx_t;

struct netbk_rx_meta {
	int id;
	int size;
	int gso_size;
};

#define MAX_PENDING_REQS 256

#define MAX_BUFFER_OFFSET PAGE_SIZE
/* extra field used in struct page */
union page_ext {
	struct {
#if BITS_PER_LONG < 64
#define IDX_WIDTH   8
#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
		unsigned int group:GROUP_WIDTH;
		unsigned int idx:IDX_WIDTH;
#else
		unsigned int group, idx;
#endif
	} e;
	void *mapping;
};
struct xen_netbk {
	wait_queue_head_t wq;
	struct task_struct *task;

	struct sk_buff_head rx_queue;
	struct sk_buff_head tx_queue;

	struct timer_list net_timer;

	struct page *mmap_pages[MAX_PENDING_REQS];

	pending_ring_idx_t pending_prod;
	pending_ring_idx_t pending_cons;
	struct list_head net_schedule_list;

	/* Protect the net_schedule_list in netif. */
	spinlock_t net_schedule_list_lock;

	atomic_t netfront_count;

	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];

	u16 pending_ring[MAX_PENDING_REQS];

	/*
	 * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
	 * head/fragment page uses 2 copy operations because it
	 * straddles two buffers in the frontend.
	 */
	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
	struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
};
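
/*
 * Illustrative sketch, not part of the upstream driver (the helper name is
 * hypothetical): why grant_copy_op and meta above are sized
 * 2*XEN_NETIF_RX_RING_SIZE.  A chunk of up to PAGE_SIZE bytes copied to a
 * non-zero destination offset can straddle two frontend buffers and so
 * needs up to two grant copies.  The helper gives that upper bound,
 * assuming the chunk is not moved to a fresh buffer first.
 */
static inline unsigned int netbk_example_copies_for_chunk(unsigned int copy_off,
							  unsigned int size)
{
	/* e.g. copy_off = 100, size = 4096 -> 3996 + 100 bytes = 2 copies */
	return DIV_ROUND_UP(copy_off + size, MAX_BUFFER_OFFSET);
}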
static struct xen_netbk *xen_netbk;
static int xen_netbk_group_nr;
void xen_netbk_add_xenvif(struct xenvif *vif)
{
	int i;
	int min_netfront_count;
	int min_group = 0;
	struct xen_netbk *netbk;

	min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
	for (i = 0; i < xen_netbk_group_nr; i++) {
		int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
		if (netfront_count < min_netfront_count) {
			min_group = i;
			min_netfront_count = netfront_count;
		}
	}

	netbk = &xen_netbk[min_group];

	vif->netbk = netbk;
	atomic_inc(&netbk->netfront_count);
}
void xen_netbk_remove_xenvif(struct xenvif *vif)
{
	struct xen_netbk *netbk = vif->netbk;
	vif->netbk = NULL;
	atomic_dec(&netbk->netfront_count);
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8       st);
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
						       u16      id,
						       s8       st,
						       u16      offset,
						       u16      size,
						       u16      flags);
static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
				       u16 idx)
{
	return page_to_pfn(netbk->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
}
/* extra field used in struct page */
static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
				unsigned int idx)
{
	unsigned int group = netbk - xen_netbk;
	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };

	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
	pg->mapping = ext.mapping;
}
static int get_page_ext(struct page *pg,
			unsigned int *pgroup, unsigned int *pidx)
{
	union page_ext ext = { .mapping = pg->mapping };
	struct xen_netbk *netbk;
	unsigned int group, idx;

	group = ext.e.group - 1;

	if (group < 0 || group >= xen_netbk_group_nr)
		return 0;

	netbk = &xen_netbk[group];

	idx = ext.e.idx;

	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
		return 0;

	if (netbk->mmap_pages[idx] != pg)
		return 0;

	*pgroup = group;
	*pidx = idx;

	return 1;
}
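
/*
 * Illustrative usage sketch, not part of the upstream driver (the helper
 * name is hypothetical).  The group is stored biased by one, so a local
 * page whose ->mapping is NULL decodes to ext.e.group == 0; get_page_ext()
 * then computes 0 - 1, which wraps to a huge unsigned value and fails the
 * xen_netbk_group_nr range check, i.e. ordinary pages never look foreign.
 */
static inline void netbk_example_page_ext_roundtrip(struct xen_netbk *netbk,
						    struct page *pg, u16 idx)
{
	unsigned int got_group, got_idx;

	/* Mirror what xen_netbk_alloc_page() does for a real TX page. */
	set_page_ext(pg, netbk, idx);
	netbk->mmap_pages[idx] = pg;

	BUG_ON(!get_page_ext(pg, &got_group, &got_idx));
	BUG_ON(got_group != (unsigned int)(netbk - xen_netbk));
	BUG_ON(got_idx != idx);
}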
/*
 * This is the amount of packet we copy rather than map, so that the
 * guest can't fiddle with the contents of the headers while we do
 * packet processing on them (netfilter, routing, etc).
 */
#define PKT_PROT_LEN    (ETH_HLEN + \
			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
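
/*
 * Illustrative sketch, not part of the upstream driver (the helper name is
 * hypothetical): this mirrors how xen_netbk_tx_build_gops() below decides
 * how much of a request ends up in the skb's linear area.  At most
 * PKT_PROT_LEN bytes are copied there; anything beyond that stays in frags
 * and is pulled up later by __pskb_pull_tail() if the headers still do not
 * fit.
 */
static inline unsigned int netbk_example_linear_len(unsigned int txreq_size,
						    int nr_frag_slots)
{
	return (txreq_size > PKT_PROT_LEN && nr_frag_slots < MAX_SKB_FRAGS) ?
		PKT_PROT_LEN : txreq_size;
}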
static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}

static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
{
	return MAX_PENDING_REQS -
		netbk->pending_prod + netbk->pending_cons;
}
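
/*
 * Illustrative sketch, not part of the upstream driver (the helper name is
 * hypothetical).  pending_prod and pending_cons are free-running counters,
 * so only the masked value indexes pending_ring; the masking works because
 * MAX_PENDING_REQS is a power of two.  E.g. with pending_prod = 260 and
 * pending_cons = 10 there are 250 free entries and nr_pending_reqs()
 * reports 256 - 260 + 10 = 6 requests in flight.
 */
static inline u16 netbk_example_peek_free_pending_idx(struct xen_netbk *netbk)
{
	return netbk->pending_ring[pending_index(netbk->pending_cons)];
}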
static void xen_netbk_kick_thread(struct xen_netbk *netbk)
{
	wake_up(&netbk->wq);
}
static int max_required_rx_slots(struct xenvif *vif)
{
	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);

	if (vif->can_sg || vif->gso || vif->gso_prefix)
		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */

	return max;
}
int xen_netbk_rx_ring_full(struct xenvif *vif)
{
	RING_IDX peek   = vif->rx_req_cons_peek;
	RING_IDX needed = max_required_rx_slots(vif);

	return ((vif->rx.sring->req_prod - peek) < needed) ||
	       ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
}
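
/*
 * Illustrative sketch, not part of the upstream driver (the helper name is
 * hypothetical).  RING_IDX counters are free-running and unsigned, so the
 * subtractions above stay correct across 32-bit wrap; e.g. req_prod = 5
 * with peek = 0xfffffffe still yields 7 posted-but-unpeeked requests.
 */
static inline RING_IDX netbk_example_unpeeked_rx_requests(struct xenvif *vif)
{
	return vif->rx.sring->req_prod - vif->rx_req_cons_peek;
}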
int xen_netbk_must_stop_queue(struct xenvif *vif)
{
	if (!xen_netbk_rx_ring_full(vif))
		return 0;

	vif->rx.sring->req_event = vif->rx_req_cons_peek +
		max_required_rx_slots(vif);
	mb(); /* request notification /then/ check the queue */
	return xen_netbk_rx_ring_full(vif);
}
/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
	/* simple case: we have completely filled the current buffer. */
	if (offset == MAX_BUFFER_OFFSET)
		return true;

	/*
	 * complex case: start a fresh buffer if the current frag
	 * would overflow the current buffer but only if:
	 *     (i)   this frag would fit completely in the next buffer
	 * and (ii)  there is already some data in the current buffer
	 * and (iii) this is not the head buffer.
	 *
	 * Where:
	 * - (i) stops us splitting a frag into two copies
	 *   unless the frag is too large for a single buffer.
	 * - (ii) stops us from leaving a buffer pointlessly empty.
	 * - (iii) stops us leaving the first buffer empty.
	 *   Strictly speaking this is already covered by (ii), but it
	 *   is checked explicitly because netfront relies on the first
	 *   buffer being non-empty and can crash otherwise.
	 *
	 * The net effect is that small frags are effectively linearised,
	 * large frags are not needlessly split into multiple copies, and
	 * large frags tend to get their own buffers, as before.
	 */
	if ((offset + size > MAX_BUFFER_OFFSET) &&
	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
		return true;

	return false;
}
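
/*
 * Illustrative sketch, not part of the upstream driver (the helper name is
 * hypothetical): a dry run of the packing rule above, mirroring the loop in
 * xen_netbk_count_skb_slots().  With MAX_BUFFER_OFFSET == 4096, feeding the
 * chunk sizes 100, 200 and 4096 through it packs the two small chunks into
 * the first buffer and gives the large chunk a fresh buffer of its own.
 */
static inline unsigned int netbk_example_count_buffers(const unsigned long *sizes,
						       int n)
{
	unsigned int buffers = 1;
	unsigned long copy_off = 0;
	int i;

	for (i = 0; i < n; i++) {
		unsigned long size = sizes[i];

		while (size > 0) {
			unsigned long bytes = size;

			if (start_new_rx_buffer(copy_off, size, 0)) {
				buffers++;
				copy_off = 0;
			}
			if (copy_off + bytes > MAX_BUFFER_OFFSET)
				bytes = MAX_BUFFER_OFFSET - copy_off;
			copy_off += bytes;
			size -= bytes;
		}
	}
	return buffers;
}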
/*
 * Figure out how many ring slots we're going to need to send @skb to
 * the guest. This function is essentially a dry run of
 * netbk_gop_frag_copy.
 */
unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
	unsigned int count;
	int i, copy_off;

	count = DIV_ROUND_UP(
			offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);

	copy_off = skb_headlen(skb) % PAGE_SIZE;

	if (skb_shinfo(skb)->gso_size)
		count++;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned long size = skb_shinfo(skb)->frags[i].size;
		unsigned long bytes;
		while (size > 0) {
			BUG_ON(copy_off > MAX_BUFFER_OFFSET);

			if (start_new_rx_buffer(copy_off, size, 0)) {
				count++;
				copy_off = 0;
			}

			bytes = size;
			if (copy_off + bytes > MAX_BUFFER_OFFSET)
				bytes = MAX_BUFFER_OFFSET - copy_off;

			copy_off += bytes;
			size -= bytes;
		}
	}
	return count;
}
struct netrx_pending_operations {
	unsigned copy_prod, copy_cons;
	unsigned meta_prod, meta_cons;
	struct gnttab_copy *copy;
	struct netbk_rx_meta *meta;
	int copy_off;
	grant_ref_t copy_gref;
};
static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
						struct netrx_pending_operations *npo)
{
	struct netbk_rx_meta *meta;
	struct xen_netif_rx_request *req;

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

	meta = npo->meta + npo->meta_prod++;
	meta->gso_size = 0;
	meta->size = 0;
	meta->id = req->id;

	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	return meta;
}
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
				struct netrx_pending_operations *npo,
				struct page *page, unsigned long size,
				unsigned long offset, int *head)
{
	struct gnttab_copy *copy_gop;
	struct netbk_rx_meta *meta;
	/*
	 * These variables are used iff get_page_ext returns true,
	 * in which case they are guaranteed to be initialized.
	 */
	unsigned int uninitialized_var(group), uninitialized_var(idx);
	int foreign = get_page_ext(page, &group, &idx);
	unsigned long bytes;

	/* Data must not cross a page boundary. */
	BUG_ON(size + offset > PAGE_SIZE);

	meta = npo->meta + npo->meta_prod - 1;

	while (size > 0) {
		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

		if (start_new_rx_buffer(npo->copy_off, size, *head)) {
			/*
			 * Netfront requires there to be some data in the head
			 * buffer.
			 */
			BUG_ON(*head);

			meta = get_next_rx_buffer(vif, npo);
		}

		bytes = size;
		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - npo->copy_off;

		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		if (foreign) {
			struct xen_netbk *netbk = &xen_netbk[group];
			struct pending_tx_info *src_pend;

			src_pend = &netbk->pending_tx_info[idx];

			copy_gop->source.domid = src_pend->vif->domid;
			copy_gop->source.u.ref = src_pend->req.gref;
			copy_gop->flags |= GNTCOPY_source_gref;
		} else {
			void *vaddr = page_address(page);
			copy_gop->source.domid = DOMID_SELF;
			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
		}
		copy_gop->source.offset = offset;
		copy_gop->dest.domid = vif->domid;

		copy_gop->dest.offset = npo->copy_off;
		copy_gop->dest.u.ref = npo->copy_gref;
		copy_gop->len = bytes;

		npo->copy_off += bytes;
		meta->size += bytes;

		offset += bytes;
		size -= bytes;

		/* Leave a gap for the GSO descriptor. */
		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
			vif->rx.req_cons++;

		*head = 0; /* There must be something in this buffer now. */
	}
}
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int netbk_gop_skb(struct sk_buff *skb,
			 struct netrx_pending_operations *npo)
{
	struct xenvif *vif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct xen_netif_rx_request *req;
	struct netbk_rx_meta *meta;
	unsigned char *data;
	int head = 1;
	int old_meta_prod;

	old_meta_prod = npo->meta_prod;

	/* Set up a GSO prefix descriptor, if necessary */
	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
		meta = npo->meta + npo->meta_prod++;
		meta->gso_size = skb_shinfo(skb)->gso_size;
		meta->size = 0;
		meta->id = req->id;
	}

	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
	meta = npo->meta + npo->meta_prod++;

	if (!vif->gso_prefix)
		meta->gso_size = skb_shinfo(skb)->gso_size;
	else
		meta->gso_size = 0;

	meta->size = 0;
	meta->id = req->id;
	npo->copy_off = 0;
	npo->copy_gref = req->gref;

	data = skb->data;
	while (data < skb_tail_pointer(skb)) {
		unsigned int offset = offset_in_page(data);
		unsigned int len = PAGE_SIZE - offset;

		if (data + len > skb_tail_pointer(skb))
			len = skb_tail_pointer(skb) - data;

		netbk_gop_frag_copy(vif, skb, npo,
				    virt_to_page(data), len, offset, &head);
		data += len;
	}

	for (i = 0; i < nr_frags; i++) {
		netbk_gop_frag_copy(vif, skb, npo,
				    skb_shinfo(skb)->frags[i].page,
				    skb_shinfo(skb)->frags[i].size,
				    skb_shinfo(skb)->frags[i].page_offset,
				    &head);
	}

	return npo->meta_prod - old_meta_prod;
}
/*
 * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
			   struct netrx_pending_operations *npo)
{
	struct gnttab_copy *copy_op;
	int status = XEN_NETIF_RSP_OKAY;
	int i;

	for (i = 0; i < nr_meta_slots; i++) {
		copy_op = npo->copy + npo->copy_cons++;
		if (copy_op->status != GNTST_okay) {
			netdev_dbg(vif->dev,
				   "Bad status %d from copy to DOM%d.\n",
				   copy_op->status, vif->domid);
			status = XEN_NETIF_RSP_ERROR;
		}
	}

	return status;
}
static void netbk_add_frag_responses(struct xenvif *vif, int status,
				     struct netbk_rx_meta *meta,
				     int nr_meta_slots)
{
	int i;
	unsigned long offset;

	/* No fragments used */
	if (nr_meta_slots <= 1)
		return;

	nr_meta_slots--;

	for (i = 0; i < nr_meta_slots; i++) {
		int flags;
		if (i == nr_meta_slots - 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		offset = 0;
		make_rx_response(vif, meta[i].id, status, offset,
				 meta[i].size, flags);
	}
}
struct skb_cb_overlay {
	int meta_slots_used;
};
static void xen_netbk_rx_action(struct xen_netbk *netbk)
{
	struct xenvif *vif = NULL, *tmp;
	s8 status;
	u16 flags;
	struct xen_netif_rx_response *resp;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	LIST_HEAD(notify);
	int ret;
	int nr_frags;
	int count;
	unsigned long offset;
	struct skb_cb_overlay *sco;

	struct netrx_pending_operations npo = {
		.copy  = netbk->grant_copy_op,
		.meta  = netbk->meta,
	};

	skb_queue_head_init(&rxq);

	count = 0;

	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
		vif = netdev_priv(skb->dev);
		nr_frags = skb_shinfo(skb)->nr_frags;

		sco = (struct skb_cb_overlay *)skb->cb;
		sco->meta_slots_used = netbk_gop_skb(skb, &npo);

		count += nr_frags + 1;

		__skb_queue_tail(&rxq, skb);

		/* Filled the batch queue? */
		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
			break;
	}

	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));

	if (!npo.copy_prod)
		return;

	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
					npo.copy_prod);
	BUG_ON(ret != 0);

	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		sco = (struct skb_cb_overlay *)skb->cb;

		vif = netdev_priv(skb->dev);

		if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
			resp = RING_GET_RESPONSE(&vif->rx,
						 vif->rx.rsp_prod_pvt++);

			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

			resp->offset = netbk->meta[npo.meta_cons].gso_size;
			resp->id = netbk->meta[npo.meta_cons].id;
			resp->status = sco->meta_slots_used;

			npo.meta_cons++;
			sco->meta_slots_used--;
		}

		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;

		status = netbk_check_gop(vif, sco->meta_slots_used, &npo);

		if (sco->meta_slots_used == 1)
			flags = 0;
		else
			flags = XEN_NETRXF_more_data;

		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			/* remote but checksummed. */
			flags |= XEN_NETRXF_data_validated;

		offset = 0;
		resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
					status, offset,
					netbk->meta[npo.meta_cons].size,
					flags);

		if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
			struct xen_netif_extra_info *gso =
				(struct xen_netif_extra_info *)
				RING_GET_RESPONSE(&vif->rx,
						  vif->rx.rsp_prod_pvt++);

			resp->flags |= XEN_NETRXF_extra_info;

			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
		}

		netbk_add_frag_responses(vif, status,
					 netbk->meta + npo.meta_cons + 1,
					 sco->meta_slots_used);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
		if (ret && list_empty(&vif->notify_list))
			list_add_tail(&vif->notify_list, &notify);

		xenvif_notify_tx_completion(vif);

		npo.meta_cons += sco->meta_slots_used;
		dev_kfree_skb(skb);
	}

	list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
		notify_remote_via_irq(vif->irq);
		list_del_init(&vif->notify_list);
	}

	/* More work to do? */
	if (!skb_queue_empty(&netbk->rx_queue) &&
	    !timer_pending(&netbk->net_timer))
		xen_netbk_kick_thread(netbk);
}
void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
	struct xen_netbk *netbk = vif->netbk;

	skb_queue_tail(&netbk->rx_queue, skb);

	xen_netbk_kick_thread(netbk);
}
static void xen_netbk_alarm(unsigned long data)
{
	struct xen_netbk *netbk = (struct xen_netbk *)data;
	xen_netbk_kick_thread(netbk);
}
static int __on_net_schedule_list(struct xenvif *vif)
{
	return !list_empty(&vif->schedule_list);
}
/* Must be called with net_schedule_list_lock held */
static void remove_from_net_schedule_list(struct xenvif *vif)
{
	if (likely(__on_net_schedule_list(vif))) {
		list_del_init(&vif->schedule_list);
	}
}
static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
{
	struct xenvif *vif = NULL;

	spin_lock_irq(&netbk->net_schedule_list_lock);
	if (list_empty(&netbk->net_schedule_list))
		goto out;

	vif = list_first_entry(&netbk->net_schedule_list,
			       struct xenvif, schedule_list);

	remove_from_net_schedule_list(vif);
out:
	spin_unlock_irq(&netbk->net_schedule_list_lock);
	return vif;
}
void xen_netbk_schedule_xenvif(struct xenvif *vif)
{
	unsigned long flags;
	struct xen_netbk *netbk = vif->netbk;

	if (__on_net_schedule_list(vif))
		goto kick;

	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
	if (!__on_net_schedule_list(vif) &&
	    likely(xenvif_schedulable(vif))) {
		list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
	}
	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);

kick:
	smp_mb();
	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
	    !list_empty(&netbk->net_schedule_list))
		xen_netbk_kick_thread(netbk);
}
void xen_netbk_deschedule_xenvif(struct xenvif *vif)
{
	struct xen_netbk *netbk = vif->netbk;
	spin_lock_irq(&netbk->net_schedule_list_lock);
	remove_from_net_schedule_list(vif);
	spin_unlock_irq(&netbk->net_schedule_list_lock);
}
void xen_netbk_check_rx_xenvif(struct xenvif *vif)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

	if (more_to_do)
		xen_netbk_schedule_xenvif(vif);
}
static void tx_add_credit(struct xenvif *vif)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
	max_burst = min(max_burst, 131072UL);
	max_burst = max(max_burst, vif->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = vif->remaining_credit + vif->credit_bytes;
	if (max_credit < vif->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	vif->remaining_credit = min(max_credit, max_burst);
}
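
/*
 * Illustrative sketch, not part of the upstream driver (the helper name is
 * hypothetical): the same replenish arithmetic as tx_add_credit(), written
 * as a pure function.  With remaining = ULONG_MAX - 50000 and credit_bytes
 * = 100000 the sum wraps, so it is clamped to ULONG_MAX before being capped
 * by the allowed burst; without the clamp a replenish could leave the vif
 * with less credit than it started with.
 */
static inline unsigned long netbk_example_replenish(unsigned long remaining,
						    unsigned long credit_bytes,
						    unsigned long max_burst)
{
	unsigned long max_credit = remaining + credit_bytes;

	if (max_credit < remaining)	/* wrapped: clamp to ULONG_MAX */
		max_credit = ULONG_MAX;

	return min(max_credit, max_burst);
}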
static void tx_credit_callback(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;
	tx_add_credit(vif);
	xen_netbk_check_rx_xenvif(vif);
}
static void netbk_tx_err(struct xenvif *vif,
			 struct xen_netif_tx_request *txp, RING_IDX end)
{
	RING_IDX cons = vif->tx.req_cons;

	do {
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		if (cons >= end)
			break;
		txp = RING_GET_REQUEST(&vif->tx, cons++);
	} while (1);
	vif->tx.req_cons = cons;
	xen_netbk_check_rx_xenvif(vif);
}
*vif
,
851 struct xen_netif_tx_request
*first
,
852 struct xen_netif_tx_request
*txp
,
855 RING_IDX cons
= vif
->tx
.req_cons
;
858 if (!(first
->flags
& XEN_NETTXF_more_data
))
862 if (frags
>= work_to_do
) {
863 netdev_dbg(vif
->dev
, "Need more frags\n");
867 if (unlikely(frags
>= MAX_SKB_FRAGS
)) {
868 netdev_dbg(vif
->dev
, "Too many frags\n");
872 memcpy(txp
, RING_GET_REQUEST(&vif
->tx
, cons
+ frags
),
874 if (txp
->size
> first
->size
) {
875 netdev_dbg(vif
->dev
, "Frags galore\n");
879 first
->size
-= txp
->size
;
882 if (unlikely((txp
->offset
+ txp
->size
) > PAGE_SIZE
)) {
883 netdev_dbg(vif
->dev
, "txp->offset: %x, size: %u\n",
884 txp
->offset
, txp
->size
);
887 } while ((txp
++)->flags
& XEN_NETTXF_more_data
);
static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
					 struct sk_buff *skb,
					 unsigned long pending_idx)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL|__GFP_COLD);
	if (!page)
		return NULL;
	set_page_ext(page, netbk, pending_idx);
	netbk->mmap_pages[pending_idx] = page;
	return page;
}
static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
						  struct xenvif *vif,
						  struct sk_buff *skb,
						  struct xen_netif_tx_request *txp,
						  struct gnttab_copy *gop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	unsigned long pending_idx = *((u16 *)skb->data);
	int i, start;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = ((unsigned long)shinfo->frags[0].page == pending_idx);

	for (i = start; i < shinfo->nr_frags; i++, txp++) {
		struct page *page;
		pending_ring_idx_t index;
		struct pending_tx_info *pending_tx_info =
			netbk->pending_tx_info;

		index = pending_index(netbk->pending_cons++);
		pending_idx = netbk->pending_ring[index];
		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
		if (!page)
			return NULL;

		netbk->mmap_pages[pending_idx] = page;

		gop->source.u.ref = txp->gref;
		gop->source.domid = vif->domid;
		gop->source.offset = txp->offset;

		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
		gop->dest.domid = DOMID_SELF;
		gop->dest.offset = txp->offset;

		gop->len = txp->size;
		gop->flags = GNTCOPY_source_gref;

		gop++;

		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
		pending_tx_info[pending_idx].vif = vif;
		frags[i].page = (void *)pending_idx;
	}

	return gop;
}
static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
				  struct sk_buff *skb,
				  struct gnttab_copy **gopp)
{
	struct gnttab_copy *gop = *gopp;
	int pending_idx = *((u16 *)skb->data);
	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
	struct xenvif *vif = pending_tx_info[pending_idx].vif;
	struct xen_netif_tx_request *txp;
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i, err, start;

	/* Check status of header. */
	err = gop->status;
	if (unlikely(err)) {
		pending_ring_idx_t index;
		index = pending_index(netbk->pending_prod++);
		txp = &pending_tx_info[pending_idx].req;
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		netbk->pending_ring[index] = pending_idx;
	}

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = ((unsigned long)shinfo->frags[0].page == pending_idx);

	for (i = start; i < nr_frags; i++) {
		int j, newerr;
		pending_ring_idx_t index;

		pending_idx = (unsigned long)shinfo->frags[i].page;

		/* Check error status: if okay then remember grant handle. */
		newerr = (++gop)->status;
		if (likely(!newerr)) {
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				xen_netbk_idx_release(netbk, pending_idx);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		txp = &netbk->pending_tx_info[pending_idx].req;
		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
		index = pending_index(netbk->pending_prod++);
		netbk->pending_ring[index] = pending_idx;

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: invalidate header and preceding fragments. */
		pending_idx = *((u16 *)skb->data);
		xen_netbk_idx_release(netbk, pending_idx);
		for (j = start; j < i; j++) {
			pending_idx = (unsigned long)shinfo->frags[j].page;
			xen_netbk_idx_release(netbk, pending_idx);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	*gopp = gop + 1;
	return err;
}
static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		unsigned long pending_idx;

		pending_idx = (unsigned long)frag->page;

		txp = &netbk->pending_tx_info[pending_idx].req;
		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
		frag->size = txp->size;
		frag->page_offset = txp->offset;

		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset xen_netbk_idx_release */
		get_page(netbk->mmap_pages[pending_idx]);
		xen_netbk_idx_release(netbk, pending_idx);
	}
}
static int xen_netbk_get_extras(struct xenvif *vif,
				struct xen_netif_extra_info *extras,
				int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = vif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_dbg(vif->dev, "Missing extra info\n");
			return -EBADR;
		}

		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
		       sizeof(extra));
		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			vif->tx.req_cons = ++cons;
			netdev_dbg(vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
		vif->tx.req_cons = ++cons;
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}
static int netbk_set_skb_gso(struct xenvif *vif,
			     struct sk_buff *skb,
			     struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_dbg(vif->dev, "GSO size must not be zero.\n");
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	struct iphdr *iph;
	unsigned char *th;
	int err = -EPROTO;
	int recalculate_partial_csum = 0;

	/*
	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = 1;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (skb->protocol != htons(ETH_P_IP))
		goto out;

	iph = (void *)skb->data;
	th = skb->data + 4 * iph->ihl;
	if (th >= skb_tail_pointer(skb))
		goto out;

	skb->csum_start = th - skb->head;
	switch (iph->protocol) {
	case IPPROTO_TCP:
		skb->csum_offset = offsetof(struct tcphdr, check);

		if (recalculate_partial_csum) {
			struct tcphdr *tcph = (struct tcphdr *)th;
			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		skb->csum_offset = offsetof(struct udphdr, check);

		if (recalculate_partial_csum) {
			struct udphdr *udph = (struct udphdr *)th;
			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
				   iph->protocol);
		goto out;
	}

	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
		goto out;

	err = 0;

out:
	return err;
}
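
/*
 * Illustrative sketch, not part of the upstream driver (the helper name is
 * hypothetical): csum_tcpudp_magic() above seeds the transport checksum
 * field with the negated pseudo-header sum; the payload is folded in later,
 * starting at csum_start, using the usual ones'-complement fold shown here.
 */
static inline u16 netbk_example_csum_fold(u32 sum)
{
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (u16)~sum;
}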
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
	unsigned long now = jiffies;
	unsigned long next_credit =
		vif->credit_timeout.expires +
		msecs_to_jiffies(vif->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&vif->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq(now, next_credit)) {
		vif->credit_timeout.expires = now;
		tx_add_credit(vif);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > vif->remaining_credit) {
		vif->credit_timeout.data     =
			(unsigned long)vif;
		vif->credit_timeout.function =
			tx_credit_callback;
		mod_timer(&vif->credit_timeout,
			  next_credit);

		return true;
	}

	return false;
}
static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
{
	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
	struct sk_buff *skb;
	int ret;

	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
		!list_empty(&netbk->net_schedule_list)) {
		struct xenvif *vif;
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
		struct page *page;
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		/* Get a netif from the list with work to do. */
		vif = poll_net_schedule_list(netbk);
		if (!vif)
			continue;

		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
		if (!work_to_do)
			continue;

		idx = vif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > vif->remaining_credit &&
		    tx_credit_exceeded(vif, txreq.size))
			continue;

		vif->remaining_credit -= txreq.size;

		work_to_do--;
		vif->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xen_netbk_get_extras(vif, extras,
							  work_to_do);
			idx = vif->tx.req_cons;
			if (unlikely(work_to_do < 0)) {
				netbk_tx_err(vif, &txreq, idx);
				continue;
			}
		}

		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
		if (unlikely(ret < 0)) {
			netbk_tx_err(vif, &txreq, idx - ret);
			continue;
		}
		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			netbk_tx_err(vif, &txreq, idx);
			continue;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			netdev_dbg(vif->dev,
				   "txreq.offset: %x, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (txreq.offset&~PAGE_MASK) + txreq.size);
			netbk_tx_err(vif, &txreq, idx);
			continue;
		}

		index = pending_index(netbk->pending_cons);
		pending_idx = netbk->pending_ring[index];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < MAX_SKB_FRAGS) ?
			PKT_PROT_LEN : txreq.size;

		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
				GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(skb == NULL)) {
			netdev_dbg(vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			netbk_tx_err(vif, &txreq, idx);
			break;
		}

		/* Packets passed to netif_rx() must have some headroom. */
		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (netbk_set_skb_gso(vif, skb, gso)) {
				kfree_skb(skb);
				netbk_tx_err(vif, &txreq, idx);
				continue;
			}
		}

		/* XXX could copy straight to head */
		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
		if (!page) {
			kfree_skb(skb);
			netbk_tx_err(vif, &txreq, idx);
			continue;
		}

		netbk->mmap_pages[pending_idx] = page;

		gop->source.u.ref = txreq.gref;
		gop->source.domid = vif->domid;
		gop->source.offset = txreq.offset;

		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
		gop->dest.domid = DOMID_SELF;
		gop->dest.offset = txreq.offset;

		gop->len = txreq.size;
		gop->flags = GNTCOPY_source_gref;

		gop++;

		memcpy(&netbk->pending_tx_info[pending_idx].req,
		       &txreq, sizeof(txreq));
		netbk->pending_tx_info[pending_idx].vif = vif;
		*((u16 *)skb->data) = pending_idx;

		__skb_put(skb, data_len);

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			skb_shinfo(skb)->frags[0].page =
				(void *)(unsigned long)pending_idx;
		} else {
			/* Discriminate from any valid pending_idx value. */
			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
		}

		__skb_queue_tail(&netbk->tx_queue, skb);

		netbk->pending_cons++;

		request_gop = xen_netbk_get_requests(netbk, vif,
						     skb, txfrags, gop);
		if (request_gop == NULL) {
			kfree_skb(skb);
			netbk_tx_err(vif, &txreq, idx);
			continue;
		}
		gop = request_gop;

		vif->tx.req_cons = idx;
		xen_netbk_check_rx_xenvif(vif);

		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
			break;
	}

	return gop - netbk->tx_copy_ops;
}
static void xen_netbk_tx_submit(struct xen_netbk *netbk)
{
	struct gnttab_copy *gop = netbk->tx_copy_ops;
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		struct xenvif *vif;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = *((u16 *)skb->data);
		vif = netbk->pending_tx_info[pending_idx].vif;
		txp = &netbk->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
			netdev_dbg(vif->dev, "netback grant failed.\n");
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		memcpy(skb->data,
		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
		       data_len);
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xen_netbk_idx_release(netbk, pending_idx);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xen_netbk_fill_frags(netbk, skb);

		/*
		 * If the initial fragment was < PKT_PROT_LEN then
		 * pull through some bytes from the other fragments to
		 * increase the linear region to PKT_PROT_LEN bytes.
		 */
		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}

		skb->dev      = vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);

		if (checksum_setup(vif, skb)) {
			netdev_dbg(vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			kfree_skb(skb);
			continue;
		}

		vif->dev->stats.rx_bytes += skb->len;
		vif->dev->stats.rx_packets++;

		xenvif_receive_skb(vif, skb);
	}
}
/* Called after netfront has transmitted */
static void xen_netbk_tx_action(struct xen_netbk *netbk)
{
	unsigned nr_gops;
	int ret;

	nr_gops = xen_netbk_tx_build_gops(netbk);

	if (nr_gops == 0)
		return;
	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
					netbk->tx_copy_ops, nr_gops);
	BUG_ON(ret);

	xen_netbk_tx_submit(netbk);
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
{
	struct xenvif *vif;
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t index;

	/* Already complete? */
	if (netbk->mmap_pages[pending_idx] == NULL)
		return;

	pending_tx_info = &netbk->pending_tx_info[pending_idx];

	vif = pending_tx_info->vif;

	make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);

	index = pending_index(netbk->pending_prod++);
	netbk->pending_ring[index] = pending_idx;

	netbk->mmap_pages[pending_idx]->mapping = 0;
	put_page(netbk->mmap_pages[pending_idx]);
	netbk->mmap_pages[pending_idx] = NULL;
}
static void make_tx_response(struct xenvif *vif,
			     struct xen_netif_tx_request *txp,
			     s8       st)
{
	RING_IDX i = vif->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;
	int notify;

	resp = RING_GET_RESPONSE(&vif->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & XEN_NETTXF_extra_info)
		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	vif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
	if (notify)
		notify_remote_via_irq(vif->irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
						       u16      id,
						       s8       st,
						       u16      offset,
						       u16      size,
						       u16      flags)
{
	RING_IDX i = vif->rx.rsp_prod_pvt;
	struct xen_netif_rx_response *resp;

	resp = RING_GET_RESPONSE(&vif->rx, i);
	resp->offset     = offset;
	resp->flags      = flags;
	resp->id         = id;
	resp->status     = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	vif->rx.rsp_prod_pvt = ++i;

	return resp;
}
static inline int rx_work_todo(struct xen_netbk *netbk)
{
	return !skb_queue_empty(&netbk->rx_queue);
}
static inline int tx_work_todo(struct xen_netbk *netbk)
{
	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
	    !list_empty(&netbk->net_schedule_list))
		return 1;

	return 0;
}
static int xen_netbk_kthread(void *data)
{
	struct xen_netbk *netbk = data;
	while (!kthread_should_stop()) {
		wait_event_interruptible(netbk->wq,
				rx_work_todo(netbk) ||
				tx_work_todo(netbk) ||
				kthread_should_stop());
		cond_resched();

		if (kthread_should_stop())
			break;

		if (rx_work_todo(netbk))
			xen_netbk_rx_action(netbk);

		if (tx_work_todo(netbk))
			xen_netbk_tx_action(netbk);
	}

	return 0;
}
void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
{
	struct gnttab_unmap_grant_ref op;

	if (vif->tx.sring) {
		gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
				    GNTMAP_host_map, vif->tx_shmem_handle);

		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
			BUG();
	}

	if (vif->rx.sring) {
		gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
				    GNTMAP_host_map, vif->rx_shmem_handle);

		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
			BUG();
	}
	if (vif->rx_comms_area)
		free_vm_area(vif->rx_comms_area);
	if (vif->tx_comms_area)
		free_vm_area(vif->tx_comms_area);
}
int xen_netbk_map_frontend_rings(struct xenvif *vif,
				 grant_ref_t tx_ring_ref,
				 grant_ref_t rx_ring_ref)
{
	struct gnttab_map_grant_ref op;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
	if (vif->tx_comms_area == NULL)
		goto err;

	vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
	if (vif->rx_comms_area == NULL)
		goto err;

	gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
			  GNTMAP_host_map, tx_ring_ref, vif->domid);

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();

	if (op.status) {
		netdev_warn(vif->dev,
			    "failed to map tx ring. err=%d status=%d\n",
			    err, op.status);
		err = op.status;
		goto err;
	}

	vif->tx_shmem_ref    = tx_ring_ref;
	vif->tx_shmem_handle = op.handle;

	txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

	gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
			  GNTMAP_host_map, rx_ring_ref, vif->domid);

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();

	if (op.status) {
		netdev_warn(vif->dev,
			    "failed to map rx ring. err=%d status=%d\n",
			    err, op.status);
		err = op.status;
		goto err;
	}

	vif->rx_shmem_ref     = rx_ring_ref;
	vif->rx_shmem_handle  = op.handle;
	vif->rx_req_cons_peek = 0;

	rxs = (struct xen_netif_rx_sring *)vif->rx_comms_area->addr;
	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

	return 0;

err:
	xen_netbk_unmap_frontend_rings(vif);
	return err;
}
static int __init netback_init(void)
{
	int i;
	int rc = 0;
	int group;

	if (!xen_pv_domain())
		return -ENODEV;

	xen_netbk_group_nr = num_online_cpus();
	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
	if (!xen_netbk) {
		printk(KERN_ALERT "%s: out of memory\n", __func__);
		return -ENOMEM;
	}

	for (group = 0; group < xen_netbk_group_nr; group++) {
		struct xen_netbk *netbk = &xen_netbk[group];
		skb_queue_head_init(&netbk->rx_queue);
		skb_queue_head_init(&netbk->tx_queue);

		init_timer(&netbk->net_timer);
		netbk->net_timer.data = (unsigned long)netbk;
		netbk->net_timer.function = xen_netbk_alarm;

		netbk->pending_cons = 0;
		netbk->pending_prod = MAX_PENDING_REQS;
		for (i = 0; i < MAX_PENDING_REQS; i++)
			netbk->pending_ring[i] = i;

		init_waitqueue_head(&netbk->wq);
		netbk->task = kthread_create(xen_netbk_kthread,
					     (void *)netbk,
					     "netback/%u", group);

		if (IS_ERR(netbk->task)) {
			printk(KERN_ALERT "kthread_run() fails at netback\n");
			del_timer(&netbk->net_timer);
			rc = PTR_ERR(netbk->task);
			goto failed_init;
		}

		kthread_bind(netbk->task, group);

		INIT_LIST_HEAD(&netbk->net_schedule_list);

		spin_lock_init(&netbk->net_schedule_list_lock);

		atomic_set(&netbk->netfront_count, 0);

		wake_up_process(netbk->task);
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

	return 0;

failed_init:
	while (--group >= 0) {
		struct xen_netbk *netbk = &xen_netbk[group];
		for (i = 0; i < MAX_PENDING_REQS; i++) {
			if (netbk->mmap_pages[i])
				__free_page(netbk->mmap_pages[i]);
		}
		del_timer(&netbk->net_timer);
		kthread_stop(netbk->task);
	}
	vfree(xen_netbk);
	return rc;
}

module_init(netback_init);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");