/*
 * Copyright (c) 2016 Citrix Systems Inc.
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <linux/wait.h>

#include <asm/barrier.h>

#include <xen/events.h>
36 static bool xenvif_rx_ring_slots_available(struct xenvif_queue
*queue
)
42 skb
= skb_peek(&queue
->rx_queue
);
46 needed
= DIV_ROUND_UP(skb
->len
, XEN_PAGE_SIZE
);
53 prod
= queue
->rx
.sring
->req_prod
;
54 cons
= queue
->rx
.req_cons
;
56 if (prod
- cons
>= needed
)
59 queue
->rx
.sring
->req_event
= prod
+ 1;
61 /* Make sure event is visible before we check prod
65 } while (queue
->rx
.sring
->req_prod
!= prod
);
70 void xenvif_rx_queue_tail(struct xenvif_queue
*queue
, struct sk_buff
*skb
)
74 spin_lock_irqsave(&queue
->rx_queue
.lock
, flags
);
76 __skb_queue_tail(&queue
->rx_queue
, skb
);
78 queue
->rx_queue_len
+= skb
->len
;
79 if (queue
->rx_queue_len
> queue
->rx_queue_max
) {
80 struct net_device
*dev
= queue
->vif
->dev
;
82 netif_tx_stop_queue(netdev_get_tx_queue(dev
, queue
->id
));
85 spin_unlock_irqrestore(&queue
->rx_queue
.lock
, flags
);
88 static struct sk_buff
*xenvif_rx_dequeue(struct xenvif_queue
*queue
)
92 spin_lock_irq(&queue
->rx_queue
.lock
);
94 skb
= __skb_dequeue(&queue
->rx_queue
);
96 queue
->rx_queue_len
-= skb
->len
;
97 if (queue
->rx_queue_len
< queue
->rx_queue_max
) {
98 struct netdev_queue
*txq
;
100 txq
= netdev_get_tx_queue(queue
->vif
->dev
, queue
->id
);
101 netif_tx_wake_queue(txq
);
105 spin_unlock_irq(&queue
->rx_queue
.lock
);
110 static void xenvif_rx_queue_purge(struct xenvif_queue
*queue
)
114 while ((skb
= xenvif_rx_dequeue(queue
)) != NULL
)
118 static void xenvif_rx_queue_drop_expired(struct xenvif_queue
*queue
)
123 skb
= skb_peek(&queue
->rx_queue
);
126 if (time_before(jiffies
, XENVIF_RX_CB(skb
)->expires
))
128 xenvif_rx_dequeue(queue
);
133 static void xenvif_rx_copy_flush(struct xenvif_queue
*queue
)
138 gnttab_batch_copy(queue
->rx_copy
.op
, queue
->rx_copy
.num
);
140 for (i
= 0; i
< queue
->rx_copy
.num
; i
++) {
141 struct gnttab_copy
*op
;
143 op
= &queue
->rx_copy
.op
[i
];
145 /* If the copy failed, overwrite the status field in
146 * the corresponding response.
148 if (unlikely(op
->status
!= GNTST_okay
)) {
149 struct xen_netif_rx_response
*rsp
;
151 rsp
= RING_GET_RESPONSE(&queue
->rx
,
152 queue
->rx_copy
.idx
[i
]);
153 rsp
->status
= op
->status
;
157 queue
->rx_copy
.num
= 0;
159 /* Push responses for all completed packets. */
160 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue
->rx
, notify
);
162 notify_remote_via_irq(queue
->rx_irq
);
164 __skb_queue_purge(queue
->rx_copy
.completed
);
167 static void xenvif_rx_copy_add(struct xenvif_queue
*queue
,
168 struct xen_netif_rx_request
*req
,
169 unsigned int offset
, void *data
, size_t len
)
171 struct gnttab_copy
*op
;
173 struct xen_page_foreign
*foreign
;
175 if (queue
->rx_copy
.num
== COPY_BATCH_SIZE
)
176 xenvif_rx_copy_flush(queue
);
178 op
= &queue
->rx_copy
.op
[queue
->rx_copy
.num
];
180 page
= virt_to_page(data
);
182 op
->flags
= GNTCOPY_dest_gref
;
184 foreign
= xen_page_foreign(page
);
186 op
->source
.domid
= foreign
->domid
;
187 op
->source
.u
.ref
= foreign
->gref
;
188 op
->flags
|= GNTCOPY_source_gref
;
190 op
->source
.u
.gmfn
= virt_to_gfn(data
);
191 op
->source
.domid
= DOMID_SELF
;
194 op
->source
.offset
= xen_offset_in_page(data
);
195 op
->dest
.u
.ref
= req
->gref
;
196 op
->dest
.domid
= queue
->vif
->domid
;
197 op
->dest
.offset
= offset
;
200 queue
->rx_copy
.idx
[queue
->rx_copy
.num
] = queue
->rx
.req_cons
;
201 queue
->rx_copy
.num
++;
204 static unsigned int xenvif_gso_type(struct sk_buff
*skb
)
206 if (skb_is_gso(skb
)) {
207 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
)
208 return XEN_NETIF_GSO_TYPE_TCPV4
;
210 return XEN_NETIF_GSO_TYPE_TCPV6
;
212 return XEN_NETIF_GSO_TYPE_NONE
;
215 struct xenvif_pkt_state
{
217 size_t remaining_len
;
218 struct sk_buff
*frag_iter
;
219 int frag
; /* frag == -1 => frag_iter->head */
220 unsigned int frag_offset
;
221 struct xen_netif_extra_info extras
[XEN_NETIF_EXTRA_TYPE_MAX
- 1];
222 unsigned int extra_count
;
226 static void xenvif_rx_next_skb(struct xenvif_queue
*queue
,
227 struct xenvif_pkt_state
*pkt
)
230 unsigned int gso_type
;
232 skb
= xenvif_rx_dequeue(queue
);
234 queue
->stats
.tx_bytes
+= skb
->len
;
235 queue
->stats
.tx_packets
++;
237 /* Reset packet state. */
238 memset(pkt
, 0, sizeof(struct xenvif_pkt_state
));
241 pkt
->frag_iter
= skb
;
242 pkt
->remaining_len
= skb
->len
;
245 gso_type
= xenvif_gso_type(skb
);
246 if ((1 << gso_type
) & queue
->vif
->gso_mask
) {
247 struct xen_netif_extra_info
*extra
;
249 extra
= &pkt
->extras
[XEN_NETIF_EXTRA_TYPE_GSO
- 1];
251 extra
->u
.gso
.type
= gso_type
;
252 extra
->u
.gso
.size
= skb_shinfo(skb
)->gso_size
;
253 extra
->u
.gso
.pad
= 0;
254 extra
->u
.gso
.features
= 0;
255 extra
->type
= XEN_NETIF_EXTRA_TYPE_GSO
;
262 struct xen_netif_extra_info
*extra
;
264 extra
= &pkt
->extras
[XEN_NETIF_EXTRA_TYPE_HASH
- 1];
266 extra
->u
.hash
.algorithm
=
267 XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ
;
271 skb
->protocol
== htons(ETH_P_IP
) ?
272 _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP
:
273 _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP
;
276 skb
->protocol
== htons(ETH_P_IP
) ?
277 _XEN_NETIF_CTRL_HASH_TYPE_IPV4
:
278 _XEN_NETIF_CTRL_HASH_TYPE_IPV6
;
280 *(uint32_t *)extra
->u
.hash
.value
= skb_get_hash_raw(skb
);
282 extra
->type
= XEN_NETIF_EXTRA_TYPE_HASH
;
289 static void xenvif_rx_complete(struct xenvif_queue
*queue
,
290 struct xenvif_pkt_state
*pkt
)
292 /* All responses are ready to be pushed. */
293 queue
->rx
.rsp_prod_pvt
= queue
->rx
.req_cons
;
295 __skb_queue_tail(queue
->rx_copy
.completed
, pkt
->skb
);
298 static void xenvif_rx_next_frag(struct xenvif_pkt_state
*pkt
)
300 struct sk_buff
*frag_iter
= pkt
->frag_iter
;
301 unsigned int nr_frags
= skb_shinfo(frag_iter
)->nr_frags
;
304 pkt
->frag_offset
= 0;
306 if (pkt
->frag
>= nr_frags
) {
307 if (frag_iter
== pkt
->skb
)
308 pkt
->frag_iter
= skb_shinfo(frag_iter
)->frag_list
;
310 pkt
->frag_iter
= frag_iter
->next
;
316 static void xenvif_rx_next_chunk(struct xenvif_queue
*queue
,
317 struct xenvif_pkt_state
*pkt
,
318 unsigned int offset
, void **data
,
321 struct sk_buff
*frag_iter
= pkt
->frag_iter
;
323 size_t frag_len
, chunk_len
;
327 if (pkt
->frag
== -1) {
328 frag_data
= frag_iter
->data
;
329 frag_len
= skb_headlen(frag_iter
);
331 skb_frag_t
*frag
= &skb_shinfo(frag_iter
)->frags
[pkt
->frag
];
333 frag_data
= skb_frag_address(frag
);
334 frag_len
= skb_frag_size(frag
);
337 frag_data
+= pkt
->frag_offset
;
338 frag_len
-= pkt
->frag_offset
;
340 chunk_len
= min_t(size_t, frag_len
, XEN_PAGE_SIZE
- offset
);
341 chunk_len
= min_t(size_t, chunk_len
, XEN_PAGE_SIZE
-
342 xen_offset_in_page(frag_data
));
344 pkt
->frag_offset
+= chunk_len
;
346 /* Advance to next frag? */
347 if (frag_len
== chunk_len
)
348 xenvif_rx_next_frag(pkt
);
354 static void xenvif_rx_data_slot(struct xenvif_queue
*queue
,
355 struct xenvif_pkt_state
*pkt
,
356 struct xen_netif_rx_request
*req
,
357 struct xen_netif_rx_response
*rsp
)
359 unsigned int offset
= 0;
366 xenvif_rx_next_chunk(queue
, pkt
, offset
, &data
, &len
);
367 xenvif_rx_copy_add(queue
, req
, offset
, data
, len
);
370 pkt
->remaining_len
-= len
;
372 } while (offset
< XEN_PAGE_SIZE
&& pkt
->remaining_len
> 0);
374 if (pkt
->remaining_len
> 0)
375 flags
= XEN_NETRXF_more_data
;
379 if (pkt
->slot
== 0) {
380 struct sk_buff
*skb
= pkt
->skb
;
382 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
383 flags
|= XEN_NETRXF_csum_blank
|
384 XEN_NETRXF_data_validated
;
385 else if (skb
->ip_summed
== CHECKSUM_UNNECESSARY
)
386 flags
|= XEN_NETRXF_data_validated
;
388 if (pkt
->extra_count
!= 0)
389 flags
|= XEN_NETRXF_extra_info
;
395 rsp
->status
= (s16
)offset
;
398 static void xenvif_rx_extra_slot(struct xenvif_queue
*queue
,
399 struct xenvif_pkt_state
*pkt
,
400 struct xen_netif_rx_request
*req
,
401 struct xen_netif_rx_response
*rsp
)
403 struct xen_netif_extra_info
*extra
= (void *)rsp
;
408 for (i
= 0; i
< ARRAY_SIZE(pkt
->extras
); i
++) {
409 if (pkt
->extras
[i
].type
) {
410 *extra
= pkt
->extras
[i
];
412 if (pkt
->extra_count
!= 0)
413 extra
->flags
|= XEN_NETIF_EXTRA_FLAG_MORE
;
415 pkt
->extras
[i
].type
= 0;
422 void xenvif_rx_skb(struct xenvif_queue
*queue
)
424 struct xenvif_pkt_state pkt
;
426 xenvif_rx_next_skb(queue
, &pkt
);
428 queue
->last_rx_time
= jiffies
;
431 struct xen_netif_rx_request
*req
;
432 struct xen_netif_rx_response
*rsp
;
434 req
= RING_GET_REQUEST(&queue
->rx
, queue
->rx
.req_cons
);
435 rsp
= RING_GET_RESPONSE(&queue
->rx
, queue
->rx
.req_cons
);
437 /* Extras must go after the first data slot */
438 if (pkt
.slot
!= 0 && pkt
.extra_count
!= 0)
439 xenvif_rx_extra_slot(queue
, &pkt
, req
, rsp
);
441 xenvif_rx_data_slot(queue
, &pkt
, req
, rsp
);
443 queue
->rx
.req_cons
++;
445 } while (pkt
.remaining_len
> 0 || pkt
.extra_count
!= 0);
447 xenvif_rx_complete(queue
, &pkt
);
450 #define RX_BATCH_SIZE 64
452 void xenvif_rx_action(struct xenvif_queue
*queue
)
454 struct sk_buff_head completed_skbs
;
455 unsigned int work_done
= 0;
457 __skb_queue_head_init(&completed_skbs
);
458 queue
->rx_copy
.completed
= &completed_skbs
;
460 while (xenvif_rx_ring_slots_available(queue
) &&
461 work_done
< RX_BATCH_SIZE
) {
462 xenvif_rx_skb(queue
);
466 /* Flush any pending copies and complete all skbs. */
467 xenvif_rx_copy_flush(queue
);
470 static bool xenvif_rx_queue_stalled(struct xenvif_queue
*queue
)
474 prod
= queue
->rx
.sring
->req_prod
;
475 cons
= queue
->rx
.req_cons
;
477 return !queue
->stalled
&&
480 queue
->last_rx_time
+ queue
->vif
->stall_timeout
);
483 static bool xenvif_rx_queue_ready(struct xenvif_queue
*queue
)
487 prod
= queue
->rx
.sring
->req_prod
;
488 cons
= queue
->rx
.req_cons
;
490 return queue
->stalled
&& prod
- cons
>= 1;
493 static bool xenvif_have_rx_work(struct xenvif_queue
*queue
)
495 return xenvif_rx_ring_slots_available(queue
) ||
496 (queue
->vif
->stall_timeout
&&
497 (xenvif_rx_queue_stalled(queue
) ||
498 xenvif_rx_queue_ready(queue
))) ||
499 kthread_should_stop() ||
500 queue
->vif
->disabled
;
503 static long xenvif_rx_queue_timeout(struct xenvif_queue
*queue
)
508 skb
= skb_peek(&queue
->rx_queue
);
510 return MAX_SCHEDULE_TIMEOUT
;
512 timeout
= XENVIF_RX_CB(skb
)->expires
- jiffies
;
513 return timeout
< 0 ? 0 : timeout
;
516 /* Wait until the guest Rx thread has work.
518 * The timeout needs to be adjusted based on the current head of the
519 * queue (and not just the head at the beginning). In particular, if
520 * the queue is initially empty an infinite timeout is used and this
521 * needs to be reduced when a skb is queued.
523 * This cannot be done with wait_event_timeout() because it only
524 * calculates the timeout once.
526 static void xenvif_wait_for_rx_work(struct xenvif_queue
*queue
)
530 if (xenvif_have_rx_work(queue
))
536 prepare_to_wait(&queue
->wq
, &wait
, TASK_INTERRUPTIBLE
);
537 if (xenvif_have_rx_work(queue
))
539 ret
= schedule_timeout(xenvif_rx_queue_timeout(queue
));
543 finish_wait(&queue
->wq
, &wait
);
546 static void xenvif_queue_carrier_off(struct xenvif_queue
*queue
)
548 struct xenvif
*vif
= queue
->vif
;
550 queue
->stalled
= true;
552 /* At least one queue has stalled? Disable the carrier. */
553 spin_lock(&vif
->lock
);
554 if (vif
->stalled_queues
++ == 0) {
555 netdev_info(vif
->dev
, "Guest Rx stalled");
556 netif_carrier_off(vif
->dev
);
558 spin_unlock(&vif
->lock
);
561 static void xenvif_queue_carrier_on(struct xenvif_queue
*queue
)
563 struct xenvif
*vif
= queue
->vif
;
565 queue
->last_rx_time
= jiffies
; /* Reset Rx stall detection. */
566 queue
->stalled
= false;
568 /* All queues are ready? Enable the carrier. */
569 spin_lock(&vif
->lock
);
570 if (--vif
->stalled_queues
== 0) {
571 netdev_info(vif
->dev
, "Guest Rx ready");
572 netif_carrier_on(vif
->dev
);
574 spin_unlock(&vif
->lock
);
577 int xenvif_kthread_guest_rx(void *data
)
579 struct xenvif_queue
*queue
= data
;
580 struct xenvif
*vif
= queue
->vif
;
582 if (!vif
->stall_timeout
)
583 xenvif_queue_carrier_on(queue
);
586 xenvif_wait_for_rx_work(queue
);
588 if (kthread_should_stop())
591 /* This frontend is found to be rogue, disable it in
592 * kthread context. Currently this is only set when
593 * netback finds out frontend sends malformed packet,
594 * but we cannot disable the interface in softirq
595 * context so we defer it here, if this thread is
596 * associated with queue 0.
598 if (unlikely(vif
->disabled
&& queue
->id
== 0)) {
599 xenvif_carrier_off(vif
);
603 if (!skb_queue_empty(&queue
->rx_queue
))
604 xenvif_rx_action(queue
);
606 /* If the guest hasn't provided any Rx slots for a
607 * while it's probably not responsive, drop the
608 * carrier so packets are dropped earlier.
610 if (vif
->stall_timeout
) {
611 if (xenvif_rx_queue_stalled(queue
))
612 xenvif_queue_carrier_off(queue
);
613 else if (xenvif_rx_queue_ready(queue
))
614 xenvif_queue_carrier_on(queue
);
617 /* Queued packets may have foreign pages from other
618 * domains. These cannot be queued indefinitely as
619 * this would starve guests of grant refs and transmit
622 xenvif_rx_queue_drop_expired(queue
);
627 /* Bin any remaining skbs */
628 xenvif_rx_queue_purge(queue
);