/*
 * Copyright (c) 2016 Citrix Systems Inc.
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>

#include <xen/xen.h>
#include <xen/events.h>
/*
 * Update the needed ring page slots for the first SKB queued.
 * Note that any call sequence outside the RX thread calling this function
 * needs to wake up the RX thread via a call of xenvif_kick_thread()
 * afterwards in order to avoid a race with putting the thread to sleep.
 */
static void xenvif_update_needed_slots(struct xenvif_queue *queue,
				       const struct sk_buff *skb)
{
	unsigned int needed = 0;

	if (skb) {
		needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
		if (skb_is_gso(skb))
			needed++;
		if (skb->sw_hash)
			needed++;
	}

	WRITE_ONCE(queue->rx_slots_needed, needed);
}
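
/* Check whether the frontend has published enough rx ring requests to
 * cover the slots needed by the packet at the head of the queue. If not,
 * re-arm req_event so the frontend notifies us when more requests arrive.
 */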
static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;
	unsigned int needed;

	needed = READ_ONCE(queue->rx_slots_needed);
	if (!needed)
		return false;

	do {
		prod = queue->rx.sring->req_prod;
		cons = queue->rx.req_cons;

		if (prod - cons >= needed)
			return true;

		queue->rx.sring->req_event = prod + 1;

		/* Make sure event is visible before we check prod
		 * again.
		 */
		mb();
	} while (queue->rx.sring->req_prod != prod);

	return false;
}
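
/* Queue an skb for transmission to the frontend. Returns false (and stops
 * the corresponding netdev tx queue) if the internal rx queue is full.
 */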
bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned long flags;
	bool ret = true;

	spin_lock_irqsave(&queue->rx_queue.lock, flags);

	if (queue->rx_queue_len >= queue->rx_queue_max) {
		struct net_device *dev = queue->vif->dev;

		netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
		ret = false;
	} else {
		if (skb_queue_empty(&queue->rx_queue))
			xenvif_update_needed_slots(queue, skb);

		__skb_queue_tail(&queue->rx_queue, skb);

		queue->rx_queue_len += skb->len;
	}

	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);

	return ret;
}
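
/* Remove the skb at the head of the rx queue, updating the slot estimate
 * for the new head and waking the netdev tx queue if space was freed.
 */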
static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	spin_lock_irq(&queue->rx_queue.lock);

	skb = __skb_dequeue(&queue->rx_queue);
	if (skb) {
		xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue));

		queue->rx_queue_len -= skb->len;
		if (queue->rx_queue_len < queue->rx_queue_max) {
			struct netdev_queue *txq;

			txq = netdev_get_tx_queue(queue->vif->dev, queue->id);
			netif_tx_wake_queue(txq);
		}
	}

	spin_unlock_irq(&queue->rx_queue.lock);

	return skb;
}
static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	while ((skb = xenvif_rx_dequeue(queue)) != NULL)
		kfree_skb(skb);
}
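
/* Drop queued skbs whose deadline (XENVIF_RX_CB(skb)->expires) has passed. */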
static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
{
	struct sk_buff *skb;

	for (;;) {
		skb = skb_peek(&queue->rx_queue);
		if (!skb)
			break;
		if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
			break;
		xenvif_rx_dequeue(queue);
		kfree_skb(skb);
		queue->vif->dev->stats.rx_dropped++;
	}
}
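
/* Issue the batched grant copies, fix up responses for any failed copy,
 * push the responses to the frontend and free completed skbs.
 */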
static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
{
	unsigned int i;
	int notify;

	gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num);

	for (i = 0; i < queue->rx_copy.num; i++) {
		struct gnttab_copy *op;

		op = &queue->rx_copy.op[i];

		/* If the copy failed, overwrite the status field in
		 * the corresponding response.
		 */
		if (unlikely(op->status != GNTST_okay)) {
			struct xen_netif_rx_response *rsp;

			rsp = RING_GET_RESPONSE(&queue->rx,
						queue->rx_copy.idx[i]);
			rsp->status = op->status;
		}
	}

	queue->rx_copy.num = 0;

	/* Push responses for all completed packets. */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
	if (notify)
		notify_remote_via_irq(queue->rx_irq);

	__skb_queue_purge(queue->rx_copy.completed);
}
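
/* Add one grant copy operation for a chunk of packet data, flushing the
 * batch first if it is already full.
 */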
static void xenvif_rx_copy_add(struct xenvif_queue *queue,
			       struct xen_netif_rx_request *req,
			       unsigned int offset, void *data, size_t len)
{
	struct gnttab_copy *op;
	struct page *page;
	struct xen_page_foreign *foreign;

	if (queue->rx_copy.num == COPY_BATCH_SIZE)
		xenvif_rx_copy_flush(queue);

	op = &queue->rx_copy.op[queue->rx_copy.num];

	page = virt_to_page(data);

	op->flags = GNTCOPY_dest_gref;

	foreign = xen_page_foreign(page);
	if (foreign) {
		op->source.domid = foreign->domid;
		op->source.u.ref = foreign->gref;
		op->flags |= GNTCOPY_source_gref;
	} else {
		op->source.u.gmfn = virt_to_gfn(data);
		op->source.domid = DOMID_SELF;
	}

	op->source.offset = xen_offset_in_page(data);
	op->dest.u.ref = req->gref;
	op->dest.domid = queue->vif->domid;
	op->dest.offset = offset;
	op->len = len;

	queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons;
	queue->rx_copy.num++;
}
static unsigned int xenvif_gso_type(struct sk_buff *skb)
{
	if (skb_is_gso(skb)) {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			return XEN_NETIF_GSO_TYPE_TCPV4;
		else
			return XEN_NETIF_GSO_TYPE_TCPV6;
	}
	return XEN_NETIF_GSO_TYPE_NONE;
}
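
/* Per-packet state tracked while a queued skb is turned into rx responses. */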
struct xenvif_pkt_state {
	struct sk_buff *skb;
	size_t remaining_len;
	struct sk_buff *frag_iter;
	int frag; /* frag == -1 => frag_iter->head */
	unsigned int frag_offset;
	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
	unsigned int extra_count;
	unsigned int slot;
};
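
/* Dequeue the next skb and initialise the packet state, including any
 * GSO, XDP headroom and hash extra info slots to be sent to the frontend.
 */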
static void xenvif_rx_next_skb(struct xenvif_queue *queue,
			       struct xenvif_pkt_state *pkt)
{
	struct sk_buff *skb;
	unsigned int gso_type;

	skb = xenvif_rx_dequeue(queue);

	queue->stats.tx_bytes += skb->len;
	queue->stats.tx_packets++;

	/* Reset packet state. */
	memset(pkt, 0, sizeof(struct xenvif_pkt_state));

	pkt->skb = skb;
	pkt->frag_iter = skb;
	pkt->remaining_len = skb->len;
	pkt->frag = -1;

	gso_type = xenvif_gso_type(skb);
	if ((1 << gso_type) & queue->vif->gso_mask) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

		extra->u.gso.type = gso_type;
		extra->u.gso.size = skb_shinfo(skb)->gso_size;
		extra->u.gso.pad = 0;
		extra->u.gso.features = 0;
		extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
		extra->flags = 0;

		pkt->extra_count++;
	}

	if (queue->vif->xdp_headroom) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];

		memset(extra, 0, sizeof(struct xen_netif_extra_info));
		extra->u.xdp.headroom = queue->vif->xdp_headroom;
		extra->type = XEN_NETIF_EXTRA_TYPE_XDP;
		extra->flags = 0;

		pkt->extra_count++;
	}

	if (skb->sw_hash) {
		struct xen_netif_extra_info *extra;

		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];

		extra->u.hash.algorithm =
			XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;

		if (skb->l4_hash)
			extra->u.hash.type =
				skb->protocol == htons(ETH_P_IP) ?
				_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
				_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
		else
			extra->u.hash.type =
				skb->protocol == htons(ETH_P_IP) ?
				_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
				_XEN_NETIF_CTRL_HASH_TYPE_IPV6;

		*(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb);

		extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
		extra->flags = 0;

		pkt->extra_count++;
	}
}
static void xenvif_rx_complete(struct xenvif_queue *queue,
			       struct xenvif_pkt_state *pkt)
{
	/* All responses are ready to be pushed. */
	queue->rx.rsp_prod_pvt = queue->rx.req_cons;

	__skb_queue_tail(queue->rx_copy.completed, pkt->skb);
}
static void xenvif_rx_next_frag(struct xenvif_pkt_state *pkt)
{
	struct sk_buff *frag_iter = pkt->frag_iter;
	unsigned int nr_frags = skb_shinfo(frag_iter)->nr_frags;

	pkt->frag++;
	pkt->frag_offset = 0;

	if (pkt->frag >= nr_frags) {
		if (frag_iter == pkt->skb)
			pkt->frag_iter = skb_shinfo(frag_iter)->frag_list;
		else
			pkt->frag_iter = frag_iter->next;

		pkt->frag = -1;
	}
}
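
/* Return the next chunk of packet data, limited to what fits in the
 * current ring slot page, advancing to the next frag when one is consumed.
 */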
static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
				 struct xenvif_pkt_state *pkt,
				 unsigned int offset, void **data,
				 size_t *len)
{
	struct sk_buff *frag_iter = pkt->frag_iter;
	void *frag_data;
	size_t frag_len, chunk_len;

	BUG_ON(!frag_iter);

	if (pkt->frag == -1) {
		frag_data = frag_iter->data;
		frag_len = skb_headlen(frag_iter);
	} else {
		skb_frag_t *frag = &skb_shinfo(frag_iter)->frags[pkt->frag];

		frag_data = skb_frag_address(frag);
		frag_len = skb_frag_size(frag);
	}

	frag_data += pkt->frag_offset;
	frag_len -= pkt->frag_offset;

	chunk_len = min_t(size_t, frag_len, XEN_PAGE_SIZE - offset);
	chunk_len = min_t(size_t, chunk_len, XEN_PAGE_SIZE -
					     xen_offset_in_page(frag_data));

	pkt->frag_offset += chunk_len;

	/* Advance to next frag? */
	if (frag_len == chunk_len)
		xenvif_rx_next_frag(pkt);

	*data = frag_data;
	*len = chunk_len;
}
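
/* Fill one data slot: copy as much packet data as fits into the page
 * granted by this rx request and build the matching response.
 */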
static void xenvif_rx_data_slot(struct xenvif_queue *queue,
				struct xenvif_pkt_state *pkt,
				struct xen_netif_rx_request *req,
				struct xen_netif_rx_response *rsp)
{
	unsigned int offset = queue->vif->xdp_headroom;
	unsigned int flags;

	do {
		size_t len;
		void *data;

		xenvif_rx_next_chunk(queue, pkt, offset, &data, &len);
		xenvif_rx_copy_add(queue, req, offset, data, len);

		offset += len;
		pkt->remaining_len -= len;

	} while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0);

	if (pkt->remaining_len > 0)
		flags = XEN_NETRXF_more_data;
	else
		flags = 0;

	if (pkt->slot == 0) {
		struct sk_buff *skb = pkt->skb;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			flags |= XEN_NETRXF_csum_blank |
				 XEN_NETRXF_data_validated;
		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
			flags |= XEN_NETRXF_data_validated;

		if (pkt->extra_count != 0)
			flags |= XEN_NETRXF_extra_info;
	}

	rsp->offset = 0;
	rsp->flags = flags;
	rsp->id = req->id;
	rsp->status = (s16)offset;
}
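
/* Consume one rx request/response pair for a pending extra info segment. */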
static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
				 struct xenvif_pkt_state *pkt,
				 struct xen_netif_rx_request *req,
				 struct xen_netif_rx_response *rsp)
{
	struct xen_netif_extra_info *extra = (void *)rsp;
	unsigned int i;

	pkt->extra_count--;

	for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) {
		if (pkt->extras[i].type) {
			*extra = pkt->extras[i];

			if (pkt->extra_count != 0)
				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;

			pkt->extras[i].type = 0;
			return;
		}
	}
	BUG();
}
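
/* Transmit one queued skb to the frontend, consuming one rx request per
 * data or extra info slot.
 */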
static void xenvif_rx_skb(struct xenvif_queue *queue)
{
	struct xenvif_pkt_state pkt;

	xenvif_rx_next_skb(queue, &pkt);

	queue->last_rx_time = jiffies;

	do {
		struct xen_netif_rx_request *req;
		struct xen_netif_rx_response *rsp;

		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons);
		rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons);

		/* Extras must go after the first data slot */
		if (pkt.slot != 0 && pkt.extra_count != 0)
			xenvif_rx_extra_slot(queue, &pkt, req, rsp);
		else
			xenvif_rx_data_slot(queue, &pkt, req, rsp);

		queue->rx.req_cons++;
		pkt.slot++;

	} while (pkt.remaining_len > 0 || pkt.extra_count != 0);

	xenvif_rx_complete(queue, &pkt);
}
#define RX_BATCH_SIZE 64

static void xenvif_rx_action(struct xenvif_queue *queue)
{
	struct sk_buff_head completed_skbs;
	unsigned int work_done = 0;

	__skb_queue_head_init(&completed_skbs);
	queue->rx_copy.completed = &completed_skbs;

	while (xenvif_rx_ring_slots_available(queue) &&
	       !skb_queue_empty(&queue->rx_queue) &&
	       work_done < RX_BATCH_SIZE) {
		xenvif_rx_skb(queue);
		work_done++;
	}

	/* Flush any pending copies and complete all skbs. */
	xenvif_rx_copy_flush(queue);
}
static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return prod - cons;
}
static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue)
{
	unsigned int needed = READ_ONCE(queue->rx_slots_needed);

	return !queue->stalled &&
		xenvif_rx_queue_slots(queue) < needed &&
		time_after(jiffies,
			   queue->last_rx_time + queue->vif->stall_timeout);
}
static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
	unsigned int needed = READ_ONCE(queue->rx_slots_needed);

	return queue->stalled && xenvif_rx_queue_slots(queue) >= needed;
}
bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
{
	return xenvif_rx_ring_slots_available(queue) ||
		(queue->vif->stall_timeout &&
		 (xenvif_rx_queue_stalled(queue) ||
		  xenvif_rx_queue_ready(queue))) ||
		(test_kthread && kthread_should_stop()) ||
		queue->vif->disabled;
}
static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
{
	struct sk_buff *skb;
	long timeout;

	skb = skb_peek(&queue->rx_queue);
	if (!skb)
		return MAX_SCHEDULE_TIMEOUT;

	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
	return timeout < 0 ? 0 : timeout;
}
/* Wait until the guest Rx thread has work.
 *
 * The timeout needs to be adjusted based on the current head of the
 * queue (and not just the head at the beginning).  In particular, if
 * the queue is initially empty an infinite timeout is used and this
 * needs to be reduced when a skb is queued.
 *
 * This cannot be done with wait_event_timeout() because it only
 * calculates the timeout once.
 */
static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
	DEFINE_WAIT(wait);

	if (xenvif_have_rx_work(queue, true))
		return;

	for (;;) {
		long ret;

		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
		if (xenvif_have_rx_work(queue, true))
			break;
		if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI,
					&queue->eoi_pending) &
		    (NETBK_RX_EOI | NETBK_COMMON_EOI))
			xen_irq_lateeoi(queue->rx_irq, 0);

		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
		if (!ret)
			break;
	}
	finish_wait(&queue->wq, &wait);
}
static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->stalled = true;

	/* At least one queue has stalled? Disable the carrier. */
	spin_lock(&vif->lock);
	if (vif->stalled_queues++ == 0) {
		netdev_info(vif->dev, "Guest Rx stalled");
		netif_carrier_off(vif->dev);
	}
	spin_unlock(&vif->lock);
}
static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
	struct xenvif *vif = queue->vif;

	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
	queue->stalled = false;

	/* All queues are ready? Enable the carrier. */
	spin_lock(&vif->lock);
	if (--vif->stalled_queues == 0) {
		netdev_info(vif->dev, "Guest Rx ready");
		netif_carrier_on(vif->dev);
	}
	spin_unlock(&vif->lock);
}
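
/* Per-queue kernel thread handling transmission of queued skbs to the
 * frontend, carrier/stall handling and expiry of queued packets.
 */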
int xenvif_kthread_guest_rx(void *data)
{
	struct xenvif_queue *queue = data;
	struct xenvif *vif = queue->vif;

	if (!vif->stall_timeout)
		xenvif_queue_carrier_on(queue);

	for (;;) {
		xenvif_wait_for_rx_work(queue);

		if (kthread_should_stop())
			break;

		/* This frontend is found to be rogue, disable it in
		 * kthread context. Currently this is only set when
		 * netback finds out frontend sends malformed packet,
		 * but we cannot disable the interface in softirq
		 * context so we defer it here, if this thread is
		 * associated with queue 0.
		 */
		if (unlikely(vif->disabled && queue->id == 0)) {
			xenvif_carrier_off(vif);
			break;
		}

		if (!skb_queue_empty(&queue->rx_queue))
			xenvif_rx_action(queue);

		/* If the guest hasn't provided any Rx slots for a
		 * while it's probably not responsive, drop the
		 * carrier so packets are dropped earlier.
		 */
		if (vif->stall_timeout) {
			if (xenvif_rx_queue_stalled(queue))
				xenvif_queue_carrier_off(queue);
			else if (xenvif_rx_queue_ready(queue))
				xenvif_queue_carrier_on(queue);
		}

		/* Queued packets may have foreign pages from other
		 * domains.  These cannot be queued indefinitely as
		 * this would starve guests of grant refs and transmit
		 * slots.
		 */
		xenvif_rx_queue_drop_expired(queue);

		cond_resched();
	}

	/* Bin any remaining skbs */
	xenvif_rx_queue_purge(queue);

	return 0;
}