/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2008 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 ****************************************************************************/

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "tx.h"
#include "efx.h"
#include "falcon.h"
#include "workarounds.h"

/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_NETDEV_TX_THRESHOLD(_tx_queue)      \
        (_tx_queue->efx->type->txd_ring_mask / 2u)
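
/* For example, with a 4096-entry descriptor ring (txd_ring_mask == 0xfff;
 * the ring size here is purely illustrative, the real value comes from the
 * NIC type) the netif queue is only restarted once the fill level drops
 * below 0xfff / 2u == 2047 descriptors. */
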
/* We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
void efx_stop_queue(struct efx_nic *efx)
{
        spin_lock_bh(&efx->netif_stop_lock);
        EFX_TRACE(efx, "stop TX queue\n");

        atomic_inc(&efx->netif_stop_count);
        netif_stop_queue(efx->net_dev);

        spin_unlock_bh(&efx->netif_stop_lock);
}

/* Wake netif's TX queue
 * We want to be able to nest calls to netif_stop_queue(), since each
 * channel can have an individual stop on the queue.
 */
inline void efx_wake_queue(struct efx_nic *efx)
{
        local_bh_disable();
        if (atomic_dec_and_lock(&efx->netif_stop_count,
                                &efx->netif_stop_lock)) {
                EFX_TRACE(efx, "waking TX queue\n");
                netif_wake_queue(efx->net_dev);
                spin_unlock(&efx->netif_stop_lock);
        }
        local_bh_enable();
}

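/* Free up a transmit buffer and any associated socket buffer: undo the
 * DMA mapping recorded in the buffer (single or page mapping, as
 * indicated by unmap_single) and release the skb attached to the final
 * descriptor of a packet. */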
static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                                      struct efx_tx_buffer *buffer)
{
        if (buffer->unmap_len) {
                struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
                if (buffer->unmap_single)
                        pci_unmap_single(pci_dev, buffer->unmap_addr,
                                         buffer->unmap_len, PCI_DMA_TODEVICE);
                else
                        pci_unmap_page(pci_dev, buffer->unmap_addr,
                                       buffer->unmap_len, PCI_DMA_TODEVICE);
                buffer->unmap_len = 0;
                buffer->unmap_single = 0;
        }

        if (buffer->skb) {
                dev_kfree_skb_any((struct sk_buff *) buffer->skb);
                buffer->skb = NULL;
                EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
                          "complete\n", tx_queue->queue, tx_queue->read_count);
        }
}

/**
 * struct efx_tso_header - a DMA mapped buffer for packet headers
 * @next: Linked list of free ones.
 *	The list is protected by the TX queue lock.
 * @unmap_len: Length to unmap for an oversize buffer, or 0.
 * @dma_addr: The DMA address of the header below.
 *
 * This controls the memory used for a TSO header.  Use TSOH_BUFFER()
 * to find the packet header data.  Use TSOH_SIZE() to calculate the
 * total size required for a given packet header length.  TSO headers
 * in the free list are exactly %TSOH_STD_SIZE bytes in size.
 */
struct efx_tso_header {
        struct efx_tso_header *next;
        size_t unmap_len;
        dma_addr_t dma_addr;
};

static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
                               const struct sk_buff *skb);
static void efx_fini_tso(struct efx_tx_queue *tx_queue);
static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
                               struct efx_tso_header *tsoh);

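/* Recycle the TSO header attached to @buffer, if any: standard-sized
 * headers (unmap_len == 0) go back on the per-queue free list, while
 * oversize heap-allocated headers are unmapped and freed. */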
static inline void efx_tsoh_free(struct efx_tx_queue *tx_queue,
                                 struct efx_tx_buffer *buffer)
{
        if (buffer->tsoh) {
                if (likely(!buffer->tsoh->unmap_len)) {
                        buffer->tsoh->next = tx_queue->tso_headers_free;
                        tx_queue->tso_headers_free = buffer->tsoh;
                } else {
                        efx_tsoh_heap_free(tx_queue, buffer->tsoh);
                }
                buffer->tsoh = NULL;
        }
}

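/* Both enqueue paths below split each DMA-mapped region so that no
 * descriptor crosses the controller's DMA boundary: the usable length is
 * ((~dma_addr) & tx_dma_mask) + 1.  As an illustration, if tx_dma_mask
 * were 0xfff (a 4 KB boundary; the real value comes from the NIC type)
 * and dma_addr ended in 0xf80, at most 0x80 bytes would go into that
 * descriptor. */
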
/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped,
 * the queue's insert pointer will be restored to its original value.
 *
 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 * You must hold netif_tx_lock() to call this function.
 */
static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
                                  const struct sk_buff *skb)
{
        struct efx_nic *efx = tx_queue->efx;
        struct pci_dev *pci_dev = efx->pci_dev;
        struct efx_tx_buffer *buffer;
        skb_frag_t *fragment;
        struct page *page;
        int page_offset;
        unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
        dma_addr_t dma_addr, unmap_addr = 0;
        unsigned int dma_len;
        unsigned unmap_single;
        int q_space, i = 0;
        int rc = NETDEV_TX_OK;

        EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

        if (skb_shinfo((struct sk_buff *)skb)->gso_size)
                return efx_enqueue_skb_tso(tx_queue, skb);

        /* Get size of the initial fragment */
        len = skb_headlen(skb);

        fill_level = tx_queue->insert_count - tx_queue->old_read_count;
        q_space = efx->type->txd_ring_mask - 1 - fill_level;

        /* Map for DMA.  Use pci_map_single rather than pci_map_page
         * since this is more efficient on machines with sparse
         * memory.
         */
        unmap_single = 1;
        dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);

        /* Process all fragments */
        while (1) {
                if (unlikely(pci_dma_mapping_error(dma_addr)))
                        goto pci_err;

                /* Store fields for marking in the per-fragment final
                 * descriptor */
                unmap_len = len;
                unmap_addr = dma_addr;

                /* Add to TX queue, splitting across DMA boundaries */
                do {
                        if (unlikely(q_space-- <= 0)) {
                                /* It might be that completions have
                                 * happened since the xmit path last
                                 * checked.  Update the xmit path's
                                 * copy of read_count.
                                 */
                                ++tx_queue->stopped;
                                /* This memory barrier protects the
                                 * change of stopped from the access
                                 * of read_count. */
                                smp_mb();
                                tx_queue->old_read_count =
                                        *(volatile unsigned *)
                                        &tx_queue->read_count;
                                fill_level = (tx_queue->insert_count
                                              - tx_queue->old_read_count);
                                q_space = (efx->type->txd_ring_mask - 1 -
                                           fill_level);
                                if (unlikely(q_space-- <= 0))
                                        goto stop;
                                smp_mb();
                                --tx_queue->stopped;
                        }

                        insert_ptr = (tx_queue->insert_count &
                                      efx->type->txd_ring_mask);
                        buffer = &tx_queue->buffer[insert_ptr];
                        efx_tsoh_free(tx_queue, buffer);
                        EFX_BUG_ON_PARANOID(buffer->tsoh);
                        EFX_BUG_ON_PARANOID(buffer->skb);
                        EFX_BUG_ON_PARANOID(buffer->len);
                        EFX_BUG_ON_PARANOID(buffer->continuation != 1);
                        EFX_BUG_ON_PARANOID(buffer->unmap_len);

                        dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
                        if (likely(dma_len > len))
                                dma_len = len;

                        misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
                        if (misalign && dma_len + misalign > 512)
                                dma_len = 512 - misalign;

                        /* Fill out per descriptor fields */
                        buffer->len = dma_len;
                        buffer->dma_addr = dma_addr;
                        len -= dma_len;
                        dma_addr += dma_len;
                        ++tx_queue->insert_count;
                } while (len);

                /* Transfer ownership of the unmapping to the final buffer */
                buffer->unmap_addr = unmap_addr;
                buffer->unmap_single = unmap_single;
                buffer->unmap_len = unmap_len;
                unmap_len = 0;

                /* Get address and size of next fragment */
                if (i >= skb_shinfo(skb)->nr_frags)
                        break;
                fragment = &skb_shinfo(skb)->frags[i];
                len = fragment->size;
                page = fragment->page;
                page_offset = fragment->page_offset;
                i++;
                /* Map for DMA */
                unmap_single = 0;
                dma_addr = pci_map_page(pci_dev, page, page_offset, len,
                                        PCI_DMA_TODEVICE);
        }

        /* Transfer ownership of the skb to the final buffer */
        buffer->skb = skb;
        buffer->continuation = 0;

        /* Pass off to hardware */
        falcon_push_buffers(tx_queue);

        return NETDEV_TX_OK;

 pci_err:
        EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
                   "fragments for DMA\n", tx_queue->queue, skb->len,
                   skb_shinfo(skb)->nr_frags + 1);

        /* Mark the packet as transmitted, and free the SKB ourselves */
        dev_kfree_skb_any((struct sk_buff *)skb);
        goto unwind;

 stop:
        rc = NETDEV_TX_BUSY;

        if (tx_queue->stopped == 1)
                efx_stop_queue(efx);

 unwind:
        /* Work backwards until we hit the original insert pointer value */
        while (tx_queue->insert_count != tx_queue->write_count) {
                --tx_queue->insert_count;
                insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
                buffer = &tx_queue->buffer[insert_ptr];
                efx_dequeue_buffer(tx_queue, buffer);
                buffer->len = 0;
        }

        /* Free the fragment we were mid-way through pushing */
        if (unmap_len)
                pci_unmap_page(pci_dev, unmap_addr, unmap_len,
                               PCI_DMA_TODEVICE);

        return rc;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
                                       unsigned int index)
{
        struct efx_nic *efx = tx_queue->efx;
        unsigned int stop_index, read_ptr;
        unsigned int mask = tx_queue->efx->type->txd_ring_mask;

        stop_index = (index + 1) & mask;
        read_ptr = tx_queue->read_count & mask;

        while (read_ptr != stop_index) {
                struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
                if (unlikely(buffer->len == 0)) {
                        EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
                                "completion id %x\n", tx_queue->queue,
                                read_ptr);
                        efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
                        return;
                }

                efx_dequeue_buffer(tx_queue, buffer);
                buffer->continuation = 1;
                buffer->len = 0;

                ++tx_queue->read_count;
                read_ptr = tx_queue->read_count & mask;
        }
}

/* Initiate a packet transmission on the specified TX queue.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.  It is
 * therefore a non-static inline, so as not to penalise performance
 * for non-loopback transmissions.
 *
 * Context: netif_tx_lock held
 */
inline int efx_xmit(struct efx_nic *efx,
                    struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
        int rc;

        /* Map fragments for DMA and add to TX queue */
        rc = efx_enqueue_skb(tx_queue, skb);
        if (unlikely(rc != NETDEV_TX_OK))
                goto out;

        /* Update last TX timer */
        efx->net_dev->trans_start = jiffies;

 out:
        return rc;
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
        struct efx_nic *efx = net_dev->priv;
        return efx_xmit(efx, &efx->tx_queue[0], skb);
}

void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
        unsigned fill_level;
        struct efx_nic *efx = tx_queue->efx;

        EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);

        efx_dequeue_buffers(tx_queue, index);

        /* See if we need to restart the netif queue.  This barrier
         * separates the update of read_count from the test of
         * stopped. */
        smp_mb();
        if (unlikely(tx_queue->stopped)) {
                fill_level = tx_queue->insert_count - tx_queue->read_count;
                if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
                        EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));

                        /* Do this under netif_tx_lock(), to avoid racing
                         * with efx_xmit(). */
                        netif_tx_lock(efx->net_dev);
                        if (tx_queue->stopped) {
                                tx_queue->stopped = 0;
                                efx_wake_queue(efx);
                        }
                        netif_tx_unlock(efx->net_dev);
                }
        }
}

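/* The software ring below is sized from the hardware ring mask: for
 * example a 4096-entry ring (txd_ring_mask == 0xfff, used here purely as
 * an illustration) needs (mask + 1) * sizeof(struct efx_tx_buffer) bytes
 * from kzalloc(). */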
int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
        struct efx_nic *efx = tx_queue->efx;
        unsigned int txq_size;
        int i, rc;

        EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);

        /* Allocate software ring */
        txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
        tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
        if (!tx_queue->buffer) {
                rc = -ENOMEM;
                goto fail1;
        }
        for (i = 0; i <= efx->type->txd_ring_mask; ++i)
                tx_queue->buffer[i].continuation = 1;

        /* Allocate hardware ring */
        rc = falcon_probe_tx(tx_queue);
        if (rc)
                goto fail2;

        return 0;

 fail2:
        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
 fail1:
        return rc;
}

int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
        EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);

        tx_queue->insert_count = 0;
        tx_queue->write_count = 0;
        tx_queue->read_count = 0;
        tx_queue->old_read_count = 0;
        BUG_ON(tx_queue->stopped);

        /* Set up TX descriptor ring */
        return falcon_init_tx(tx_queue);
}

void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
{
        struct efx_tx_buffer *buffer;

        if (!tx_queue->buffer)
                return;

        /* Free any buffers left in the ring */
        while (tx_queue->read_count != tx_queue->write_count) {
                buffer = &tx_queue->buffer[tx_queue->read_count &
                                           tx_queue->efx->type->txd_ring_mask];
                efx_dequeue_buffer(tx_queue, buffer);
                buffer->continuation = 1;
                buffer->len = 0;

                ++tx_queue->read_count;
        }
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
        EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);

        /* Flush TX queue, remove descriptor ring */
        falcon_fini_tx(tx_queue);

        efx_release_tx_buffers(tx_queue);

        /* Free up TSO header cache */
        efx_fini_tso(tx_queue);

        /* Release queue's stop on port, if any */
        if (tx_queue->stopped) {
                tx_queue->stopped = 0;
                efx_wake_queue(tx_queue->efx);
        }
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
        EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
        falcon_remove_tx(tx_queue);

        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
}

/* Efx TCP segmentation acceleration.
 *
 * Why?  Because by doing it here in the driver we can go significantly
 * faster than the GSO.
 *
 * Requires TX checksum offload support.
 */

/* Number of bytes inserted at the start of a TSO header buffer,
 * similar to NET_IP_ALIGN.
 */
#if defined(__i386__) || defined(__x86_64__)
#define TSOH_OFFSET	0
#else
#define TSOH_OFFSET	NET_IP_ALIGN
#endif

#define TSOH_BUFFER(tsoh)	((u8 *)(tsoh + 1) + TSOH_OFFSET)

/* Total size of struct efx_tso_header, buffer and padding */
#define TSOH_SIZE(hdr_len)					\
	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)

/* Size of blocks on free list.  Larger blocks must be allocated from
 * the heap.
 */
#define TSOH_STD_SIZE		128
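
/* For a typical 14 + 20 + 20 == 54 byte Ethernet/IPv4/TCP header with no
 * options, TSOH_SIZE(54) is well under TSOH_STD_SIZE (128 bytes), so only
 * unusually long headers fall back to the heap path in
 * efx_tsoh_heap_alloc(). */
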
#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
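
/* For an ordinary untagged Ethernet frame these evaluate to
 * ETH_HDR_LEN == 14, SKB_IPV4_OFF == 14 and SKB_TCP_OFF == 34
 * (14-byte Ethernet header plus a 20-byte IPv4 header with no options). */
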
/**
 * struct tso_state - TSO state for an SKB
 * @remaining_len: Bytes of data we've yet to segment
 * @seqnum: Current sequence number
 * @packet_space: Remaining space in current packet
 * @ifc: Input fragment cursor.
 *	Where we are in the current fragment of the incoming SKB.  These
 *	values get updated in place when we split a fragment over
 *	multiple packets.
 * @p: Parameters.
 *	These values are set once at the start of the TSO send and do
 *	not get changed as the routine progresses.
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct tso_state {
        unsigned remaining_len;
        unsigned seqnum;
        unsigned packet_space;

        struct {
                /* DMA address of current position */
                dma_addr_t dma_addr;
                /* Remaining length */
                unsigned int len;
                /* DMA address and length of the whole fragment */
                unsigned int unmap_len;
                dma_addr_t unmap_addr;
                struct page *page;
                unsigned page_off;
        } ifc;

        struct {
                /* The number of bytes of header */
                unsigned int header_length;

                /* The number of bytes to put in each outgoing segment. */
                int full_packet_size;

                /* Current IPv4 ID, host endian. */
                unsigned ipv4_id;
        } p;
};

/*
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.
 */
static inline void efx_tso_check_safe(const struct sk_buff *skb)
{
        EFX_BUG_ON_PARANOID(skb->protocol != htons(ETH_P_IP));
        EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
                            skb->protocol);
        EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
        EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
                             + (tcp_hdr(skb)->doff << 2u)) >
                            skb_headlen(skb));
}

/*
 * Allocate a page worth of efx_tso_header structures, and string them
 * into the tx_queue->tso_headers_free linked list.  Return 0 or -ENOMEM.
 */
static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
{
        struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
        struct efx_tso_header *tsoh;
        dma_addr_t dma_addr;
        u8 *base_kva, *kva;

        base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
        if (base_kva == NULL) {
                EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
                        " headers\n");
                return -ENOMEM;
        }

        /* pci_alloc_consistent() allocates pages. */
        EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));

        for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
                tsoh = (struct efx_tso_header *)kva;
                tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
                tsoh->next = tx_queue->tso_headers_free;
                tx_queue->tso_headers_free = tsoh;
        }

        return 0;
}

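/* On architectures with 4 KB pages, each call above therefore adds
 * PAGE_SIZE / TSOH_STD_SIZE == 32 standard-sized headers to the free
 * list in one DMA-coherent allocation. */
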
/* Free up a TSO header, and all others in the same page. */
static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
                                struct efx_tso_header *tsoh,
                                struct pci_dev *pci_dev)
{
        struct efx_tso_header **p;
        unsigned long base_kva;
        dma_addr_t base_dma;

        base_kva = (unsigned long)tsoh & PAGE_MASK;
        base_dma = tsoh->dma_addr & PAGE_MASK;

        p = &tx_queue->tso_headers_free;
        while (*p != NULL) {
                if (((unsigned long)*p & PAGE_MASK) == base_kva)
                        *p = (*p)->next;
                else
                        p = &(*p)->next;
        }

        pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
}

static struct efx_tso_header *
efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
{
        struct efx_tso_header *tsoh;

        tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
        if (unlikely(!tsoh))
                return NULL;

        tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
                                        TSOH_BUFFER(tsoh), header_len,
                                        PCI_DMA_TODEVICE);
        if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) {
                kfree(tsoh);
                return NULL;
        }

        tsoh->unmap_len = header_len;
        return tsoh;
}

static void
efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
{
        pci_unmap_single(tx_queue->efx->pci_dev,
                         tsoh->dma_addr, tsoh->unmap_len,
                         PCI_DMA_TODEVICE);
        kfree(tsoh);
}

/**
 * efx_tx_queue_insert - push descriptors onto the TX queue
 * @tx_queue:		Efx TX queue
 * @dma_addr:		DMA address of fragment
 * @len:		Length of fragment
 * @skb:		Only non-null for end of last segment
 * @end_of_packet:	True if last fragment in a packet
 * @unmap_addr:		DMA address of fragment for unmapping
 * @unmap_len:		Only set this in last segment of a fragment
 *
 * Push descriptors onto the TX queue.  Return 0 on success or 1 if
 * @tx_queue full.
 */
static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
                               dma_addr_t dma_addr, unsigned len,
                               const struct sk_buff *skb, int end_of_packet,
                               dma_addr_t unmap_addr, unsigned unmap_len)
{
        struct efx_tx_buffer *buffer;
        struct efx_nic *efx = tx_queue->efx;
        unsigned dma_len, fill_level, insert_ptr, misalign;
        int q_space;

        EFX_BUG_ON_PARANOID(len <= 0);

        fill_level = tx_queue->insert_count - tx_queue->old_read_count;
        /* -1 as there is no way to represent all descriptors used */
        q_space = efx->type->txd_ring_mask - 1 - fill_level;

        while (1) {
                if (unlikely(q_space-- <= 0)) {
                        /* It might be that completions have happened
                         * since the xmit path last checked.  Update
                         * the xmit path's copy of read_count.
                         */
                        ++tx_queue->stopped;
                        /* This memory barrier protects the change of
                         * stopped from the access of read_count. */
                        smp_mb();
                        tx_queue->old_read_count =
                                *(volatile unsigned *)&tx_queue->read_count;
                        fill_level = (tx_queue->insert_count
                                      - tx_queue->old_read_count);
                        q_space = efx->type->txd_ring_mask - 1 - fill_level;
                        if (unlikely(q_space-- <= 0))
                                return 1;
                        smp_mb();
                        --tx_queue->stopped;
                }

                insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
                buffer = &tx_queue->buffer[insert_ptr];
                ++tx_queue->insert_count;

                EFX_BUG_ON_PARANOID(tx_queue->insert_count -
                                    tx_queue->read_count >
                                    efx->type->txd_ring_mask);

                efx_tsoh_free(tx_queue, buffer);
                EFX_BUG_ON_PARANOID(buffer->len);
                EFX_BUG_ON_PARANOID(buffer->unmap_len);
                EFX_BUG_ON_PARANOID(buffer->skb);
                EFX_BUG_ON_PARANOID(buffer->continuation != 1);
                EFX_BUG_ON_PARANOID(buffer->tsoh);

                buffer->dma_addr = dma_addr;

                /* Ensure we do not cross a boundary unsupported by H/W */
                dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;

                misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
                if (misalign && dma_len + misalign > 512)
                        dma_len = 512 - misalign;

                /* If there is enough space to send then do so */
                if (dma_len >= len)
                        break;

                buffer->len = dma_len; /* Don't set the other members */
                dma_addr += dma_len;
                len -= dma_len;
        }

        EFX_BUG_ON_PARANOID(!len);
        buffer->len = len;
        buffer->skb = skb;
        buffer->continuation = !end_of_packet;
        buffer->unmap_addr = unmap_addr;
        buffer->unmap_len = unmap_len;

        return 0;
}

/*
 * Put a TSO header into the TX queue.
 *
 * This is special-cased because we know that it is small enough to fit in
 * a single fragment, and we know it doesn't cross a page boundary.  It
 * also allows us to not worry about end-of-packet etc.
 */
static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue,
                                      struct efx_tso_header *tsoh, unsigned len)
{
        struct efx_tx_buffer *buffer;

        buffer = &tx_queue->buffer[tx_queue->insert_count &
                                   tx_queue->efx->type->txd_ring_mask];
        efx_tsoh_free(tx_queue, buffer);
        EFX_BUG_ON_PARANOID(buffer->len);
        EFX_BUG_ON_PARANOID(buffer->unmap_len);
        EFX_BUG_ON_PARANOID(buffer->skb);
        EFX_BUG_ON_PARANOID(buffer->continuation != 1);
        EFX_BUG_ON_PARANOID(buffer->tsoh);
        buffer->len = len;
        buffer->dma_addr = tsoh->dma_addr;
        buffer->tsoh = tsoh;

        ++tx_queue->insert_count;
}

/* Remove descriptors put into a tx_queue. */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
        struct efx_tx_buffer *buffer;

        /* Work backwards until we hit the original insert pointer value */
        while (tx_queue->insert_count != tx_queue->write_count) {
                --tx_queue->insert_count;
                buffer = &tx_queue->buffer[tx_queue->insert_count &
                                           tx_queue->efx->type->txd_ring_mask];
                efx_tsoh_free(tx_queue, buffer);
                EFX_BUG_ON_PARANOID(buffer->skb);
                buffer->len = 0;
                buffer->continuation = 1;
                if (buffer->unmap_len) {
                        pci_unmap_page(tx_queue->efx->pci_dev,
                                       buffer->unmap_addr,
                                       buffer->unmap_len, PCI_DMA_TODEVICE);
                        buffer->unmap_len = 0;
                }
        }
}

/* Parse the SKB header and initialise state. */
static inline void tso_start(struct tso_state *st, const struct sk_buff *skb)
{
        /* All ethernet/IP/TCP headers combined size is TCP header size
         * plus offset of TCP header relative to start of packet.
         */
        st->p.header_length = ((tcp_hdr(skb)->doff << 2u)
                               + PTR_DIFF(tcp_hdr(skb), skb->data));
        st->p.full_packet_size = (st->p.header_length
                                  + skb_shinfo(skb)->gso_size);

        st->p.ipv4_id = ntohs(ip_hdr(skb)->id);
        st->seqnum = ntohl(tcp_hdr(skb)->seq);

        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);

        st->packet_space = st->p.full_packet_size;
        st->remaining_len = skb->len - st->p.header_length;
}
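
/* Worked example (illustrative values only): for a TCP/IPv4 flow with no
 * options, header_length is 54 bytes; with gso_size == 1460 each full
 * segment built below carries 54 + 1460 == 1514 bytes on the wire. */
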
/**
 * tso_get_fragment - record fragment details and map for DMA
 * @st:			TSO state
 * @efx:		Efx NIC
 * @len:		Length of fragment
 * @page:		Page of fragment data
 * @page_off:		Offset of fragment data within @page
 *
 * Record fragment details and map for DMA.  Return 0 on success, or
 * -%ENOMEM if DMA mapping fails.
 */
static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
                                   int len, struct page *page, int page_off)
{
        st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off,
                                          len, PCI_DMA_TODEVICE);
        if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) {
                st->ifc.unmap_len = len;
                st->ifc.len = len;
                st->ifc.dma_addr = st->ifc.unmap_addr;
                st->ifc.page = page;
                st->ifc.page_off = page_off;
                return 0;
        }
        return -ENOMEM;
}

/**
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 * @st:			TSO state
 *
 * Form descriptors for the current fragment, until we reach the end
 * of fragment or end-of-packet.  Return 0 on success, or 1 if not enough
 * space in @tx_queue.
 */
static inline int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
                                                const struct sk_buff *skb,
                                                struct tso_state *st)
{
        int n, end_of_packet, rc;

        if (st->ifc.len == 0)
                return 0;
        if (st->packet_space == 0)
                return 0;

        EFX_BUG_ON_PARANOID(st->ifc.len <= 0);
        EFX_BUG_ON_PARANOID(st->packet_space <= 0);

        n = min(st->ifc.len, st->packet_space);

        st->packet_space -= n;
        st->remaining_len -= n;
        st->ifc.len -= n;
        st->ifc.page_off += n;
        end_of_packet = st->remaining_len == 0 || st->packet_space == 0;

        rc = efx_tx_queue_insert(tx_queue, st->ifc.dma_addr, n,
                                 st->remaining_len ? NULL : skb,
                                 end_of_packet, st->ifc.unmap_addr,
                                 st->ifc.len ? 0 : st->ifc.unmap_len);

        st->ifc.dma_addr += n;

        return rc;
}

/**
 * tso_start_new_packet - generate a new header and prepare for the new packet
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 * @st:			TSO state
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or -1 if failed to alloc header.
 */
static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
                                       const struct sk_buff *skb,
                                       struct tso_state *st)
{
        struct efx_tso_header *tsoh;
        struct iphdr *tsoh_iph;
        struct tcphdr *tsoh_th;
        unsigned ip_length;
        u8 *header;

        /* Allocate a DMA-mapped header buffer. */
        if (likely(TSOH_SIZE(st->p.header_length) <= TSOH_STD_SIZE)) {
                if (tx_queue->tso_headers_free == NULL) {
                        if (efx_tsoh_block_alloc(tx_queue))
                                return -1;
                }
                EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
                tsoh = tx_queue->tso_headers_free;
                tx_queue->tso_headers_free = tsoh->next;
                tsoh->unmap_len = 0;
        } else {
                tx_queue->tso_long_headers++;
                tsoh = efx_tsoh_heap_alloc(tx_queue, st->p.header_length);
                if (unlikely(!tsoh))
                        return -1;
        }

        header = TSOH_BUFFER(tsoh);
        tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
        tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));

        /* Copy and update the headers. */
        memcpy(header, skb->data, st->p.header_length);

        tsoh_th->seq = htonl(st->seqnum);
        st->seqnum += skb_shinfo(skb)->gso_size;
        if (st->remaining_len > skb_shinfo(skb)->gso_size) {
                /* This packet will not finish the TSO burst. */
                ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
                tsoh_th->fin = 0;
                tsoh_th->psh = 0;
        } else {
                /* This packet will be the last in the TSO burst. */
                ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
                             + st->remaining_len);
                tsoh_th->fin = tcp_hdr(skb)->fin;
                tsoh_th->psh = tcp_hdr(skb)->psh;
        }
        tsoh_iph->tot_len = htons(ip_length);

        /* Linux leaves suitable gaps in the IP ID space for us to fill. */
        tsoh_iph->id = htons(st->p.ipv4_id);
        st->p.ipv4_id++;

        st->packet_space = skb_shinfo(skb)->gso_size;
        ++tx_queue->tso_packets;

        /* Form a descriptor for this header. */
        efx_tso_put_header(tx_queue, tsoh, st->p.header_length);

        return 0;
}

/**
 * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 *
 * Context: You must hold netif_tx_lock() to call this function.
 *
 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
 * @skb was not enqueued.  In all cases @skb is consumed.  Return
 * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
 */
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
                               const struct sk_buff *skb)
{
        int frag_i, rc, rc2 = NETDEV_TX_OK;
        struct tso_state state;
        skb_frag_t *f;

        /* Verify TSO is safe - these checks should never fail. */
        efx_tso_check_safe(skb);

        EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

        tso_start(&state, skb);

        /* Assume that skb header area contains exactly the headers, and
         * all payload is in the frag list.
         */
        if (skb_headlen(skb) == state.p.header_length) {
                /* Grab the first payload fragment. */
                EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
                frag_i = 0;
                f = &skb_shinfo(skb)->frags[frag_i];
                rc = tso_get_fragment(&state, tx_queue->efx,
                                      f->size, f->page, f->page_offset);
                if (rc)
                        goto mem_err;
        } else {
                /* It may look like this code fragment assumes that the
                 * skb->data portion does not cross a page boundary, but
                 * that is not the case.  It is guaranteed to be direct
                 * mapped memory, and therefore is physically contiguous,
                 * and so DMA will work fine.  kmap_atomic() on this region
                 * will just return the direct mapping, so that will work
                 * too.
                 */
                int page_off = (unsigned long)skb->data & (PAGE_SIZE - 1);
                int hl = state.p.header_length;
                rc = tso_get_fragment(&state, tx_queue->efx,
                                      skb_headlen(skb) - hl,
                                      virt_to_page(skb->data), page_off + hl);
                if (rc)
                        goto mem_err;
                frag_i = -1;
        }

        if (tso_start_new_packet(tx_queue, skb, &state) < 0)
                goto mem_err;

        while (1) {
                rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
                if (unlikely(rc))
                        goto stop;

                /* Move onto the next fragment? */
                if (state.ifc.len == 0) {
                        if (++frag_i >= skb_shinfo(skb)->nr_frags)
                                /* End of payload reached. */
                                break;
                        f = &skb_shinfo(skb)->frags[frag_i];
                        rc = tso_get_fragment(&state, tx_queue->efx,
                                              f->size, f->page,
                                              f->page_offset);
                        if (unlikely(rc))
                                goto mem_err;
                }

                /* Start at new packet? */
                if (state.packet_space == 0 &&
                    tso_start_new_packet(tx_queue, skb, &state) < 0)
                        goto mem_err;
        }

        /* Pass off to hardware */
        falcon_push_buffers(tx_queue);

        tx_queue->tso_bursts++;
        return NETDEV_TX_OK;

 mem_err:
        EFX_ERR(tx_queue->efx, "Out of memory for TSO headers, or PCI mapping"
                " error\n");
        dev_kfree_skb_any((struct sk_buff *)skb);
        goto unwind;

 stop:
        rc2 = NETDEV_TX_BUSY;

        /* Stop the queue if it wasn't stopped before. */
        if (tx_queue->stopped == 1)
                efx_stop_queue(tx_queue->efx);

 unwind:
        efx_enqueue_unwind(tx_queue);
        return rc2;
}

/*
 * Free up all TSO datastructures associated with tx_queue. This
 * routine should be called only once the tx_queue is both empty and
 * will no longer be used.
 */
static void efx_fini_tso(struct efx_tx_queue *tx_queue)
{
        unsigned i;

        if (tx_queue->buffer) {
                for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i)
                        efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
        }

        while (tx_queue->tso_headers_free != NULL)
                efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
                                    tx_queue->efx->pci_dev);
}