1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2018 Solarflare Communications Inc.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
11 #include "net_driver.h"
13 #include "nic_common.h"
14 #include "tx_common.h"
/* Number of elements in the tx_queue->cb_page array.
 *
 * Computed as the ring size (ptr_mask + 1) divided by
 * (PAGE_SIZE >> EFX_TX_CB_ORDER), rounded up.  efx_probe_tx_queue() uses it
 * to size the cb_page allocation and efx_remove_tx_queue() uses it as the
 * loop bound when freeing the elements.
 */
17 static unsigned int efx_tx_cb_page_count(struct efx_tx_queue
*tx_queue
)
19 return DIV_ROUND_UP(tx_queue
->ptr_mask
+ 1,
20 PAGE_SIZE
>> EFX_TX_CB_ORDER
);
/* Allocate the software state of a TX queue and probe its hardware ring.
 *
 * The ring is sized to max(roundup_pow_of_two(efx->txq_entries),
 * EFX_MIN_DMAQ_SIZE) entries and entries - 1 is stored in
 * tx_queue->ptr_mask.  tx_queue->buffer (one element per ring entry) and
 * tx_queue->cb_page (efx_tx_cb_page_count() elements) are zero-allocated
 * with kcalloc(), then efx_nic_probe_tx() is called and the queue is
 * recorded in channel->tx_queue_by_type[tx_queue->type].
 *
 * The trailing kfree()/NULL pairs release cb_page and then buffer.
 * NOTE(review): the branch bodies, labels and return statements are not
 * visible in this listing; presumably the kfree() pairs are the error
 * unwind path and the function returns 0 or a negative errno -- confirm
 * against the complete file.
 */
23 int efx_probe_tx_queue(struct efx_tx_queue
*tx_queue
)
25 struct efx_nic
*efx
= tx_queue
->efx
;
29 /* Create the smallest power-of-two aligned ring */
30 entries
= max(roundup_pow_of_two(efx
->txq_entries
), EFX_MIN_DMAQ_SIZE
);
31 EFX_WARN_ON_PARANOID(entries
> EFX_MAX_DMAQ_SIZE
);
32 tx_queue
->ptr_mask
= entries
- 1;
/* The size printed is the requested efx->txq_entries, not the rounded-up
 * ring size held in entries.
 */
34 netif_dbg(efx
, probe
, efx
->net_dev
,
35 "creating TX queue %d size %#x mask %#x\n",
36 tx_queue
->queue
, efx
->txq_entries
, tx_queue
->ptr_mask
);
38 /* Allocate software ring */
39 tx_queue
->buffer
= kcalloc(entries
, sizeof(*tx_queue
->buffer
),
41 if (!tx_queue
->buffer
)
44 tx_queue
->cb_page
= kcalloc(efx_tx_cb_page_count(tx_queue
),
45 sizeof(tx_queue
->cb_page
[0]), GFP_KERNEL
);
46 if (!tx_queue
->cb_page
) {
51 /* Allocate hardware ring, determine TXQ type */
52 rc
= efx_nic_probe_tx(tx_queue
);
/* Record the queue under its type so it can be looked up per channel. */
56 tx_queue
->channel
->tx_queue_by_type
[tx_queue
->type
] = tx_queue
;
60 kfree(tx_queue
->cb_page
);
61 tx_queue
->cb_page
= NULL
;
63 kfree(tx_queue
->buffer
);
64 tx_queue
->buffer
= NULL
;
/* Reset a TX queue's software state and initialise its descriptor ring.
 *
 * Zeroes the ring counters (insert, notify, write, packet_write, old_write,
 * read, old_read), sets empty_read_count to 0 | EFX_EMPTY_COUNT_VALID (the
 * same encoding efx_xmit_done_check_empty() stores when read_count catches
 * up with write_count), clears xmit_pending and the completed timestamp,
 * copies the cumulative statistics into their old_* counterparts, then
 * calls efx_nic_init_tx() and finally sets tx_queue->initialised.
 */
68 void efx_init_tx_queue(struct efx_tx_queue
*tx_queue
)
70 struct efx_nic
*efx
= tx_queue
->efx
;
72 netif_dbg(efx
, drv
, efx
->net_dev
,
73 "initialising TX queue %d\n", tx_queue
->queue
);
75 tx_queue
->insert_count
= 0;
76 tx_queue
->notify_count
= 0;
77 tx_queue
->write_count
= 0;
78 tx_queue
->packet_write_count
= 0;
79 tx_queue
->old_write_count
= 0;
80 tx_queue
->read_count
= 0;
81 tx_queue
->old_read_count
= 0;
82 tx_queue
->empty_read_count
= 0 | EFX_EMPTY_COUNT_VALID
;
83 tx_queue
->xmit_pending
= false;
/* Timestamping is enabled only when MAC TX timestamps are in use and this
 * queue's channel is the PTP channel.
 */
84 tx_queue
->timestamping
= (efx_ptp_use_mac_tx_timestamps(efx
) &&
85 tx_queue
->channel
== efx_ptp_channel(efx
));
86 tx_queue
->completed_timestamp_major
= 0;
87 tx_queue
->completed_timestamp_minor
= 0;
/* Snapshot the cumulative counters; they are not reset here.
 * NOTE(review): presumably the old_* copies serve as a baseline for
 * reporting deltas -- confirm against the statistics code.
 */
89 tx_queue
->old_complete_packets
= tx_queue
->complete_packets
;
90 tx_queue
->old_complete_bytes
= tx_queue
->complete_bytes
;
91 tx_queue
->old_tso_bursts
= tx_queue
->tso_bursts
;
92 tx_queue
->old_tso_packets
= tx_queue
->tso_packets
;
94 tx_queue
->xdp_tx
= efx_channel_is_xdp_tx(tx_queue
->channel
);
95 tx_queue
->tso_version
= 0;
97 /* Set up TX descriptor ring */
98 efx_nic_init_tx(tx_queue
);
/* Set last, after all other state above has been written. */
100 tx_queue
->initialised
= true;
/* Shut down a TX queue.
 *
 * Clears tx_queue->initialised, releases every buffer still outstanding
 * between read_count and write_count through efx_dequeue_buffer(), then
 * clears xmit_pending and resets the core netdev queue with
 * netdev_tx_reset_queue().
 *
 * The completion counters handed to efx_dequeue_buffer() are loop-local and
 * are not read afterwards in the visible code.
 */
103 void efx_fini_tx_queue(struct efx_tx_queue
*tx_queue
)
105 struct efx_tx_buffer
*buffer
;
107 netif_dbg(tx_queue
->efx
, drv
, tx_queue
->efx
->net_dev
,
108 "shutting down TX queue %d\n", tx_queue
->queue
);
110 tx_queue
->initialised
= false;
/* NOTE(review): the statement guarded by this check is not visible in this
 * listing; presumably an early return when the software ring was never
 * allocated -- confirm.
 */
112 if (!tx_queue
->buffer
)
115 /* Free any buffers left in the ring */
116 while (tx_queue
->read_count
!= tx_queue
->write_count
) {
117 unsigned int xdp_pkts_compl
= 0, xdp_bytes_compl
= 0;
118 unsigned int pkts_compl
= 0, bytes_compl
= 0;
119 unsigned int efv_pkts_compl
= 0;
/* read_count is free-running; masking with ptr_mask gives the ring slot. */
121 buffer
= &tx_queue
->buffer
[tx_queue
->read_count
& tx_queue
->ptr_mask
];
122 efx_dequeue_buffer(tx_queue
, buffer
, &pkts_compl
, &bytes_compl
,
123 &efv_pkts_compl
, &xdp_pkts_compl
,
126 ++tx_queue
->read_count
;
128 tx_queue
->xmit_pending
= false;
129 netdev_tx_reset_queue(tx_queue
->core_txq
);
/* Release everything efx_probe_tx_queue() set up for a TX queue.
 *
 * Removes the hardware ring with efx_nic_remove_tx(), frees each element of
 * tx_queue->cb_page with efx_nic_free_buffer() followed by the array
 * itself, frees tx_queue->buffer, and clears the queue's slot in
 * channel->tx_queue_by_type[].  Freed pointers are reset to NULL.
 *
 * NOTE(review): the declaration of i and the statement guarded by the
 * initial !tx_queue->buffer check are not visible in this listing; the
 * check is presumably an early return for a queue that was never probed.
 */
132 void efx_remove_tx_queue(struct efx_tx_queue
*tx_queue
)
136 if (!tx_queue
->buffer
)
139 netif_dbg(tx_queue
->efx
, drv
, tx_queue
->efx
->net_dev
,
140 "destroying TX queue %d\n", tx_queue
->queue
);
141 efx_nic_remove_tx(tx_queue
);
143 if (tx_queue
->cb_page
) {
/* Same element count that efx_probe_tx_queue() used for the allocation. */
144 for (i
= 0; i
< efx_tx_cb_page_count(tx_queue
); i
++)
145 efx_nic_free_buffer(tx_queue
->efx
,
146 &tx_queue
->cb_page
[i
]);
147 kfree(tx_queue
->cb_page
);
148 tx_queue
->cb_page
= NULL
;
151 kfree(tx_queue
->buffer
);
152 tx_queue
->buffer
= NULL
;
153 tx_queue
->channel
->tx_queue_by_type
[tx_queue
->type
] = NULL
;
/* Release one TX buffer and account for its completion.
 *
 * @tx_queue:       queue the buffer belongs to
 * @buffer:         buffer to release
 * @pkts_compl:     checked non-NULL (with @bytes_compl) on the non-EFV skb
 *                  path; its update is not visible in this listing
 * @bytes_compl:    increased by skb->len for non-EFV skb buffers
 * @efv_pkts_compl: checked non-NULL for buffers flagged EFX_TX_BUF_EFV
 * @xdp_pkts:       not referenced in the visible lines
 * @xdp_bytes:      increased by xdpf->len for XDP buffers
 *
 * If the buffer carries an unmap length, the DMA mapping is torn down with
 * dma_unmap_single() or dma_unmap_page() depending on
 * EFX_TX_BUF_MAP_SINGLE, and unmap_len is cleared.  For buffers flagged
 * EFX_TX_BUF_SKB the skb is optionally given a hardware TX timestamp and
 * then released with dev_consume_skb_any(); for buffers flagged
 * EFX_TX_BUF_XDP the frame is returned with xdp_return_frame_rx_napi().
 *
 * NOTE(review): several lines (else branches, counter increments, the
 * hwtstamp assignment and any final buffer reset) are missing from this
 * listing.
 */
156 void efx_dequeue_buffer(struct efx_tx_queue
*tx_queue
,
157 struct efx_tx_buffer
*buffer
,
158 unsigned int *pkts_compl
,
159 unsigned int *bytes_compl
,
160 unsigned int *efv_pkts_compl
,
161 unsigned int *xdp_pkts
,
162 unsigned int *xdp_bytes
)
164 if (buffer
->unmap_len
) {
165 struct device
*dma_dev
= &tx_queue
->efx
->pci_dev
->dev
;
/* efx_tx_map_data() stores dma_offset as dma_addr minus the address that
 * was mapped, so this recovers the originally mapped address.
 */
166 dma_addr_t unmap_addr
= buffer
->dma_addr
- buffer
->dma_offset
;
168 if (buffer
->flags
& EFX_TX_BUF_MAP_SINGLE
)
169 dma_unmap_single(dma_dev
, unmap_addr
, buffer
->unmap_len
,
172 dma_unmap_page(dma_dev
, unmap_addr
, buffer
->unmap_len
,
174 buffer
->unmap_len
= 0;
177 if (buffer
->flags
& EFX_TX_BUF_SKB
) {
178 struct sk_buff
*skb
= (struct sk_buff
*)buffer
->skb
;
180 if (unlikely(buffer
->flags
& EFX_TX_BUF_EFV
)) {
181 EFX_WARN_ON_PARANOID(!efv_pkts_compl
);
184 EFX_WARN_ON_PARANOID(!pkts_compl
|| !bytes_compl
);
186 (*bytes_compl
) += skb
->len
;
/* Deliver a TX timestamp only when the queue timestamps and a completed
 * timestamp is pending (either half non-zero); it is cleared afterwards.
 */
189 if (tx_queue
->timestamping
&&
190 (tx_queue
->completed_timestamp_major
||
191 tx_queue
->completed_timestamp_minor
)) {
192 struct skb_shared_hwtstamps hwtstamp
;
195 efx_ptp_nic_to_kernel_time(tx_queue
);
196 skb_tstamp_tx(skb
, &hwtstamp
);
198 tx_queue
->completed_timestamp_major
= 0;
199 tx_queue
->completed_timestamp_minor
= 0;
201 dev_consume_skb_any((struct sk_buff
*)buffer
->skb
);
202 netif_vdbg(tx_queue
->efx
, tx_done
, tx_queue
->efx
->net_dev
,
203 "TX queue %d transmission id %x complete\n",
204 tx_queue
->queue
, tx_queue
->read_count
);
205 } else if (buffer
->flags
& EFX_TX_BUF_XDP
) {
206 xdp_return_frame_rx_napi(buffer
->xdpf
);
/* NOTE(review): buffer->xdpf->len is read after the frame has been handed
 * to xdp_return_frame_rx_napi().  If that call releases the memory holding
 * the xdp_frame, this is a read-after-release; consider reading len before
 * returning the frame -- confirm against the XDP frame lifetime rules.
 */
210 (*xdp_bytes
) += buffer
->xdpf
->len
;
217 /* Remove packets from the TX queue
219 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
/* Walk the ring from tx_queue->read_count until the slot after index,
 * releasing each buffer with efx_dequeue_buffer() and advancing read_count.
 *
 * The five counter pointers are forwarded unchanged to
 * efx_dequeue_buffer().  A slot whose buffer is not in use
 * (efx_tx_buffer_in_use()) is logged as a spurious completion and a
 * RESET_TYPE_TX_SKIP reset is scheduled.
 *
 * NOTE(review): the declaration of the index parameter and whatever follows
 * efx_schedule_reset() (presumably an early return) are not visible in this
 * listing -- confirm.
 */
222 static void efx_dequeue_buffers(struct efx_tx_queue
*tx_queue
,
224 unsigned int *pkts_compl
,
225 unsigned int *bytes_compl
,
226 unsigned int *efv_pkts_compl
,
227 unsigned int *xdp_pkts
,
228 unsigned int *xdp_bytes
)
230 struct efx_nic
*efx
= tx_queue
->efx
;
231 unsigned int stop_index
, read_ptr
;
/* Both values are ring slots (masked with ptr_mask); stop_index is one past
 * index so that the slot at index itself is processed.
 */
233 stop_index
= (index
+ 1) & tx_queue
->ptr_mask
;
234 read_ptr
= tx_queue
->read_count
& tx_queue
->ptr_mask
;
236 while (read_ptr
!= stop_index
) {
237 struct efx_tx_buffer
*buffer
= &tx_queue
->buffer
[read_ptr
];
239 if (!efx_tx_buffer_in_use(buffer
)) {
240 netif_err(efx
, tx_err
, efx
->net_dev
,
241 "TX queue %d spurious TX completion id %d\n",
242 tx_queue
->queue
, read_ptr
);
243 efx_schedule_reset(efx
, RESET_TYPE_TX_SKIP
);
247 efx_dequeue_buffer(tx_queue
, buffer
, pkts_compl
, bytes_compl
,
248 efv_pkts_compl
, xdp_pkts
, xdp_bytes
);
250 ++tx_queue
->read_count
;
251 read_ptr
= tx_queue
->read_count
& tx_queue
->ptr_mask
;
/* Record in empty_read_count when the queue has drained.
 *
 * Once read_count has reached the cached old_write_count, the cache is
 * refreshed from write_count with READ_ONCE().  If read_count equals the
 * refreshed value, read_count | EFX_EMPTY_COUNT_VALID is stored in
 * tx_queue->empty_read_count.
 */
255 void efx_xmit_done_check_empty(struct efx_tx_queue
*tx_queue
)
/* The unsigned difference is cast to int so the comparison remains correct
 * when the free-running counters wrap.
 */
257 if ((int)(tx_queue
->read_count
- tx_queue
->old_write_count
) >= 0) {
258 tx_queue
->old_write_count
= READ_ONCE(tx_queue
->write_count
);
259 if (tx_queue
->read_count
== tx_queue
->old_write_count
) {
260 /* Ensure that read_count is flushed. */
262 tx_queue
->empty_read_count
=
263 tx_queue
->read_count
| EFX_EMPTY_COUNT_VALID
;
/* Process a TX completion for ring slot @index.
 *
 * Releases the completed buffers through efx_dequeue_buffers() and adds the
 * resulting counts to the queue's pkts_compl, bytes_compl,
 * complete_xdp_packets and complete_xdp_bytes.  merge_events is bumped when
 * the normal and EFV packet counts together exceed one.  If the core queue
 * is stopped while the port is enabled and the net device is present, it is
 * woken once the channel fill level is at or below efx->txq_wake_thresh.
 * efx_xmit_done_check_empty() is called last.
 *
 * Returns pkts_compl + efv_pkts_compl for this call.
 */
268 int efx_xmit_done(struct efx_tx_queue
*tx_queue
, unsigned int index
)
270 unsigned int fill_level
, pkts_compl
= 0, bytes_compl
= 0;
271 unsigned int xdp_pkts_compl
= 0, xdp_bytes_compl
= 0;
272 unsigned int efv_pkts_compl
= 0;
273 struct efx_nic
*efx
= tx_queue
->efx
;
/* index is a ring slot, so it must not exceed the ring mask. */
275 EFX_WARN_ON_ONCE_PARANOID(index
> tx_queue
->ptr_mask
);
277 efx_dequeue_buffers(tx_queue
, index
, &pkts_compl
, &bytes_compl
,
278 &efv_pkts_compl
, &xdp_pkts_compl
, &xdp_bytes_compl
);
279 tx_queue
->pkts_compl
+= pkts_compl
;
280 tx_queue
->bytes_compl
+= bytes_compl
;
281 tx_queue
->complete_xdp_packets
+= xdp_pkts_compl
;
282 tx_queue
->complete_xdp_bytes
+= xdp_bytes_compl
;
284 if (pkts_compl
+ efv_pkts_compl
> 1)
285 ++tx_queue
->merge_events
;
287 /* See if we need to restart the netif queue. This memory
288 * barrier ensures that we write read_count (inside
289 * efx_dequeue_buffers()) before reading the queue status.
292 if (unlikely(netif_tx_queue_stopped(tx_queue
->core_txq
)) &&
293 likely(efx
->port_enabled
) &&
294 likely(netif_device_present(efx
->net_dev
))) {
295 fill_level
= efx_channel_tx_fill_level(tx_queue
->channel
);
296 if (fill_level
<= efx
->txq_wake_thresh
)
297 netif_tx_wake_queue(tx_queue
->core_txq
);
300 efx_xmit_done_check_empty(tx_queue
);
302 return pkts_compl
+ efv_pkts_compl
;
305 /* Remove buffers put into a tx_queue for the current packet.
306 * None of the buffers must have an skb attached.
 */
/* Roll tx_queue->insert_count back to @insert_count.
 *
 * Steps insert_count down one slot at a time and releases the buffer at
 * each slot with efx_dequeue_buffer() until the counter equals
 * @insert_count.  The completion counters are local scratch values that
 * are not read afterwards in the visible code.
 */
308 void efx_enqueue_unwind(struct efx_tx_queue
*tx_queue
,
309 unsigned int insert_count
)
311 unsigned int xdp_bytes_compl
= 0;
312 unsigned int xdp_pkts_compl
= 0;
313 unsigned int efv_pkts_compl
= 0;
314 struct efx_tx_buffer
*buffer
;
315 unsigned int bytes_compl
= 0;
316 unsigned int pkts_compl
= 0;
318 /* Work backwards until we hit the original insert pointer value */
319 while (tx_queue
->insert_count
!= insert_count
) {
/* Decrement first, so the buffer fetched is the one most recently inserted. */
320 --tx_queue
->insert_count
;
321 buffer
= __efx_tx_queue_get_insert_buffer(tx_queue
);
322 efx_dequeue_buffer(tx_queue
, buffer
, &pkts_compl
, &bytes_compl
,
323 &efv_pkts_compl
, &xdp_pkts_compl
,
/* Fill insert-ring buffers for the already DMA-mapped region starting at
 * @dma_addr and spanning @len bytes.
 *
 * Takes the next insert buffer, asks nic_type->tx_limit_len() (when the NIC
 * type provides it) for the length one descriptor may cover, records that
 * length, the DMA address and EFX_TX_BUF_CONT in the buffer, and advances
 * tx_queue->insert_count.
 *
 * NOTE(review): the loop construct, the dma_len value used when
 * tx_limit_len is absent, the per-iteration address/length updates and the
 * return statement are not visible in this listing.  efx_tx_map_data()
 * treats the returned buffer as the last one written for the region --
 * confirm.
 */
328 struct efx_tx_buffer
*efx_tx_map_chunk(struct efx_tx_queue
*tx_queue
,
329 dma_addr_t dma_addr
, size_t len
)
331 const struct efx_nic_type
*nic_type
= tx_queue
->efx
->type
;
332 struct efx_tx_buffer
*buffer
;
333 unsigned int dma_len
;
335 /* Map the fragment taking account of NIC-dependent DMA limits. */
337 buffer
= efx_tx_queue_get_insert_buffer(tx_queue
);
339 if (nic_type
->tx_limit_len
)
340 dma_len
= nic_type
->tx_limit_len(tx_queue
, dma_addr
, len
);
344 buffer
->len
= dma_len
;
345 buffer
->dma_addr
= dma_addr
;
346 buffer
->flags
= EFX_TX_BUF_CONT
;
349 ++tx_queue
->insert_count
;
/* Length in bytes of the headers that precede the TCP payload of @skb.
 *
 * For an encapsulated skb this is the inner transport header offset plus
 * the inner TCP header length; the second assignment uses the outer
 * transport offset and TCP header instead.  doff counts 32-bit words, so
 * shifting left by 2 converts it to bytes.
 *
 * NOTE(review): the header_len declaration, the else line and the return
 * statement are not visible in this listing.
 */
355 int efx_tx_tso_header_length(struct sk_buff
*skb
)
359 if (skb
->encapsulation
)
360 header_len
= skb_inner_transport_offset(skb
) +
361 (inner_tcp_hdr(skb
)->doff
<< 2u);
363 header_len
= skb_transport_offset(skb
) +
364 (tcp_hdr(skb
)->doff
<< 2u);
368 /* Map all data from an SKB for DMA and create descriptors on the queue. */
/* Steps visible in the body:
 *
 * - The linear area (skb_headlen() bytes at skb->data) is mapped with
 *   dma_map_single() and tagged EFX_TX_BUF_MAP_SINGLE; each page fragment
 *   is mapped with skb_frag_dma_map().
 * - When the TSO header length differs from the linear length, the header
 *   gets its own efx_tx_map_chunk() call and tx_queue->tso_long_headers is
 *   incremented.
 * - Each mapped region is passed to efx_tx_map_chunk(); the buffer it
 *   returns receives the unmap details (unmap_len, dma_offset and the
 *   mapping-type flag) for the whole region.
 * - Once all fragments are consumed, the last buffer is flagged
 *   EFX_TX_BUF_SKB instead of EFX_TX_BUF_CONT.
 *
 * NOTE(review): the loop construct, the use of segment_count, the
 * assignments to unmap_len, the dma_mapping_error() handling and the return
 * statements are not visible in this listing; the function presumably
 * returns 0 or a negative errno -- confirm.
 */
369 int efx_tx_map_data(struct efx_tx_queue
*tx_queue
, struct sk_buff
*skb
,
370 unsigned int segment_count
)
372 struct efx_nic
*efx
= tx_queue
->efx
;
373 struct device
*dma_dev
= &efx
->pci_dev
->dev
;
374 unsigned int frag_index
, nr_frags
;
375 dma_addr_t dma_addr
, unmap_addr
;
376 unsigned short dma_flags
;
377 size_t len
, unmap_len
;
379 nr_frags
= skb_shinfo(skb
)->nr_frags
;
382 /* Map header data. */
383 len
= skb_headlen(skb
);
384 dma_addr
= dma_map_single(dma_dev
, skb
->data
, len
, DMA_TO_DEVICE
);
385 dma_flags
= EFX_TX_BUF_MAP_SINGLE
;
387 unmap_addr
= dma_addr
;
389 if (unlikely(dma_mapping_error(dma_dev
, dma_addr
)))
393 /* For TSO we need to put the header in to a separate
394 * descriptor. Map this separately if necessary.
396 size_t header_len
= efx_tx_tso_header_length(skb
);
398 if (header_len
!= len
) {
399 tx_queue
->tso_long_headers
++;
400 efx_tx_map_chunk(tx_queue
, dma_addr
, header_len
);
402 dma_addr
+= header_len
;
406 /* Add descriptors for each fragment. */
408 struct efx_tx_buffer
*buffer
;
409 skb_frag_t
*fragment
;
411 buffer
= efx_tx_map_chunk(tx_queue
, dma_addr
, len
);
413 /* The final descriptor for a fragment is responsible for
414 * unmapping the whole fragment.
416 buffer
->flags
= EFX_TX_BUF_CONT
| dma_flags
;
417 buffer
->unmap_len
= unmap_len
;
418 buffer
->dma_offset
= buffer
->dma_addr
- unmap_addr
;
420 if (frag_index
>= nr_frags
) {
421 /* Store SKB details with the final buffer for
425 buffer
->flags
= EFX_TX_BUF_SKB
| dma_flags
;
429 /* Move on to the next fragment. */
430 fragment
= &skb_shinfo(skb
)->frags
[frag_index
++];
431 len
= skb_frag_size(fragment
);
432 dma_addr
= skb_frag_dma_map(dma_dev
, fragment
, 0, len
,
436 unmap_addr
= dma_addr
;
438 if (unlikely(dma_mapping_error(dma_dev
, dma_addr
)))
/* Compute an upper bound on the descriptors one skb can need.
 *
 * Starts from EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS, adds EFX_TSO_MAX_SEGS
 * when efx_nic_rev() is at least EFX_REV_HUNT_A0, and adds a further
 * allowance when PAGE_SIZE exceeds EFX_PAGE_SIZE.
 *
 * NOTE(review): the tail of the final max_t() expression and the return
 * statement are not visible in this listing; presumably max_descs is
 * returned -- confirm.
 */
443 unsigned int efx_tx_max_skb_descs(struct efx_nic
*efx
)
445 /* Header and payload descriptor for each output segment, plus
446 * one for every input fragment boundary within a segment
448 unsigned int max_descs
= EFX_TSO_MAX_SEGS
* 2 + MAX_SKB_FRAGS
;
450 /* Possibly one more per segment for option descriptors */
451 if (efx_nic_rev(efx
) >= EFX_REV_HUNT_A0
)
452 max_descs
+= EFX_TSO_MAX_SEGS
;
454 /* Possibly more for PCIe page boundaries within input fragments */
455 if (PAGE_SIZE
> EFX_PAGE_SIZE
)
456 max_descs
+= max_t(unsigned int, MAX_SKB_FRAGS
,
457 DIV_ROUND_UP(GSO_LEGACY_MAX_SIZE
,
/*
464 * Fallback to software TSO.
466 * This is used if we are unable to send a GSO packet through hardware TSO.
467 * This should only ever happen due to per-queue restrictions - unsupported
468 * packets should first be filtered by the feature flags.
470 * Returns 0 on success, error code otherwise.
 */
472 int efx_tx_tso_fallback(struct efx_tx_queue
*tx_queue
, struct sk_buff
*skb
)
474 struct sk_buff
*segments
, *next
;
476 segments
= skb_gso_segment(skb
, 0);
477 if (IS_ERR(segments
))
478 return PTR_ERR(segments
);
480 dev_consume_skb_any(skb
);
482 skb_list_walk_safe(segments
, skb
, next
) {
483 skb_mark_not_on_list(skb
);
484 efx_enqueue_skb(tx_queue
, skb
);