// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "net_driver.h"
#include "efx.h"
#include "nic.h"
#include "tx_common.h"

16 static unsigned int efx_tx_cb_page_count(struct efx_tx_queue
*tx_queue
)
18 return DIV_ROUND_UP(tx_queue
->ptr_mask
+ 1,
19 PAGE_SIZE
>> EFX_TX_CB_ORDER
);
22 int efx_probe_tx_queue(struct efx_tx_queue
*tx_queue
)
24 struct efx_nic
*efx
= tx_queue
->efx
;
28 /* Create the smallest power-of-two aligned ring */
29 entries
= max(roundup_pow_of_two(efx
->txq_entries
), EFX_MIN_DMAQ_SIZE
);
30 EFX_WARN_ON_PARANOID(entries
> EFX_MAX_DMAQ_SIZE
);
31 tx_queue
->ptr_mask
= entries
- 1;
33 netif_dbg(efx
, probe
, efx
->net_dev
,
34 "creating TX queue %d size %#x mask %#x\n",
35 tx_queue
->queue
, efx
->txq_entries
, tx_queue
->ptr_mask
);
37 /* Allocate software ring */
38 tx_queue
->buffer
= kcalloc(entries
, sizeof(*tx_queue
->buffer
),
40 if (!tx_queue
->buffer
)
43 tx_queue
->cb_page
= kcalloc(efx_tx_cb_page_count(tx_queue
),
44 sizeof(tx_queue
->cb_page
[0]), GFP_KERNEL
);
45 if (!tx_queue
->cb_page
) {
50 /* Allocate hardware ring */
51 rc
= efx_nic_probe_tx(tx_queue
);
58 kfree(tx_queue
->cb_page
);
59 tx_queue
->cb_page
= NULL
;
61 kfree(tx_queue
->buffer
);
62 tx_queue
->buffer
= NULL
;
66 void efx_init_tx_queue(struct efx_tx_queue
*tx_queue
)
68 struct efx_nic
*efx
= tx_queue
->efx
;
70 netif_dbg(efx
, drv
, efx
->net_dev
,
71 "initialising TX queue %d\n", tx_queue
->queue
);
73 tx_queue
->insert_count
= 0;
74 tx_queue
->write_count
= 0;
75 tx_queue
->packet_write_count
= 0;
76 tx_queue
->old_write_count
= 0;
77 tx_queue
->read_count
= 0;
78 tx_queue
->old_read_count
= 0;
79 tx_queue
->empty_read_count
= 0 | EFX_EMPTY_COUNT_VALID
;
80 tx_queue
->xmit_more_available
= false;
81 tx_queue
->timestamping
= (efx_ptp_use_mac_tx_timestamps(efx
) &&
82 tx_queue
->channel
== efx_ptp_channel(efx
));
83 tx_queue
->completed_timestamp_major
= 0;
84 tx_queue
->completed_timestamp_minor
= 0;
86 tx_queue
->xdp_tx
= efx_channel_is_xdp_tx(tx_queue
->channel
);
88 /* Set up default function pointers. These may get replaced by
89 * efx_nic_init_tx() based off NIC/queue capabilities.
91 tx_queue
->handle_tso
= efx_enqueue_skb_tso
;
93 /* Set up TX descriptor ring */
94 efx_nic_init_tx(tx_queue
);
96 tx_queue
->initialised
= true;
99 void efx_fini_tx_queue(struct efx_tx_queue
*tx_queue
)
101 struct efx_tx_buffer
*buffer
;
103 netif_dbg(tx_queue
->efx
, drv
, tx_queue
->efx
->net_dev
,
104 "shutting down TX queue %d\n", tx_queue
->queue
);
106 if (!tx_queue
->buffer
)
109 /* Free any buffers left in the ring */
110 while (tx_queue
->read_count
!= tx_queue
->write_count
) {
111 unsigned int pkts_compl
= 0, bytes_compl
= 0;
113 buffer
= &tx_queue
->buffer
[tx_queue
->read_count
& tx_queue
->ptr_mask
];
114 efx_dequeue_buffer(tx_queue
, buffer
, &pkts_compl
, &bytes_compl
);
116 ++tx_queue
->read_count
;
118 tx_queue
->xmit_more_available
= false;
119 netdev_tx_reset_queue(tx_queue
->core_txq
);
122 void efx_remove_tx_queue(struct efx_tx_queue
*tx_queue
)
126 if (!tx_queue
->buffer
)
129 netif_dbg(tx_queue
->efx
, drv
, tx_queue
->efx
->net_dev
,
130 "destroying TX queue %d\n", tx_queue
->queue
);
131 efx_nic_remove_tx(tx_queue
);
133 if (tx_queue
->cb_page
) {
134 for (i
= 0; i
< efx_tx_cb_page_count(tx_queue
); i
++)
135 efx_nic_free_buffer(tx_queue
->efx
,
136 &tx_queue
->cb_page
[i
]);
137 kfree(tx_queue
->cb_page
);
138 tx_queue
->cb_page
= NULL
;
141 kfree(tx_queue
->buffer
);
142 tx_queue
->buffer
= NULL
;
145 void efx_dequeue_buffer(struct efx_tx_queue
*tx_queue
,
146 struct efx_tx_buffer
*buffer
,
147 unsigned int *pkts_compl
,
148 unsigned int *bytes_compl
)
150 if (buffer
->unmap_len
) {
151 struct device
*dma_dev
= &tx_queue
->efx
->pci_dev
->dev
;
152 dma_addr_t unmap_addr
= buffer
->dma_addr
- buffer
->dma_offset
;
154 if (buffer
->flags
& EFX_TX_BUF_MAP_SINGLE
)
155 dma_unmap_single(dma_dev
, unmap_addr
, buffer
->unmap_len
,
158 dma_unmap_page(dma_dev
, unmap_addr
, buffer
->unmap_len
,
160 buffer
->unmap_len
= 0;
163 if (buffer
->flags
& EFX_TX_BUF_SKB
) {
164 struct sk_buff
*skb
= (struct sk_buff
*)buffer
->skb
;
166 EFX_WARN_ON_PARANOID(!pkts_compl
|| !bytes_compl
);
168 (*bytes_compl
) += skb
->len
;
169 if (tx_queue
->timestamping
&&
170 (tx_queue
->completed_timestamp_major
||
171 tx_queue
->completed_timestamp_minor
)) {
172 struct skb_shared_hwtstamps hwtstamp
;
175 efx_ptp_nic_to_kernel_time(tx_queue
);
176 skb_tstamp_tx(skb
, &hwtstamp
);
178 tx_queue
->completed_timestamp_major
= 0;
179 tx_queue
->completed_timestamp_minor
= 0;
181 dev_consume_skb_any((struct sk_buff
*)buffer
->skb
);
182 netif_vdbg(tx_queue
->efx
, tx_done
, tx_queue
->efx
->net_dev
,
183 "TX queue %d transmission id %x complete\n",
184 tx_queue
->queue
, tx_queue
->read_count
);
185 } else if (buffer
->flags
& EFX_TX_BUF_XDP
) {
186 xdp_return_frame_rx_napi(buffer
->xdpf
);
193 /* Remove packets from the TX queue
195 * This removes packets from the TX queue, up to and including the
198 static void efx_dequeue_buffers(struct efx_tx_queue
*tx_queue
,
200 unsigned int *pkts_compl
,
201 unsigned int *bytes_compl
)
203 struct efx_nic
*efx
= tx_queue
->efx
;
204 unsigned int stop_index
, read_ptr
;
206 stop_index
= (index
+ 1) & tx_queue
->ptr_mask
;
207 read_ptr
= tx_queue
->read_count
& tx_queue
->ptr_mask
;
209 while (read_ptr
!= stop_index
) {
210 struct efx_tx_buffer
*buffer
= &tx_queue
->buffer
[read_ptr
];
212 if (!efx_tx_buffer_in_use(buffer
)) {
213 netif_err(efx
, tx_err
, efx
->net_dev
,
214 "TX queue %d spurious TX completion id %d\n",
215 tx_queue
->queue
, read_ptr
);
216 efx_schedule_reset(efx
, RESET_TYPE_TX_SKIP
);
220 efx_dequeue_buffer(tx_queue
, buffer
, pkts_compl
, bytes_compl
);
222 ++tx_queue
->read_count
;
223 read_ptr
= tx_queue
->read_count
& tx_queue
->ptr_mask
;
227 void efx_xmit_done_check_empty(struct efx_tx_queue
*tx_queue
)
229 if ((int)(tx_queue
->read_count
- tx_queue
->old_write_count
) >= 0) {
230 tx_queue
->old_write_count
= READ_ONCE(tx_queue
->write_count
);
231 if (tx_queue
->read_count
== tx_queue
->old_write_count
) {
232 /* Ensure that read_count is flushed. */
234 tx_queue
->empty_read_count
=
235 tx_queue
->read_count
| EFX_EMPTY_COUNT_VALID
;
240 void efx_xmit_done(struct efx_tx_queue
*tx_queue
, unsigned int index
)
242 unsigned int fill_level
, pkts_compl
= 0, bytes_compl
= 0;
243 struct efx_nic
*efx
= tx_queue
->efx
;
244 struct efx_tx_queue
*txq2
;
246 EFX_WARN_ON_ONCE_PARANOID(index
> tx_queue
->ptr_mask
);
248 efx_dequeue_buffers(tx_queue
, index
, &pkts_compl
, &bytes_compl
);
249 tx_queue
->pkts_compl
+= pkts_compl
;
250 tx_queue
->bytes_compl
+= bytes_compl
;
253 ++tx_queue
->merge_events
;
255 /* See if we need to restart the netif queue. This memory
256 * barrier ensures that we write read_count (inside
257 * efx_dequeue_buffers()) before reading the queue status.
260 if (unlikely(netif_tx_queue_stopped(tx_queue
->core_txq
)) &&
261 likely(efx
->port_enabled
) &&
262 likely(netif_device_present(efx
->net_dev
))) {
263 txq2
= efx_tx_queue_partner(tx_queue
);
264 fill_level
= max(tx_queue
->insert_count
- tx_queue
->read_count
,
265 txq2
->insert_count
- txq2
->read_count
);
266 if (fill_level
<= efx
->txq_wake_thresh
)
267 netif_tx_wake_queue(tx_queue
->core_txq
);
270 efx_xmit_done_check_empty(tx_queue
);
273 /* Remove buffers put into a tx_queue for the current packet.
274 * None of the buffers must have an skb attached.
276 void efx_enqueue_unwind(struct efx_tx_queue
*tx_queue
,
277 unsigned int insert_count
)
279 struct efx_tx_buffer
*buffer
;
280 unsigned int bytes_compl
= 0;
281 unsigned int pkts_compl
= 0;
283 /* Work backwards until we hit the original insert pointer value */
284 while (tx_queue
->insert_count
!= insert_count
) {
285 --tx_queue
->insert_count
;
286 buffer
= __efx_tx_queue_get_insert_buffer(tx_queue
);
287 efx_dequeue_buffer(tx_queue
, buffer
, &pkts_compl
, &bytes_compl
);
291 struct efx_tx_buffer
*efx_tx_map_chunk(struct efx_tx_queue
*tx_queue
,
292 dma_addr_t dma_addr
, size_t len
)
294 const struct efx_nic_type
*nic_type
= tx_queue
->efx
->type
;
295 struct efx_tx_buffer
*buffer
;
296 unsigned int dma_len
;
298 /* Map the fragment taking account of NIC-dependent DMA limits. */
300 buffer
= efx_tx_queue_get_insert_buffer(tx_queue
);
301 dma_len
= nic_type
->tx_limit_len(tx_queue
, dma_addr
, len
);
303 buffer
->len
= dma_len
;
304 buffer
->dma_addr
= dma_addr
;
305 buffer
->flags
= EFX_TX_BUF_CONT
;
308 ++tx_queue
->insert_count
;
314 /* Map all data from an SKB for DMA and create descriptors on the queue. */
315 int efx_tx_map_data(struct efx_tx_queue
*tx_queue
, struct sk_buff
*skb
,
316 unsigned int segment_count
)
318 struct efx_nic
*efx
= tx_queue
->efx
;
319 struct device
*dma_dev
= &efx
->pci_dev
->dev
;
320 unsigned int frag_index
, nr_frags
;
321 dma_addr_t dma_addr
, unmap_addr
;
322 unsigned short dma_flags
;
323 size_t len
, unmap_len
;
325 nr_frags
= skb_shinfo(skb
)->nr_frags
;
328 /* Map header data. */
329 len
= skb_headlen(skb
);
330 dma_addr
= dma_map_single(dma_dev
, skb
->data
, len
, DMA_TO_DEVICE
);
331 dma_flags
= EFX_TX_BUF_MAP_SINGLE
;
333 unmap_addr
= dma_addr
;
335 if (unlikely(dma_mapping_error(dma_dev
, dma_addr
)))
339 /* For TSO we need to put the header in to a separate
340 * descriptor. Map this separately if necessary.
342 size_t header_len
= skb_transport_header(skb
) - skb
->data
+
343 (tcp_hdr(skb
)->doff
<< 2u);
345 if (header_len
!= len
) {
346 tx_queue
->tso_long_headers
++;
347 efx_tx_map_chunk(tx_queue
, dma_addr
, header_len
);
349 dma_addr
+= header_len
;
353 /* Add descriptors for each fragment. */
355 struct efx_tx_buffer
*buffer
;
356 skb_frag_t
*fragment
;
358 buffer
= efx_tx_map_chunk(tx_queue
, dma_addr
, len
);
360 /* The final descriptor for a fragment is responsible for
361 * unmapping the whole fragment.
363 buffer
->flags
= EFX_TX_BUF_CONT
| dma_flags
;
364 buffer
->unmap_len
= unmap_len
;
365 buffer
->dma_offset
= buffer
->dma_addr
- unmap_addr
;
367 if (frag_index
>= nr_frags
) {
368 /* Store SKB details with the final buffer for
372 buffer
->flags
= EFX_TX_BUF_SKB
| dma_flags
;
376 /* Move on to the next fragment. */
377 fragment
= &skb_shinfo(skb
)->frags
[frag_index
++];
378 len
= skb_frag_size(fragment
);
379 dma_addr
= skb_frag_dma_map(dma_dev
, fragment
, 0, len
,
383 unmap_addr
= dma_addr
;
385 if (unlikely(dma_mapping_error(dma_dev
, dma_addr
)))
390 unsigned int efx_tx_max_skb_descs(struct efx_nic
*efx
)
392 /* Header and payload descriptor for each output segment, plus
393 * one for every input fragment boundary within a segment
395 unsigned int max_descs
= EFX_TSO_MAX_SEGS
* 2 + MAX_SKB_FRAGS
;
397 /* Possibly one more per segment for option descriptors */
398 if (efx_nic_rev(efx
) >= EFX_REV_HUNT_A0
)
399 max_descs
+= EFX_TSO_MAX_SEGS
;
401 /* Possibly more for PCIe page boundaries within input fragments */
402 if (PAGE_SIZE
> EFX_PAGE_SIZE
)
403 max_descs
+= max_t(unsigned int, MAX_SKB_FRAGS
,
404 DIV_ROUND_UP(GSO_MAX_SIZE
, EFX_PAGE_SIZE
));