1 // SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
11 #include "net_driver.h"
14 #include "tx_common.h"
16 static unsigned int efx_tx_cb_page_count(struct efx_tx_queue
*tx_queue
)
18 return DIV_ROUND_UP(tx_queue
->ptr_mask
+ 1,
19 PAGE_SIZE
>> EFX_TX_CB_ORDER
);
/* efx_probe_tx_queue - size a TX queue and allocate its rings.
 *
 * Rounds efx->txq_entries up to a power of two (never below
 * EFX_MIN_DMAQ_SIZE), stores entries - 1 in tx_queue->ptr_mask, then
 * allocates the zeroed tx_queue->buffer array (one element per entry) and
 * the zeroed tx_queue->cb_page array (efx_tx_cb_page_count() elements)
 * before asking efx_nic_probe_tx() to set up the hardware ring.
 *
 * The trailing kfree() pairs release cb_page, then buffer, and reset both
 * pointers to NULL; presumably these form the error-unwind path.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering.
 * The declarations of entries and rc, the end of the first kcalloc()
 * call, the bodies of both allocation checks, the test of rc and every
 * return statement are not visible, so the exact return values and
 * unwind labels cannot be confirmed from here.
 */
22 int efx_probe_tx_queue(struct efx_tx_queue
*tx_queue
)
24 struct efx_nic
*efx
= tx_queue
->efx
;
28 /* Create the smallest power-of-two aligned ring */
29 entries
= max(roundup_pow_of_two(efx
->txq_entries
), EFX_MIN_DMAQ_SIZE
);
30 EFX_WARN_ON_PARANOID(entries
> EFX_MAX_DMAQ_SIZE
);
31 tx_queue
->ptr_mask
= entries
- 1;
/* The size logged below is the requested efx->txq_entries, not the
 * rounded-up entries value that the mask was derived from.
 */
33 netif_dbg(efx
, probe
, efx
->net_dev
,
34 "creating TX queue %d size %#x mask %#x\n",
35 tx_queue
->queue
, efx
->txq_entries
, tx_queue
->ptr_mask
);
37 /* Allocate software ring */
38 tx_queue
->buffer
= kcalloc(entries
, sizeof(*tx_queue
->buffer
),
/* NOTE(review): the kcalloc() call above is cut off after its second
 * argument, and the statement guarded by the check below is absent.
 */
40 if (!tx_queue
->buffer
)
43 tx_queue
->cb_page
= kcalloc(efx_tx_cb_page_count(tx_queue
),
44 sizeof(tx_queue
->cb_page
[0]), GFP_KERNEL
);
45 if (!tx_queue
->cb_page
) {
50 /* Allocate hardware ring */
51 rc
= efx_nic_probe_tx(tx_queue
);
/* Release in reverse order of allocation, clearing each pointer so that
 * later !tx_queue->buffer / tx_queue->cb_page checks see the queue as
 * unallocated.
 */
58 kfree(tx_queue
->cb_page
);
59 tx_queue
->cb_page
= NULL
;
61 kfree(tx_queue
->buffer
);
62 tx_queue
->buffer
= NULL
;
66 void efx_init_tx_queue(struct efx_tx_queue
*tx_queue
)
68 struct efx_nic
*efx
= tx_queue
->efx
;
70 netif_dbg(efx
, drv
, efx
->net_dev
,
71 "initialising TX queue %d\n", tx_queue
->queue
);
73 tx_queue
->insert_count
= 0;
74 tx_queue
->write_count
= 0;
75 tx_queue
->packet_write_count
= 0;
76 tx_queue
->old_write_count
= 0;
77 tx_queue
->read_count
= 0;
78 tx_queue
->old_read_count
= 0;
79 tx_queue
->empty_read_count
= 0 | EFX_EMPTY_COUNT_VALID
;
80 tx_queue
->xmit_more_available
= false;
81 tx_queue
->timestamping
= (efx_ptp_use_mac_tx_timestamps(efx
) &&
82 tx_queue
->channel
== efx_ptp_channel(efx
));
83 tx_queue
->completed_desc_ptr
= tx_queue
->ptr_mask
;
84 tx_queue
->completed_timestamp_major
= 0;
85 tx_queue
->completed_timestamp_minor
= 0;
87 tx_queue
->xdp_tx
= efx_channel_is_xdp_tx(tx_queue
->channel
);
89 /* Set up default function pointers. These may get replaced by
90 * efx_nic_init_tx() based off NIC/queue capabilities.
92 tx_queue
->handle_tso
= efx_enqueue_skb_tso
;
94 /* Set up TX descriptor ring */
95 efx_nic_init_tx(tx_queue
);
97 tx_queue
->initialised
= true;
/* efx_fini_tx_queue - release whatever is still held in the software ring.
 *
 * Logs the shutdown, then walks every slot from read_count up to
 * write_count, handing each buffer to efx_dequeue_buffer() and advancing
 * read_count.  Afterwards xmit_more_available is cleared and
 * netdev_tx_reset_queue() is called on the queue's core_txq.
 *
 * The pkts_compl / bytes_compl totals are scoped to one loop iteration
 * and are never read; they exist only to satisfy efx_dequeue_buffer().
 *
 * NOTE(review): the statement guarded by the !tx_queue->buffer check is
 * absent from this listing; presumably an early return - confirm.
 */
100 void efx_fini_tx_queue(struct efx_tx_queue
*tx_queue
)
102 struct efx_tx_buffer
*buffer
;
104 netif_dbg(tx_queue
->efx
, drv
, tx_queue
->efx
->net_dev
,
105 "shutting down TX queue %d\n", tx_queue
->queue
);
107 if (!tx_queue
->buffer
)
110 /* Free any buffers left in the ring */
111 while (tx_queue
->read_count
!= tx_queue
->write_count
) {
112 unsigned int pkts_compl
= 0, bytes_compl
= 0;
/* read_count runs freely; masking with ptr_mask yields the ring slot. */
114 buffer
= &tx_queue
->buffer
[tx_queue
->read_count
& tx_queue
->ptr_mask
];
115 efx_dequeue_buffer(tx_queue
, buffer
, &pkts_compl
, &bytes_compl
);
117 ++tx_queue
->read_count
;
119 tx_queue
->xmit_more_available
= false;
120 netdev_tx_reset_queue(tx_queue
->core_txq
);
/* efx_remove_tx_queue - free everything efx_probe_tx_queue() allocated.
 *
 * Logs the teardown and calls efx_nic_remove_tx() for the hardware ring.
 * If cb_page is set, each of its efx_tx_cb_page_count() entries is passed
 * to efx_nic_free_buffer() before the array itself is freed.  The buffer
 * array is freed last.  Both pointers are reset to NULL after kfree().
 *
 * NOTE(review): the declaration of i and the statement guarded by the
 * !tx_queue->buffer check are absent from this listing; the guard is
 * presumably an early return for a queue that was never probed - confirm.
 */
123 void efx_remove_tx_queue(struct efx_tx_queue
*tx_queue
)
127 if (!tx_queue
->buffer
)
130 netif_dbg(tx_queue
->efx
, drv
, tx_queue
->efx
->net_dev
,
131 "destroying TX queue %d\n", tx_queue
->queue
);
132 efx_nic_remove_tx(tx_queue
);
134 if (tx_queue
->cb_page
) {
/* efx_tx_cb_page_count() is re-evaluated on every iteration; it depends
 * only on ptr_mask, which this loop does not modify.
 */
135 for (i
= 0; i
< efx_tx_cb_page_count(tx_queue
); i
++)
136 efx_nic_free_buffer(tx_queue
->efx
,
137 &tx_queue
->cb_page
[i
]);
138 kfree(tx_queue
->cb_page
);
139 tx_queue
->cb_page
= NULL
;
142 kfree(tx_queue
->buffer
);
143 tx_queue
->buffer
= NULL
;
/* efx_dequeue_buffer - undo the DMA mapping of one TX buffer and release
 * whatever packet object it owns.
 *
 * @tx_queue:    queue the buffer belongs to
 * @buffer:      ring entry to release
 * @pkts_compl:  completion counter; must be non-NULL for skb buffers
 * @bytes_compl: byte counter, incremented by skb->len for skb buffers
 *
 * A non-zero unmap_len means this entry owns a mapping that starts at
 * dma_addr - dma_offset; it is unmapped and unmap_len is cleared.
 * An entry flagged EFX_TX_BUF_SKB has its skb accounted, optionally
 * timestamped, and consumed; one flagged EFX_TX_BUF_XDP has its frame
 * returned through xdp_return_frame_rx_napi().
 *
 * NOTE(review): this listing has gaps.  The last argument of both unmap
 * calls, the keyword separating them (presumably else), any update of
 * *pkts_compl, the assignment target of efx_ptp_nic_to_kernel_time() and
 * the statements after the final branch are not visible.
 */
146 void efx_dequeue_buffer(struct efx_tx_queue
*tx_queue
,
147 struct efx_tx_buffer
*buffer
,
148 unsigned int *pkts_compl
,
149 unsigned int *bytes_compl
)
151 if (buffer
->unmap_len
) {
152 struct device
*dma_dev
= &tx_queue
->efx
->pci_dev
->dev
;
153 dma_addr_t unmap_addr
= buffer
->dma_addr
- buffer
->dma_offset
;
/* The unmap call must match how the region was mapped, which the
 * EFX_TX_BUF_MAP_SINGLE flag records.
 */
155 if (buffer
->flags
& EFX_TX_BUF_MAP_SINGLE
)
156 dma_unmap_single(dma_dev
, unmap_addr
, buffer
->unmap_len
,
159 dma_unmap_page(dma_dev
, unmap_addr
, buffer
->unmap_len
,
161 buffer
->unmap_len
= 0;
164 if (buffer
->flags
& EFX_TX_BUF_SKB
) {
165 struct sk_buff
*skb
= (struct sk_buff
*)buffer
->skb
;
167 EFX_WARN_ON_PARANOID(!pkts_compl
|| !bytes_compl
);
169 (*bytes_compl
) += skb
->len
;
/* Report a hardware timestamp only on a timestamping queue that has a
 * non-zero completed timestamp pending; the pending value is cleared
 * once it has been delivered.
 */
170 if (tx_queue
->timestamping
&&
171 (tx_queue
->completed_timestamp_major
||
172 tx_queue
->completed_timestamp_minor
)) {
173 struct skb_shared_hwtstamps hwtstamp
;
176 efx_ptp_nic_to_kernel_time(tx_queue
);
177 skb_tstamp_tx(skb
, &hwtstamp
);
179 tx_queue
->completed_timestamp_major
= 0;
180 tx_queue
->completed_timestamp_minor
= 0;
182 dev_consume_skb_any((struct sk_buff
*)buffer
->skb
);
183 netif_vdbg(tx_queue
->efx
, tx_done
, tx_queue
->efx
->net_dev
,
184 "TX queue %d transmission id %x complete\n",
185 tx_queue
->queue
, tx_queue
->read_count
);
186 } else if (buffer
->flags
& EFX_TX_BUF_XDP
) {
187 xdp_return_frame_rx_napi(buffer
->xdpf
);
/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
/* efx_dequeue_buffers - release ring entries from the current read
 * position through a given descriptor index.
 *
 * @tx_queue:    queue to drain
 * @pkts_compl:  forwarded to efx_dequeue_buffer()
 * @bytes_compl: forwarded to efx_dequeue_buffer()
 *
 * stop_index is one past the given index (wrapped by ptr_mask), so the
 * entry at that index is itself released.  Each pass releases one buffer
 * and advances read_count.  An entry with zero length that is not an
 * option descriptor is treated as a spurious completion: it is logged and
 * a RESET_TYPE_TX_SKIP reset is scheduled.
 *
 * NOTE(review): the index parameter is used in the body but missing from
 * the parameter list in this listing, and the statement following
 * efx_schedule_reset() is absent (presumably an early return) - confirm.
 */
199 static void efx_dequeue_buffers(struct efx_tx_queue
*tx_queue
,
201 unsigned int *pkts_compl
,
202 unsigned int *bytes_compl
)
204 struct efx_nic
*efx
= tx_queue
->efx
;
205 unsigned int stop_index
, read_ptr
;
207 stop_index
= (index
+ 1) & tx_queue
->ptr_mask
;
208 read_ptr
= tx_queue
->read_count
& tx_queue
->ptr_mask
;
210 while (read_ptr
!= stop_index
) {
211 struct efx_tx_buffer
*buffer
= &tx_queue
->buffer
[read_ptr
];
/* Option descriptors are exempt from the zero-length check. */
213 if (!(buffer
->flags
& EFX_TX_BUF_OPTION
) &&
214 unlikely(buffer
->len
== 0)) {
215 netif_err(efx
, tx_err
, efx
->net_dev
,
216 "TX queue %d spurious TX completion id %x\n",
217 tx_queue
->queue
, read_ptr
);
218 efx_schedule_reset(efx
, RESET_TYPE_TX_SKIP
);
222 efx_dequeue_buffer(tx_queue
, buffer
, pkts_compl
, bytes_compl
);
224 ++tx_queue
->read_count
;
225 read_ptr
= tx_queue
->read_count
& tx_queue
->ptr_mask
;
/* efx_xmit_done - process a TX completion up to a descriptor index.
 *
 * @tx_queue: queue the completion is for
 * @index:    descriptor index of the completion; expected to be no
 *            greater than ptr_mask
 *
 * Releases the completed buffers through efx_dequeue_buffers() and adds
 * the resulting packet and byte counts to the queue's running totals.
 * If the core netdev queue is stopped while the port is enabled and the
 * device is present, the fill level is taken as the larger of
 * insert_count - read_count over this queue and its partner queue, and
 * the core queue is woken once that is at or below txq_wake_thresh.
 * Finally, when read_count has caught up with old_write_count, the write
 * count is re-sampled and, if the ring is empty, empty_read_count is set
 * to read_count tagged with EFX_EMPTY_COUNT_VALID.
 *
 * NOTE(review): this listing has gaps.  As shown, merge_events is
 * incremented unconditionally, and the memory barrier that the in-body
 * comment refers to is not visible; that comment is also missing its
 * terminator.  Confirm all three against the complete source.
 */
229 void efx_xmit_done(struct efx_tx_queue
*tx_queue
, unsigned int index
)
231 unsigned int fill_level
, pkts_compl
= 0, bytes_compl
= 0;
232 struct efx_nic
*efx
= tx_queue
->efx
;
233 struct efx_tx_queue
*txq2
;
235 EFX_WARN_ON_ONCE_PARANOID(index
> tx_queue
->ptr_mask
);
237 efx_dequeue_buffers(tx_queue
, index
, &pkts_compl
, &bytes_compl
);
238 tx_queue
->pkts_compl
+= pkts_compl
;
239 tx_queue
->bytes_compl
+= bytes_compl
;
242 ++tx_queue
->merge_events
;
244 /* See if we need to restart the netif queue. This memory
245 * barrier ensures that we write read_count (inside
246 * efx_dequeue_buffers()) before reading the queue status.
249 if (unlikely(netif_tx_queue_stopped(tx_queue
->core_txq
)) &&
250 likely(efx
->port_enabled
) &&
251 likely(netif_device_present(efx
->net_dev
))) {
252 txq2
= efx_tx_queue_partner(tx_queue
);
253 fill_level
= max(tx_queue
->insert_count
- tx_queue
->read_count
,
254 txq2
->insert_count
- txq2
->read_count
);
255 if (fill_level
<= efx
->txq_wake_thresh
)
256 netif_tx_wake_queue(tx_queue
->core_txq
);
259 /* Check whether the hardware queue is now empty */
260 if ((int)(tx_queue
->read_count
- tx_queue
->old_write_count
) >= 0) {
261 tx_queue
->old_write_count
= READ_ONCE(tx_queue
->write_count
);
262 if (tx_queue
->read_count
== tx_queue
->old_write_count
) {
264 tx_queue
->empty_read_count
=
265 tx_queue
->read_count
| EFX_EMPTY_COUNT_VALID
;
270 /* Remove buffers put into a tx_queue for the current packet.
271 * None of the buffers must have an skb attached.
273 void efx_enqueue_unwind(struct efx_tx_queue
*tx_queue
,
274 unsigned int insert_count
)
276 struct efx_tx_buffer
*buffer
;
277 unsigned int bytes_compl
= 0;
278 unsigned int pkts_compl
= 0;
280 /* Work backwards until we hit the original insert pointer value */
281 while (tx_queue
->insert_count
!= insert_count
) {
282 --tx_queue
->insert_count
;
283 buffer
= __efx_tx_queue_get_insert_buffer(tx_queue
);
284 efx_dequeue_buffer(tx_queue
, buffer
, &pkts_compl
, &bytes_compl
);
/* efx_tx_map_chunk - fill TX buffer entries for one DMA-mapped region.
 *
 * @tx_queue: queue to insert into
 * @dma_addr: bus address of the start of the region
 * @len:      length of the region in bytes
 *
 * Takes the buffer at the insert position, asks the NIC type's
 * tx_limit_len() hook how many bytes may go into one descriptor, records
 * that length together with dma_addr and the EFX_TX_BUF_CONT flag, and
 * advances insert_count.
 *
 * NOTE(review): this listing has gaps.  The loop construct, the advance
 * of dma_addr and len by dma_len, and the return statement are not
 * visible.  Presumably the steps repeat until len is consumed and the
 * last buffer filled is returned - confirm against the complete source.
 */
288 struct efx_tx_buffer
*efx_tx_map_chunk(struct efx_tx_queue
*tx_queue
,
289 dma_addr_t dma_addr
, size_t len
)
291 const struct efx_nic_type
*nic_type
= tx_queue
->efx
->type
;
292 struct efx_tx_buffer
*buffer
;
293 unsigned int dma_len
;
295 /* Map the fragment taking account of NIC-dependent DMA limits. */
297 buffer
= efx_tx_queue_get_insert_buffer(tx_queue
);
298 dma_len
= nic_type
->tx_limit_len(tx_queue
, dma_addr
, len
);
300 buffer
->len
= dma_len
;
301 buffer
->dma_addr
= dma_addr
;
302 buffer
->flags
= EFX_TX_BUF_CONT
;
305 ++tx_queue
->insert_count
;
/* efx_tx_map_data - DMA-map an skb and queue descriptors for it.
 *
 * @tx_queue:      queue to insert into
 * @skb:           packet to map
 * @segment_count: read only in lines missing from this listing; the
 *                 in-body comment ties the header split to TSO
 *
 * The linear head is mapped with dma_map_single() and each page fragment
 * with skb_frag_dma_map(); every mapping is checked with
 * dma_mapping_error().  For the header split, header_len is the offset of
 * the transport header plus the TCP header length (doff is in 32-bit
 * words, hence the shift by 2).  If that differs from the head length,
 * the header gets its own descriptor and tso_long_headers is counted.
 *
 * The last buffer produced for each mapped region carries unmap_len and
 * dma_offset so that efx_dequeue_buffer() can unmap the whole region.
 * The buffer for the final region is flagged EFX_TX_BUF_SKB instead of
 * EFX_TX_BUF_CONT.
 *
 * NOTE(review): this listing has gaps.  The initialisation of frag_index
 * and unmap_len, the condition guarding the TSO block, the loop
 * construct, the final argument of skb_frag_dma_map(), the storing of the
 * skb pointer and every return statement are not visible, so return
 * values cannot be confirmed from here.
 */
311 /* Map all data from an SKB for DMA and create descriptors on the queue. */
312 int efx_tx_map_data(struct efx_tx_queue
*tx_queue
, struct sk_buff
*skb
,
313 unsigned int segment_count
)
315 struct efx_nic
*efx
= tx_queue
->efx
;
316 struct device
*dma_dev
= &efx
->pci_dev
->dev
;
317 unsigned int frag_index
, nr_frags
;
318 dma_addr_t dma_addr
, unmap_addr
;
319 unsigned short dma_flags
;
320 size_t len
, unmap_len
;
322 nr_frags
= skb_shinfo(skb
)->nr_frags
;
325 /* Map header data. */
326 len
= skb_headlen(skb
);
327 dma_addr
= dma_map_single(dma_dev
, skb
->data
, len
, DMA_TO_DEVICE
);
/* Record the mapping type so the matching unmap call is chosen later. */
328 dma_flags
= EFX_TX_BUF_MAP_SINGLE
;
330 unmap_addr
= dma_addr
;
332 if (unlikely(dma_mapping_error(dma_dev
, dma_addr
)))
336 /* For TSO we need to put the header in to a separate
337 * descriptor. Map this separately if necessary.
339 size_t header_len
= skb_transport_header(skb
) - skb
->data
+
340 (tcp_hdr(skb
)->doff
<< 2u);
342 if (header_len
!= len
) {
343 tx_queue
->tso_long_headers
++;
344 efx_tx_map_chunk(tx_queue
, dma_addr
, header_len
);
346 dma_addr
+= header_len
;
350 /* Add descriptors for each fragment. */
352 struct efx_tx_buffer
*buffer
;
353 skb_frag_t
*fragment
;
355 buffer
= efx_tx_map_chunk(tx_queue
, dma_addr
, len
);
357 /* The final descriptor for a fragment is responsible for
358 * unmapping the whole fragment.
360 buffer
->flags
= EFX_TX_BUF_CONT
| dma_flags
;
361 buffer
->unmap_len
= unmap_len
;
362 buffer
->dma_offset
= buffer
->dma_addr
- unmap_addr
;
364 if (frag_index
>= nr_frags
) {
365 /* Store SKB details with the final buffer for
369 buffer
->flags
= EFX_TX_BUF_SKB
| dma_flags
;
373 /* Move on to the next fragment. */
374 fragment
= &skb_shinfo(skb
)->frags
[frag_index
++];
375 len
= skb_frag_size(fragment
);
376 dma_addr
= skb_frag_dma_map(dma_dev
, fragment
, 0, len
,
380 unmap_addr
= dma_addr
;
382 if (unlikely(dma_mapping_error(dma_dev
, dma_addr
)))
/* efx_tx_max_skb_descs - worst-case descriptor count for a single skb.
 *
 * @efx: NIC whose revision decides whether option descriptors are counted
 *
 * Starts from EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS, adds a further
 * EFX_TSO_MAX_SEGS when efx_nic_rev() is at least EFX_REV_HUNT_A0, and,
 * when PAGE_SIZE exceeds EFX_PAGE_SIZE, adds the larger of MAX_SKB_FRAGS
 * and DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE).
 *
 * NOTE(review): the listing ends before any return statement; presumably
 * max_descs is returned - confirm against the complete source.
 */
387 unsigned int efx_tx_max_skb_descs(struct efx_nic
*efx
)
389 /* Header and payload descriptor for each output segment, plus
390 * one for every input fragment boundary within a segment
392 unsigned int max_descs
= EFX_TSO_MAX_SEGS
* 2 + MAX_SKB_FRAGS
;
394 /* Possibly one more per segment for option descriptors */
395 if (efx_nic_rev(efx
) >= EFX_REV_HUNT_A0
)
396 max_descs
+= EFX_TSO_MAX_SEGS
;
398 /* Possibly more for PCIe page boundaries within input fragments */
399 if (PAGE_SIZE
> EFX_PAGE_SIZE
)
400 max_descs
+= max_t(unsigned int, MAX_SKB_FRAGS
,
401 DIV_ROUND_UP(GSO_MAX_SIZE
, EFX_PAGE_SIZE
));