// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 * Copyright 2019-2020 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
12 #include <net/ip6_checksum.h>
14 #include "net_driver.h"
15 #include "tx_common.h"
16 #include "nic_common.h"
17 #include "mcdi_functions.h"
18 #include "ef100_regs.h"
21 #include "ef100_nic.h"
23 int ef100_tx_probe(struct efx_tx_queue
*tx_queue
)
25 /* Allocate an extra descriptor for the QMDA status completion entry */
26 return efx_nic_alloc_buffer(tx_queue
->efx
, &tx_queue
->txd
,
27 (tx_queue
->ptr_mask
+ 2) *
32 void ef100_tx_init(struct efx_tx_queue
*tx_queue
)
34 /* must be the inverse of lookup in efx_get_tx_channel */
36 netdev_get_tx_queue(tx_queue
->efx
->net_dev
,
37 tx_queue
->channel
->channel
-
38 tx_queue
->efx
->tx_channel_offset
);
40 /* This value is purely documentational; as EF100 never passes through
41 * the switch statement in tx.c:__efx_enqueue_skb(), that switch does
42 * not handle case 3. EF100's TSOv3 descriptors are generated by
43 * ef100_make_tso_desc().
44 * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2.
46 tx_queue
->tso_version
= 3;
47 if (efx_mcdi_tx_init(tx_queue
))
48 netdev_WARN(tx_queue
->efx
->net_dev
,
49 "failed to initialise TXQ %d\n", tx_queue
->queue
);
52 static bool ef100_tx_can_tso(struct efx_tx_queue
*tx_queue
, struct sk_buff
*skb
)
54 struct efx_nic
*efx
= tx_queue
->efx
;
55 struct ef100_nic_data
*nic_data
;
56 struct efx_tx_buffer
*buffer
;
60 nic_data
= efx
->nic_data
;
62 if (!skb_is_gso_tcp(skb
))
64 if (!(efx
->net_dev
->features
& NETIF_F_TSO
))
67 mss
= skb_shinfo(skb
)->gso_size
;
68 if (unlikely(mss
< 4)) {
69 WARN_ONCE(1, "MSS of %u is too small for TSO\n", mss
);
73 header_len
= efx_tx_tso_header_length(skb
);
74 if (header_len
> nic_data
->tso_max_hdr_len
)
77 if (skb_shinfo(skb
)->gso_segs
> nic_data
->tso_max_payload_num_segs
) {
78 /* net_dev->gso_max_segs should've caught this */
83 if (skb
->data_len
/ mss
> nic_data
->tso_max_frames
)
86 /* net_dev->gso_max_size should've caught this */
87 if (WARN_ON_ONCE(skb
->data_len
> nic_data
->tso_max_payload_len
))
90 /* Reserve an empty buffer for the TSO V3 descriptor.
91 * Convey the length of the header since we already know it.
93 buffer
= efx_tx_queue_get_insert_buffer(tx_queue
);
94 buffer
->flags
= EFX_TX_BUF_TSO_V3
| EFX_TX_BUF_CONT
;
95 buffer
->len
= header_len
;
96 buffer
->unmap_len
= 0;
98 ++tx_queue
->insert_count
;
102 static efx_oword_t
*ef100_tx_desc(struct efx_tx_queue
*tx_queue
, unsigned int index
)
104 if (likely(tx_queue
->txd
.addr
))
105 return ((efx_oword_t
*)tx_queue
->txd
.addr
) + index
;
110 static void ef100_notify_tx_desc(struct efx_tx_queue
*tx_queue
)
112 unsigned int write_ptr
;
115 tx_queue
->xmit_pending
= false;
117 if (unlikely(tx_queue
->notify_count
== tx_queue
->write_count
))
120 write_ptr
= tx_queue
->write_count
& tx_queue
->ptr_mask
;
121 /* The write pointer goes into the high word */
122 EFX_POPULATE_DWORD_1(reg
, ERF_GZ_TX_RING_PIDX
, write_ptr
);
123 efx_writed_page(tx_queue
->efx
, ®
,
124 ER_GZ_TX_RING_DOORBELL
, tx_queue
->queue
);
125 tx_queue
->notify_count
= tx_queue
->write_count
;
128 static void ef100_tx_push_buffers(struct efx_tx_queue
*tx_queue
)
130 ef100_notify_tx_desc(tx_queue
);
134 static void ef100_set_tx_csum_partial(const struct sk_buff
*skb
,
135 struct efx_tx_buffer
*buffer
, efx_oword_t
*txd
)
140 if (!skb
|| skb
->ip_summed
!= CHECKSUM_PARTIAL
)
143 /* skb->csum_start has the offset from head, but we need the offset
146 csum_start
= skb_checksum_start_offset(skb
);
147 EFX_POPULATE_OWORD_3(csum
,
148 ESF_GZ_TX_SEND_CSO_PARTIAL_EN
, 1,
149 ESF_GZ_TX_SEND_CSO_PARTIAL_START_W
,
151 ESF_GZ_TX_SEND_CSO_PARTIAL_CSUM_W
,
152 skb
->csum_offset
>> 1);
153 EFX_OR_OWORD(*txd
, *txd
, csum
);
156 static void ef100_set_tx_hw_vlan(const struct sk_buff
*skb
, efx_oword_t
*txd
)
158 u16 vlan_tci
= skb_vlan_tag_get(skb
);
161 EFX_POPULATE_OWORD_2(vlan
,
162 ESF_GZ_TX_SEND_VLAN_INSERT_EN
, 1,
163 ESF_GZ_TX_SEND_VLAN_INSERT_TCI
, vlan_tci
);
164 EFX_OR_OWORD(*txd
, *txd
, vlan
);
167 static void ef100_make_send_desc(struct efx_nic
*efx
,
168 const struct sk_buff
*skb
,
169 struct efx_tx_buffer
*buffer
, efx_oword_t
*txd
,
170 unsigned int segment_count
)
172 /* TX send descriptor */
173 EFX_POPULATE_OWORD_3(*txd
,
174 ESF_GZ_TX_SEND_NUM_SEGS
, segment_count
,
175 ESF_GZ_TX_SEND_LEN
, buffer
->len
,
176 ESF_GZ_TX_SEND_ADDR
, buffer
->dma_addr
);
178 if (likely(efx
->net_dev
->features
& NETIF_F_HW_CSUM
))
179 ef100_set_tx_csum_partial(skb
, buffer
, txd
);
180 if (efx
->net_dev
->features
& NETIF_F_HW_VLAN_CTAG_TX
&&
181 skb
&& skb_vlan_tag_present(skb
))
182 ef100_set_tx_hw_vlan(skb
, txd
);
185 static void ef100_make_tso_desc(struct efx_nic
*efx
,
186 const struct sk_buff
*skb
,
187 struct efx_tx_buffer
*buffer
, efx_oword_t
*txd
,
188 unsigned int segment_count
)
190 bool gso_partial
= skb_shinfo(skb
)->gso_type
& SKB_GSO_PARTIAL
;
191 unsigned int len
, ip_offset
, tcp_offset
, payload_segs
;
192 u32 mangleid
= ESE_GZ_TX_DESC_IP4_ID_INC_MOD16
;
193 unsigned int outer_ip_offset
, outer_l4_offset
;
194 u16 vlan_tci
= skb_vlan_tag_get(skb
);
195 u32 mss
= skb_shinfo(skb
)->gso_size
;
196 bool encap
= skb
->encapsulation
;
197 bool udp_encap
= false;
203 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCP_FIXEDID
)
204 mangleid
= ESE_GZ_TX_DESC_IP4_ID_NO_OP
;
205 if (efx
->net_dev
->features
& NETIF_F_HW_VLAN_CTAG_TX
)
206 vlan_enable
= skb_vlan_tag_present(skb
);
208 len
= skb
->len
- buffer
->len
;
209 /* We use 1 for the TSO descriptor and 1 for the header */
210 payload_segs
= segment_count
- 2;
212 outer_ip_offset
= skb_network_offset(skb
);
213 outer_l4_offset
= skb_transport_offset(skb
);
214 ip_offset
= skb_inner_network_offset(skb
);
215 tcp_offset
= skb_inner_transport_offset(skb
);
216 if (skb_shinfo(skb
)->gso_type
&
217 (SKB_GSO_UDP_TUNNEL
| SKB_GSO_UDP_TUNNEL_CSUM
))
220 ip_offset
= skb_network_offset(skb
);
221 tcp_offset
= skb_transport_offset(skb
);
222 outer_ip_offset
= outer_l4_offset
= 0;
224 outer_csum
= skb_shinfo(skb
)->gso_type
& SKB_GSO_UDP_TUNNEL_CSUM
;
226 /* subtract TCP payload length from inner checksum */
227 tcp
= (void *)skb
->data
+ tcp_offset
;
228 paylen
= skb
->len
- tcp_offset
;
229 csum_replace_by_diff(&tcp
->check
, (__force __wsum
)htonl(paylen
));
231 EFX_POPULATE_OWORD_19(*txd
,
232 ESF_GZ_TX_DESC_TYPE
, ESE_GZ_TX_DESC_TYPE_TSO
,
233 ESF_GZ_TX_TSO_MSS
, mss
,
234 ESF_GZ_TX_TSO_HDR_NUM_SEGS
, 1,
235 ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS
, payload_segs
,
236 ESF_GZ_TX_TSO_HDR_LEN_W
, buffer
->len
>> 1,
237 ESF_GZ_TX_TSO_PAYLOAD_LEN
, len
,
238 ESF_GZ_TX_TSO_CSO_OUTER_L4
, outer_csum
,
239 ESF_GZ_TX_TSO_CSO_INNER_L4
, 1,
240 ESF_GZ_TX_TSO_INNER_L3_OFF_W
, ip_offset
>> 1,
241 ESF_GZ_TX_TSO_INNER_L4_OFF_W
, tcp_offset
>> 1,
242 ESF_GZ_TX_TSO_ED_INNER_IP4_ID
, mangleid
,
243 ESF_GZ_TX_TSO_ED_INNER_IP_LEN
, 1,
244 ESF_GZ_TX_TSO_OUTER_L3_OFF_W
, outer_ip_offset
>> 1,
245 ESF_GZ_TX_TSO_OUTER_L4_OFF_W
, outer_l4_offset
>> 1,
246 ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN
, udp_encap
&& !gso_partial
,
247 ESF_GZ_TX_TSO_ED_OUTER_IP_LEN
, encap
&& !gso_partial
,
248 ESF_GZ_TX_TSO_ED_OUTER_IP4_ID
, encap
? mangleid
:
249 ESE_GZ_TX_DESC_IP4_ID_NO_OP
,
250 ESF_GZ_TX_TSO_VLAN_INSERT_EN
, vlan_enable
,
251 ESF_GZ_TX_TSO_VLAN_INSERT_TCI
, vlan_tci
255 static void ef100_tx_make_descriptors(struct efx_tx_queue
*tx_queue
,
256 const struct sk_buff
*skb
,
257 unsigned int segment_count
,
260 unsigned int old_write_count
= tx_queue
->write_count
;
261 unsigned int new_write_count
= old_write_count
;
262 struct efx_tx_buffer
*buffer
;
263 unsigned int next_desc_type
;
264 unsigned int write_ptr
;
266 unsigned int nr_descs
= tx_queue
->insert_count
- old_write_count
;
268 if (unlikely(nr_descs
== 0))
272 next_desc_type
= ESE_GZ_TX_DESC_TYPE_TSO
;
274 next_desc_type
= ESE_GZ_TX_DESC_TYPE_SEND
;
277 /* Create TX override descriptor */
278 write_ptr
= new_write_count
& tx_queue
->ptr_mask
;
279 txd
= ef100_tx_desc(tx_queue
, write_ptr
);
282 tx_queue
->packet_write_count
= new_write_count
;
283 EFX_POPULATE_OWORD_3(*txd
,
284 ESF_GZ_TX_DESC_TYPE
, ESE_GZ_TX_DESC_TYPE_PREFIX
,
285 ESF_GZ_TX_PREFIX_EGRESS_MPORT
, efv
->mport
,
286 ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN
, 1);
290 /* if it's a raw write (such as XDP) then always SEND single frames */
295 write_ptr
= new_write_count
& tx_queue
->ptr_mask
;
296 buffer
= &tx_queue
->buffer
[write_ptr
];
297 txd
= ef100_tx_desc(tx_queue
, write_ptr
);
300 /* Create TX descriptor ring entry */
301 tx_queue
->packet_write_count
= new_write_count
;
303 switch (next_desc_type
) {
304 case ESE_GZ_TX_DESC_TYPE_SEND
:
305 ef100_make_send_desc(tx_queue
->efx
, skb
,
306 buffer
, txd
, nr_descs
);
308 case ESE_GZ_TX_DESC_TYPE_TSO
:
309 /* TX TSO descriptor */
310 WARN_ON_ONCE(!(buffer
->flags
& EFX_TX_BUF_TSO_V3
));
311 ef100_make_tso_desc(tx_queue
->efx
, skb
,
312 buffer
, txd
, nr_descs
);
315 /* TX segment descriptor */
316 EFX_POPULATE_OWORD_3(*txd
,
317 ESF_GZ_TX_DESC_TYPE
, ESE_GZ_TX_DESC_TYPE_SEG
,
318 ESF_GZ_TX_SEG_LEN
, buffer
->len
,
319 ESF_GZ_TX_SEG_ADDR
, buffer
->dma_addr
);
321 /* if it's a raw write (such as XDP) then always SEND */
322 next_desc_type
= skb
? ESE_GZ_TX_DESC_TYPE_SEG
:
323 ESE_GZ_TX_DESC_TYPE_SEND
;
324 /* mark as an EFV buffer if applicable */
326 buffer
->flags
|= EFX_TX_BUF_EFV
;
328 } while (new_write_count
!= tx_queue
->insert_count
);
330 wmb(); /* Ensure descriptors are written before they are fetched */
332 tx_queue
->write_count
= new_write_count
;
334 /* The write_count above must be updated before reading
335 * channel->holdoff_doorbell to avoid a race with the
336 * completion path, so ensure these operations are not
337 * re-ordered. This also flushes the update of write_count
338 * back into the cache.
343 void ef100_tx_write(struct efx_tx_queue
*tx_queue
)
345 ef100_tx_make_descriptors(tx_queue
, NULL
, 0, NULL
);
346 ef100_tx_push_buffers(tx_queue
);
349 int ef100_ev_tx(struct efx_channel
*channel
, const efx_qword_t
*p_event
)
351 unsigned int tx_done
=
352 EFX_QWORD_FIELD(*p_event
, ESF_GZ_EV_TXCMPL_NUM_DESC
);
353 unsigned int qlabel
=
354 EFX_QWORD_FIELD(*p_event
, ESF_GZ_EV_TXCMPL_Q_LABEL
);
355 struct efx_tx_queue
*tx_queue
=
356 efx_channel_get_tx_queue(channel
, qlabel
);
357 unsigned int tx_index
= (tx_queue
->read_count
+ tx_done
- 1) &
360 return efx_xmit_done(tx_queue
, tx_index
);
363 /* Add a socket buffer to a TX queue
365 * You must hold netif_tx_lock() to call this function.
367 * Returns 0 on success, error code otherwise. In case of an error this
368 * function will free the SKB.
370 netdev_tx_t
ef100_enqueue_skb(struct efx_tx_queue
*tx_queue
,
373 return __ef100_enqueue_skb(tx_queue
, skb
, NULL
);
376 int __ef100_enqueue_skb(struct efx_tx_queue
*tx_queue
, struct sk_buff
*skb
,
379 unsigned int old_insert_count
= tx_queue
->insert_count
;
380 struct efx_nic
*efx
= tx_queue
->efx
;
381 bool xmit_more
= netdev_xmit_more();
382 unsigned int fill_level
;
383 unsigned int segments
;
386 if (!tx_queue
->buffer
|| !tx_queue
->ptr_mask
) {
387 netif_stop_queue(efx
->net_dev
);
388 dev_kfree_skb_any(skb
);
392 segments
= skb_is_gso(skb
) ? skb_shinfo(skb
)->gso_segs
: 0;
394 segments
= 0; /* Don't use TSO/GSO for a single segment. */
395 if (segments
&& !ef100_tx_can_tso(tx_queue
, skb
)) {
396 rc
= efx_tx_tso_fallback(tx_queue
, skb
);
397 tx_queue
->tso_fallbacks
++;
405 struct efx_tx_buffer
*buffer
= __efx_tx_queue_get_insert_buffer(tx_queue
);
407 /* Drop representor packets if the queue is stopped.
408 * We currently don't assert backoff to representors so this is
409 * to make sure representor traffic can't starve the main
411 * And, of course, if there are no TX descriptors left.
413 if (netif_tx_queue_stopped(tx_queue
->core_txq
) ||
414 unlikely(efx_tx_buffer_in_use(buffer
))) {
415 atomic64_inc(&efv
->stats
.tx_errors
);
420 /* Also drop representor traffic if it could cause us to
421 * stop the queue. If we assert backoff and we haven't
422 * received traffic on the main net device recently then the
423 * TX watchdog can go off erroneously.
425 fill_level
= efx_channel_tx_old_fill_level(tx_queue
->channel
);
426 fill_level
+= efx_tx_max_skb_descs(efx
);
427 if (fill_level
> efx
->txq_stop_thresh
) {
428 struct efx_tx_queue
*txq2
;
430 /* Refresh cached fill level and re-check */
431 efx_for_each_channel_tx_queue(txq2
, tx_queue
->channel
)
432 txq2
->old_read_count
= READ_ONCE(txq2
->read_count
);
434 fill_level
= efx_channel_tx_old_fill_level(tx_queue
->channel
);
435 fill_level
+= efx_tx_max_skb_descs(efx
);
436 if (fill_level
> efx
->txq_stop_thresh
) {
437 atomic64_inc(&efv
->stats
.tx_errors
);
443 buffer
->flags
= EFX_TX_BUF_OPTION
| EFX_TX_BUF_EFV
;
444 tx_queue
->insert_count
++;
447 /* Map for DMA and create descriptors */
448 rc
= efx_tx_map_data(tx_queue
, skb
, segments
);
451 ef100_tx_make_descriptors(tx_queue
, skb
, segments
, efv
);
453 fill_level
= efx_channel_tx_old_fill_level(tx_queue
->channel
);
454 if (fill_level
> efx
->txq_stop_thresh
) {
455 struct efx_tx_queue
*txq2
;
457 /* Because of checks above, representor traffic should
458 * not be able to stop the queue.
462 netif_tx_stop_queue(tx_queue
->core_txq
);
463 /* Re-read after a memory barrier in case we've raced with
464 * the completion path. Otherwise there's a danger we'll never
465 * restart the queue if all completions have just happened.
468 efx_for_each_channel_tx_queue(txq2
, tx_queue
->channel
)
469 txq2
->old_read_count
= READ_ONCE(txq2
->read_count
);
470 fill_level
= efx_channel_tx_old_fill_level(tx_queue
->channel
);
471 if (fill_level
< efx
->txq_stop_thresh
)
472 netif_tx_start_queue(tx_queue
->core_txq
);
475 tx_queue
->xmit_pending
= true;
477 /* If xmit_more then we don't need to push the doorbell, unless there
478 * are 256 descriptors already queued in which case we have to push to
479 * ensure we never push more than 256 at once.
481 * Always push for representor traffic, and don't account it to parent
482 * PF netdevice's BQL.
485 __netdev_tx_sent_queue(tx_queue
->core_txq
, skb
->len
, xmit_more
) ||
486 tx_queue
->write_count
- tx_queue
->notify_count
> 255)
487 ef100_tx_push_buffers(tx_queue
);
490 tx_queue
->tso_bursts
++;
491 tx_queue
->tso_packets
+= segments
;
492 tx_queue
->tx_packets
+= segments
;
494 tx_queue
->tx_packets
++;
499 efx_enqueue_unwind(tx_queue
, old_insert_count
);
500 if (!IS_ERR_OR_NULL(skb
))
501 dev_kfree_skb_any(skb
);
503 /* If we're not expecting another transmit and we had something to push
504 * on this queue then we need to push here to get the previous packets
505 * out. We only enter this branch from before the xmit_more handling
506 * above, so xmit_pending still refers to the old state.
508 if (tx_queue
->xmit_pending
&& !xmit_more
)
509 ef100_tx_push_buffers(tx_queue
);