1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2015 Cavium, Inc.
7 #include <linux/netdevice.h>
9 #include <linux/etherdevice.h>
10 #include <linux/iommu.h>
17 #include "nicvf_queues.h"
19 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue
*sq
, int qentry
,
21 static void nicvf_get_page(struct nicvf
*nic
)
23 if (!nic
->rb_pageref
|| !nic
->rb_page
)
26 page_ref_add(nic
->rb_page
, nic
->rb_pageref
);
30 /* Poll a register for a specific value */
31 static int nicvf_poll_reg(struct nicvf
*nic
, int qidx
,
32 u64 reg
, int bit_pos
, int bits
, int val
)
38 bit_mask
= (1ULL << bits
) - 1;
39 bit_mask
= (bit_mask
<< bit_pos
);
42 reg_val
= nicvf_queue_reg_read(nic
, reg
, qidx
);
43 if (((reg_val
& bit_mask
) >> bit_pos
) == val
)
45 usleep_range(1000, 2000);
48 netdev_err(nic
->netdev
, "Poll on reg 0x%llx failed\n", reg
);
52 /* Allocate memory for a queue's descriptors */
53 static int nicvf_alloc_q_desc_mem(struct nicvf
*nic
, struct q_desc_mem
*dmem
,
54 int q_len
, int desc_size
, int align_bytes
)
57 dmem
->size
= (desc_size
* q_len
) + align_bytes
;
58 /* Save address, need it while freeing */
59 dmem
->unalign_base
= dma_alloc_coherent(&nic
->pdev
->dev
, dmem
->size
,
60 &dmem
->dma
, GFP_KERNEL
);
61 if (!dmem
->unalign_base
)
64 /* Align memory address for 'align_bytes' */
65 dmem
->phys_base
= NICVF_ALIGNED_ADDR((u64
)dmem
->dma
, align_bytes
);
66 dmem
->base
= dmem
->unalign_base
+ (dmem
->phys_base
- dmem
->dma
);
70 /* Free queue's descriptor memory */
71 static void nicvf_free_q_desc_mem(struct nicvf
*nic
, struct q_desc_mem
*dmem
)
76 dma_free_coherent(&nic
->pdev
->dev
, dmem
->size
,
77 dmem
->unalign_base
, dmem
->dma
);
78 dmem
->unalign_base
= NULL
;
82 #define XDP_PAGE_REFCNT_REFILL 256
84 /* Allocate a new page or recycle one if possible
86 * We cannot optimize dma mapping here, since
87 * 1. It's only one RBDR ring for 8 Rx queues.
88 * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
89 * and not idx into RBDR ring, so can't refer to saved info.
90 * 3. There are multiple receive buffers per page
92 static inline struct pgcache
*nicvf_alloc_page(struct nicvf
*nic
,
93 struct rbdr
*rbdr
, gfp_t gfp
)
96 struct page
*page
= NULL
;
97 struct pgcache
*pgcache
, *next
;
99 /* Check if page is already allocated */
100 pgcache
= &rbdr
->pgcache
[rbdr
->pgidx
];
101 page
= pgcache
->page
;
102 /* Check if page can be recycled */
104 ref_count
= page_ref_count(page
);
105 /* This page can be recycled if internal ref_count and page's
106 * ref_count are equal, indicating that the page has been used
107 * once for packet transmission. For non-XDP mode, internal
108 * ref_count is always '1'.
111 if (ref_count
== pgcache
->ref_count
)
112 pgcache
->ref_count
--;
115 } else if (ref_count
!= 1) {
121 page
= alloc_pages(gfp
| __GFP_COMP
| __GFP_NOWARN
, 0);
125 this_cpu_inc(nic
->pnicvf
->drv_stats
->page_alloc
);
127 /* Check for space */
128 if (rbdr
->pgalloc
>= rbdr
->pgcnt
) {
129 /* Page can still be used */
134 /* Save the page in page cache */
135 pgcache
->page
= page
;
136 pgcache
->dma_addr
= 0;
137 pgcache
->ref_count
= 0;
141 /* Take additional page references for recycling */
143 /* Since there is single RBDR (i.e single core doing
144 * page recycling) per 8 Rx queues, in XDP mode adjusting
145 * page references atomically is the biggest bottleneck, so
146 * take bunch of references at a time.
148 * So here, the reference counts below differ by '1'.
150 if (!pgcache
->ref_count
) {
151 pgcache
->ref_count
= XDP_PAGE_REFCNT_REFILL
;
152 page_ref_add(page
, XDP_PAGE_REFCNT_REFILL
);
155 /* In non-XDP case, single 64K page is divided across multiple
156 * receive buffers, so cost of recycling is less anyway.
157 * So we can do with just one extra reference.
159 page_ref_add(page
, 1);
163 rbdr
->pgidx
&= (rbdr
->pgcnt
- 1);
165 /* Prefetch refcount of next page in page cache */
166 next
= &rbdr
->pgcache
[rbdr
->pgidx
];
169 prefetch(&page
->_refcount
);
174 /* Allocate buffer for packet reception */
175 static inline int nicvf_alloc_rcv_buffer(struct nicvf
*nic
, struct rbdr
*rbdr
,
176 gfp_t gfp
, u32 buf_len
, u64
*rbuf
)
178 struct pgcache
*pgcache
= NULL
;
180 /* Check if request can be accommodated in previously allocated page.
181 * But in XDP mode only one buffer per page is permitted.
183 if (!rbdr
->is_xdp
&& nic
->rb_page
&&
184 ((nic
->rb_page_offset
+ buf_len
) <= PAGE_SIZE
)) {
192 /* Get new page, either recycled or new one */
193 pgcache
= nicvf_alloc_page(nic
, rbdr
, gfp
);
194 if (!pgcache
&& !nic
->rb_page
) {
195 this_cpu_inc(nic
->pnicvf
->drv_stats
->rcv_buffer_alloc_failures
);
199 nic
->rb_page_offset
= 0;
201 /* Reserve space for header modifications by BPF program */
203 buf_len
+= XDP_PACKET_HEADROOM
;
205 /* Check if it's recycled */
207 nic
->rb_page
= pgcache
->page
;
209 if (rbdr
->is_xdp
&& pgcache
&& pgcache
->dma_addr
) {
210 *rbuf
= pgcache
->dma_addr
;
212 /* HW will ensure data coherency, CPU sync not required */
213 *rbuf
= (u64
)dma_map_page_attrs(&nic
->pdev
->dev
, nic
->rb_page
,
214 nic
->rb_page_offset
, buf_len
,
216 DMA_ATTR_SKIP_CPU_SYNC
);
217 if (dma_mapping_error(&nic
->pdev
->dev
, (dma_addr_t
)*rbuf
)) {
218 if (!nic
->rb_page_offset
)
219 __free_pages(nic
->rb_page
, 0);
224 pgcache
->dma_addr
= *rbuf
+ XDP_PACKET_HEADROOM
;
225 nic
->rb_page_offset
+= buf_len
;
231 /* Build skb around receive buffer */
232 static struct sk_buff
*nicvf_rb_ptr_to_skb(struct nicvf
*nic
,
238 data
= phys_to_virt(rb_ptr
);
240 /* Now build an skb to give to stack */
241 skb
= build_skb(data
, RCV_FRAG_LEN
);
243 put_page(virt_to_page(data
));
251 /* Allocate RBDR ring and populate receive buffers */
252 static int nicvf_init_rbdr(struct nicvf
*nic
, struct rbdr
*rbdr
,
253 int ring_len
, int buf_size
)
257 struct rbdr_entry_t
*desc
;
260 err
= nicvf_alloc_q_desc_mem(nic
, &rbdr
->dmem
, ring_len
,
261 sizeof(struct rbdr_entry_t
),
262 NICVF_RCV_BUF_ALIGN_BYTES
);
266 rbdr
->desc
= rbdr
->dmem
.base
;
267 /* Buffer size has to be in multiples of 128 bytes */
268 rbdr
->dma_size
= buf_size
;
270 rbdr
->thresh
= RBDR_THRESH
;
274 /* Initialize page recycling stuff.
276 * Can't use single buffer per page especially with 64K pages.
277 * On embedded platforms i.e 81xx/83xx available memory itself
278 * is low and minimum ring size of RBDR is 8K, that takes away
281 * But for XDP it has to be a single buffer per page.
283 if (!nic
->pnicvf
->xdp_prog
) {
284 rbdr
->pgcnt
= ring_len
/ (PAGE_SIZE
/ buf_size
);
285 rbdr
->is_xdp
= false;
287 rbdr
->pgcnt
= ring_len
;
290 rbdr
->pgcnt
= roundup_pow_of_two(rbdr
->pgcnt
);
291 rbdr
->pgcache
= kcalloc(rbdr
->pgcnt
, sizeof(*rbdr
->pgcache
),
299 for (idx
= 0; idx
< ring_len
; idx
++) {
300 err
= nicvf_alloc_rcv_buffer(nic
, rbdr
, GFP_KERNEL
,
301 RCV_FRAG_LEN
, &rbuf
);
303 /* To free already allocated and mapped ones */
304 rbdr
->tail
= idx
- 1;
308 desc
= GET_RBDR_DESC(rbdr
, idx
);
309 desc
->buf_addr
= rbuf
& ~(NICVF_RCV_BUF_ALIGN_BYTES
- 1);
317 /* Free RBDR ring and its receive buffers */
318 static void nicvf_free_rbdr(struct nicvf
*nic
, struct rbdr
*rbdr
)
321 u64 buf_addr
, phys_addr
;
322 struct pgcache
*pgcache
;
323 struct rbdr_entry_t
*desc
;
328 rbdr
->enable
= false;
329 if (!rbdr
->dmem
.base
)
335 /* Release page references */
336 while (head
!= tail
) {
337 desc
= GET_RBDR_DESC(rbdr
, head
);
338 buf_addr
= desc
->buf_addr
;
339 phys_addr
= nicvf_iova_to_phys(nic
, buf_addr
);
340 dma_unmap_page_attrs(&nic
->pdev
->dev
, buf_addr
, RCV_FRAG_LEN
,
341 DMA_FROM_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
343 put_page(virt_to_page(phys_to_virt(phys_addr
)));
345 head
&= (rbdr
->dmem
.q_len
- 1);
347 /* Release buffer of tail desc */
348 desc
= GET_RBDR_DESC(rbdr
, tail
);
349 buf_addr
= desc
->buf_addr
;
350 phys_addr
= nicvf_iova_to_phys(nic
, buf_addr
);
351 dma_unmap_page_attrs(&nic
->pdev
->dev
, buf_addr
, RCV_FRAG_LEN
,
352 DMA_FROM_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
354 put_page(virt_to_page(phys_to_virt(phys_addr
)));
356 /* Sync page cache info */
359 /* Release additional page references held for recycling */
361 while (head
< rbdr
->pgcnt
) {
362 pgcache
= &rbdr
->pgcache
[head
];
363 if (pgcache
->page
&& page_ref_count(pgcache
->page
) != 0) {
365 page_ref_sub(pgcache
->page
,
366 pgcache
->ref_count
- 1);
368 put_page(pgcache
->page
);
374 nicvf_free_q_desc_mem(nic
, &rbdr
->dmem
);
377 /* Refill receive buffer descriptors with new buffers.
379 static void nicvf_refill_rbdr(struct nicvf
*nic
, gfp_t gfp
)
381 struct queue_set
*qs
= nic
->qs
;
382 int rbdr_idx
= qs
->rbdr_cnt
;
386 struct rbdr_entry_t
*desc
;
394 rbdr
= &qs
->rbdr
[rbdr_idx
];
395 /* Check if it's enabled */
399 /* Get no of desc's to be refilled */
400 qcount
= nicvf_queue_reg_read(nic
, NIC_QSET_RBDR_0_1_STATUS0
, rbdr_idx
);
402 /* Doorbell can be rung with a max of ring size minus 1 */
403 if (qcount
>= (qs
->rbdr_len
- 1))
406 refill_rb_cnt
= qs
->rbdr_len
- qcount
- 1;
408 /* Sync page cache info */
411 /* Start filling descs from tail */
412 tail
= nicvf_queue_reg_read(nic
, NIC_QSET_RBDR_0_1_TAIL
, rbdr_idx
) >> 3;
413 while (refill_rb_cnt
) {
415 tail
&= (rbdr
->dmem
.q_len
- 1);
417 if (nicvf_alloc_rcv_buffer(nic
, rbdr
, gfp
, RCV_FRAG_LEN
, &rbuf
))
420 desc
= GET_RBDR_DESC(rbdr
, tail
);
421 desc
->buf_addr
= rbuf
& ~(NICVF_RCV_BUF_ALIGN_BYTES
- 1);
428 /* make sure all memory stores are done before ringing doorbell */
431 /* Check if buffer allocation failed */
433 nic
->rb_alloc_fail
= true;
435 nic
->rb_alloc_fail
= false;
438 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_DOOR
,
441 /* Re-enable RBDR interrupts only if buffer allocation is success */
442 if (!nic
->rb_alloc_fail
&& rbdr
->enable
&&
443 netif_running(nic
->pnicvf
->netdev
))
444 nicvf_enable_intr(nic
, NICVF_INTR_RBDR
, rbdr_idx
);
450 /* Alloc rcv buffers in non-atomic mode for better success */
451 void nicvf_rbdr_work(struct work_struct
*work
)
453 struct nicvf
*nic
= container_of(work
, struct nicvf
, rbdr_work
.work
);
455 nicvf_refill_rbdr(nic
, GFP_KERNEL
);
456 if (nic
->rb_alloc_fail
)
457 schedule_delayed_work(&nic
->rbdr_work
, msecs_to_jiffies(10));
459 nic
->rb_work_scheduled
= false;
462 /* In Softirq context, alloc rcv buffers in atomic mode */
463 void nicvf_rbdr_task(unsigned long data
)
465 struct nicvf
*nic
= (struct nicvf
*)data
;
467 nicvf_refill_rbdr(nic
, GFP_ATOMIC
);
468 if (nic
->rb_alloc_fail
) {
469 nic
->rb_work_scheduled
= true;
470 schedule_delayed_work(&nic
->rbdr_work
, msecs_to_jiffies(10));
474 /* Initialize completion queue */
475 static int nicvf_init_cmp_queue(struct nicvf
*nic
,
476 struct cmp_queue
*cq
, int q_len
)
480 err
= nicvf_alloc_q_desc_mem(nic
, &cq
->dmem
, q_len
, CMP_QUEUE_DESC_SIZE
,
481 NICVF_CQ_BASE_ALIGN_BYTES
);
485 cq
->desc
= cq
->dmem
.base
;
486 cq
->thresh
= pass1_silicon(nic
->pdev
) ? 0 : CMP_QUEUE_CQE_THRESH
;
487 nic
->cq_coalesce_usecs
= (CMP_QUEUE_TIMER_THRESH
* 0.05) - 1;
492 static void nicvf_free_cmp_queue(struct nicvf
*nic
, struct cmp_queue
*cq
)
499 nicvf_free_q_desc_mem(nic
, &cq
->dmem
);
502 /* Initialize transmit queue */
503 static int nicvf_init_snd_queue(struct nicvf
*nic
,
504 struct snd_queue
*sq
, int q_len
, int qidx
)
508 err
= nicvf_alloc_q_desc_mem(nic
, &sq
->dmem
, q_len
, SND_QUEUE_DESC_SIZE
,
509 NICVF_SQ_BASE_ALIGN_BYTES
);
513 sq
->desc
= sq
->dmem
.base
;
514 sq
->skbuff
= kcalloc(q_len
, sizeof(u64
), GFP_KERNEL
);
520 sq
->thresh
= SND_QUEUE_THRESH
;
522 /* Check if this SQ is a XDP TX queue */
524 qidx
+= ((nic
->sqs_id
+ 1) * MAX_SND_QUEUES_PER_QS
);
525 if (qidx
< nic
->pnicvf
->xdp_tx_queues
) {
526 /* Alloc memory to save page pointers for XDP_TX */
527 sq
->xdp_page
= kcalloc(q_len
, sizeof(u64
), GFP_KERNEL
);
530 sq
->xdp_desc_cnt
= 0;
531 sq
->xdp_free_cnt
= q_len
- 1;
535 sq
->xdp_desc_cnt
= 0;
536 sq
->xdp_free_cnt
= 0;
539 atomic_set(&sq
->free_cnt
, q_len
- 1);
541 /* Preallocate memory for TSO segment's header */
542 sq
->tso_hdrs
= dma_alloc_coherent(&nic
->pdev
->dev
,
543 q_len
* TSO_HEADER_SIZE
,
553 void nicvf_unmap_sndq_buffers(struct nicvf
*nic
, struct snd_queue
*sq
,
554 int hdr_sqe
, u8 subdesc_cnt
)
557 struct sq_gather_subdesc
*gather
;
559 /* Unmap DMA mapped skb data buffers */
560 for (idx
= 0; idx
< subdesc_cnt
; idx
++) {
562 hdr_sqe
&= (sq
->dmem
.q_len
- 1);
563 gather
= (struct sq_gather_subdesc
*)GET_SQ_DESC(sq
, hdr_sqe
);
564 /* HW will ensure data coherency, CPU sync not required */
565 dma_unmap_page_attrs(&nic
->pdev
->dev
, gather
->addr
,
566 gather
->size
, DMA_TO_DEVICE
,
567 DMA_ATTR_SKIP_CPU_SYNC
);
571 static void nicvf_free_snd_queue(struct nicvf
*nic
, struct snd_queue
*sq
)
575 struct sq_hdr_subdesc
*hdr
;
576 struct sq_hdr_subdesc
*tso_sqe
;
584 dma_free_coherent(&nic
->pdev
->dev
,
585 sq
->dmem
.q_len
* TSO_HEADER_SIZE
,
586 sq
->tso_hdrs
, sq
->tso_hdrs_phys
);
590 /* Free pending skbs in the queue */
592 while (sq
->head
!= sq
->tail
) {
593 skb
= (struct sk_buff
*)sq
->skbuff
[sq
->head
];
594 if (!skb
|| !sq
->xdp_page
)
597 page
= (struct page
*)sq
->xdp_page
[sq
->head
];
603 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, sq
->head
);
604 /* Check for dummy descriptor used for HW TSO offload on 88xx */
605 if (hdr
->dont_send
) {
606 /* Get actual TSO descriptors and unmap them */
608 (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, hdr
->rsvd2
);
609 nicvf_unmap_sndq_buffers(nic
, sq
, hdr
->rsvd2
,
610 tso_sqe
->subdesc_cnt
);
612 nicvf_unmap_sndq_buffers(nic
, sq
, sq
->head
,
616 dev_kfree_skb_any(skb
);
619 sq
->head
&= (sq
->dmem
.q_len
- 1);
623 nicvf_free_q_desc_mem(nic
, &sq
->dmem
);
626 static void nicvf_reclaim_snd_queue(struct nicvf
*nic
,
627 struct queue_set
*qs
, int qidx
)
629 /* Disable send queue */
630 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, 0);
631 /* Check if SQ is stopped */
632 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_SQ_0_7_STATUS
, 21, 1, 0x01))
634 /* Reset send queue */
635 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, NICVF_SQ_RESET
);
638 static void nicvf_reclaim_rcv_queue(struct nicvf
*nic
,
639 struct queue_set
*qs
, int qidx
)
641 union nic_mbx mbx
= {};
643 /* Make sure all packets in the pipeline are written back into mem */
644 mbx
.msg
.msg
= NIC_MBOX_MSG_RQ_SW_SYNC
;
645 nicvf_send_msg_to_pf(nic
, &mbx
);
648 static void nicvf_reclaim_cmp_queue(struct nicvf
*nic
,
649 struct queue_set
*qs
, int qidx
)
651 /* Disable timer threshold (doesn't get reset upon CQ reset) */
652 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG2
, qidx
, 0);
653 /* Disable completion queue */
654 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, 0);
655 /* Reset completion queue */
656 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, NICVF_CQ_RESET
);
659 static void nicvf_reclaim_rbdr(struct nicvf
*nic
,
660 struct rbdr
*rbdr
, int qidx
)
665 /* Save head and tail pointers for freeing up buffers */
666 rbdr
->head
= nicvf_queue_reg_read(nic
,
667 NIC_QSET_RBDR_0_1_HEAD
,
669 rbdr
->tail
= nicvf_queue_reg_read(nic
,
670 NIC_QSET_RBDR_0_1_TAIL
,
673 /* If RBDR FIFO is in 'FAIL' state then do a reset first
676 fifo_state
= nicvf_queue_reg_read(nic
, NIC_QSET_RBDR_0_1_STATUS0
, qidx
);
677 if (((fifo_state
>> 62) & 0x03) == 0x3)
678 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
,
679 qidx
, NICVF_RBDR_RESET
);
682 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
, qidx
, 0);
683 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_RBDR_0_1_STATUS0
, 62, 2, 0x00))
686 tmp
= nicvf_queue_reg_read(nic
,
687 NIC_QSET_RBDR_0_1_PREFETCH_STATUS
,
689 if ((tmp
& 0xFFFFFFFF) == ((tmp
>> 32) & 0xFFFFFFFF))
691 usleep_range(1000, 2000);
694 netdev_err(nic
->netdev
,
695 "Failed polling on prefetch status\n");
699 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
,
700 qidx
, NICVF_RBDR_RESET
);
702 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_RBDR_0_1_STATUS0
, 62, 2, 0x02))
704 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
, qidx
, 0x00);
705 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_RBDR_0_1_STATUS0
, 62, 2, 0x00))
709 void nicvf_config_vlan_stripping(struct nicvf
*nic
, netdev_features_t features
)
714 rq_cfg
= nicvf_queue_reg_read(nic
, NIC_QSET_RQ_GEN_CFG
, 0);
716 /* Enable first VLAN stripping */
717 if (features
& NETIF_F_HW_VLAN_CTAG_RX
)
718 rq_cfg
|= (1ULL << 25);
720 rq_cfg
&= ~(1ULL << 25);
721 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_GEN_CFG
, 0, rq_cfg
);
723 /* Configure Secondary Qsets, if any */
724 for (sqs
= 0; sqs
< nic
->sqs_count
; sqs
++)
725 if (nic
->snicvf
[sqs
])
726 nicvf_queue_reg_write(nic
->snicvf
[sqs
],
727 NIC_QSET_RQ_GEN_CFG
, 0, rq_cfg
);
730 static void nicvf_reset_rcv_queue_stats(struct nicvf
*nic
)
732 union nic_mbx mbx
= {};
734 /* Reset all RQ/SQ and VF stats */
735 mbx
.reset_stat
.msg
= NIC_MBOX_MSG_RESET_STAT_COUNTER
;
736 mbx
.reset_stat
.rx_stat_mask
= 0x3FFF;
737 mbx
.reset_stat
.tx_stat_mask
= 0x1F;
738 mbx
.reset_stat
.rq_stat_mask
= 0xFFFF;
739 mbx
.reset_stat
.sq_stat_mask
= 0xFFFF;
740 nicvf_send_msg_to_pf(nic
, &mbx
);
743 /* Configures receive queue */
744 static void nicvf_rcv_queue_config(struct nicvf
*nic
, struct queue_set
*qs
,
745 int qidx
, bool enable
)
747 union nic_mbx mbx
= {};
748 struct rcv_queue
*rq
;
749 struct rq_cfg rq_cfg
;
754 /* Disable receive queue */
755 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_0_7_CFG
, qidx
, 0);
758 nicvf_reclaim_rcv_queue(nic
, qs
, qidx
);
759 xdp_rxq_info_unreg(&rq
->xdp_rxq
);
763 rq
->cq_qs
= qs
->vnic_id
;
765 rq
->start_rbdr_qs
= qs
->vnic_id
;
766 rq
->start_qs_rbdr_idx
= qs
->rbdr_cnt
- 1;
767 rq
->cont_rbdr_qs
= qs
->vnic_id
;
768 rq
->cont_qs_rbdr_idx
= qs
->rbdr_cnt
- 1;
769 /* all writes of RBDR data to be loaded into L2 Cache as well*/
772 /* Driver has no proper error path for failed XDP RX-queue info reg */
773 WARN_ON(xdp_rxq_info_reg(&rq
->xdp_rxq
, nic
->netdev
, qidx
) < 0);
775 /* Send a mailbox msg to PF to config RQ */
776 mbx
.rq
.msg
= NIC_MBOX_MSG_RQ_CFG
;
777 mbx
.rq
.qs_num
= qs
->vnic_id
;
778 mbx
.rq
.rq_num
= qidx
;
779 mbx
.rq
.cfg
= (rq
->caching
<< 26) | (rq
->cq_qs
<< 19) |
780 (rq
->cq_idx
<< 16) | (rq
->cont_rbdr_qs
<< 9) |
781 (rq
->cont_qs_rbdr_idx
<< 8) |
782 (rq
->start_rbdr_qs
<< 1) | (rq
->start_qs_rbdr_idx
);
783 nicvf_send_msg_to_pf(nic
, &mbx
);
785 mbx
.rq
.msg
= NIC_MBOX_MSG_RQ_BP_CFG
;
786 mbx
.rq
.cfg
= BIT_ULL(63) | BIT_ULL(62) |
787 (RQ_PASS_RBDR_LVL
<< 16) | (RQ_PASS_CQ_LVL
<< 8) |
789 nicvf_send_msg_to_pf(nic
, &mbx
);
792 * Enable CQ drop to reserve sufficient CQEs for all tx packets
794 mbx
.rq
.msg
= NIC_MBOX_MSG_RQ_DROP_CFG
;
795 mbx
.rq
.cfg
= BIT_ULL(63) | BIT_ULL(62) |
796 (RQ_PASS_RBDR_LVL
<< 40) | (RQ_DROP_RBDR_LVL
<< 32) |
797 (RQ_PASS_CQ_LVL
<< 16) | (RQ_DROP_CQ_LVL
<< 8);
798 nicvf_send_msg_to_pf(nic
, &mbx
);
800 if (!nic
->sqs_mode
&& (qidx
== 0)) {
801 /* Enable checking L3/L4 length and TCP/UDP checksums
802 * Also allow IPv6 pkts with zero UDP checksum.
804 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_GEN_CFG
, 0,
805 (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
806 nicvf_config_vlan_stripping(nic
, nic
->netdev
->features
);
809 /* Enable Receive queue */
810 memset(&rq_cfg
, 0, sizeof(struct rq_cfg
));
813 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_0_7_CFG
, qidx
, *(u64
*)&rq_cfg
);
816 /* Configures completion queue */
817 void nicvf_cmp_queue_config(struct nicvf
*nic
, struct queue_set
*qs
,
818 int qidx
, bool enable
)
820 struct cmp_queue
*cq
;
821 struct cq_cfg cq_cfg
;
827 nicvf_reclaim_cmp_queue(nic
, qs
, qidx
);
831 /* Reset completion queue */
832 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, NICVF_CQ_RESET
);
837 spin_lock_init(&cq
->lock
);
838 /* Set completion queue base address */
839 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_BASE
,
840 qidx
, (u64
)(cq
->dmem
.phys_base
));
842 /* Enable Completion queue */
843 memset(&cq_cfg
, 0, sizeof(struct cq_cfg
));
847 cq_cfg
.qsize
= ilog2(qs
->cq_len
>> 10);
849 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, *(u64
*)&cq_cfg
);
851 /* Set threshold value for interrupt generation */
852 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_THRESH
, qidx
, cq
->thresh
);
853 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG2
,
854 qidx
, CMP_QUEUE_TIMER_THRESH
);
857 /* Configures transmit queue */
858 static void nicvf_snd_queue_config(struct nicvf
*nic
, struct queue_set
*qs
,
859 int qidx
, bool enable
)
861 union nic_mbx mbx
= {};
862 struct snd_queue
*sq
;
863 struct sq_cfg sq_cfg
;
869 nicvf_reclaim_snd_queue(nic
, qs
, qidx
);
873 /* Reset send queue */
874 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, NICVF_SQ_RESET
);
876 sq
->cq_qs
= qs
->vnic_id
;
879 /* Send a mailbox msg to PF to config SQ */
880 mbx
.sq
.msg
= NIC_MBOX_MSG_SQ_CFG
;
881 mbx
.sq
.qs_num
= qs
->vnic_id
;
882 mbx
.sq
.sq_num
= qidx
;
883 mbx
.sq
.sqs_mode
= nic
->sqs_mode
;
884 mbx
.sq
.cfg
= (sq
->cq_qs
<< 3) | sq
->cq_idx
;
885 nicvf_send_msg_to_pf(nic
, &mbx
);
887 /* Set queue base address */
888 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_BASE
,
889 qidx
, (u64
)(sq
->dmem
.phys_base
));
891 /* Enable send queue & set queue size */
892 memset(&sq_cfg
, 0, sizeof(struct sq_cfg
));
896 sq_cfg
.qsize
= ilog2(qs
->sq_len
>> 10);
897 sq_cfg
.tstmp_bgx_intf
= 0;
898 /* CQ's level at which HW will stop processing SQEs to avoid
899 * transmitting a pkt with no space in CQ to post CQE_TX.
901 sq_cfg
.cq_limit
= (CMP_QUEUE_PIPELINE_RSVD
* 256) / qs
->cq_len
;
902 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, *(u64
*)&sq_cfg
);
904 /* Set threshold value for interrupt generation */
905 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_THRESH
, qidx
, sq
->thresh
);
907 /* Set queue:cpu affinity for better load distribution */
908 if (cpu_online(qidx
)) {
909 cpumask_set_cpu(qidx
, &sq
->affinity_mask
);
910 netif_set_xps_queue(nic
->netdev
,
911 &sq
->affinity_mask
, qidx
);
915 /* Configures receive buffer descriptor ring */
916 static void nicvf_rbdr_config(struct nicvf
*nic
, struct queue_set
*qs
,
917 int qidx
, bool enable
)
920 struct rbdr_cfg rbdr_cfg
;
922 rbdr
= &qs
->rbdr
[qidx
];
923 nicvf_reclaim_rbdr(nic
, rbdr
, qidx
);
927 /* Set descriptor base address */
928 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_BASE
,
929 qidx
, (u64
)(rbdr
->dmem
.phys_base
));
931 /* Enable RBDR & set queue size */
932 /* Buffer size should be in multiples of 128 bytes */
933 memset(&rbdr_cfg
, 0, sizeof(struct rbdr_cfg
));
937 rbdr_cfg
.qsize
= RBDR_SIZE
;
938 rbdr_cfg
.avg_con
= 0;
939 rbdr_cfg
.lines
= rbdr
->dma_size
/ 128;
940 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
,
941 qidx
, *(u64
*)&rbdr_cfg
);
944 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_DOOR
,
945 qidx
, qs
->rbdr_len
- 1);
947 /* Set threshold value for interrupt generation */
948 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_THRESH
,
949 qidx
, rbdr
->thresh
- 1);
952 /* Requests PF to assign and enable Qset */
953 void nicvf_qset_config(struct nicvf
*nic
, bool enable
)
955 union nic_mbx mbx
= {};
956 struct queue_set
*qs
= nic
->qs
;
957 struct qs_cfg
*qs_cfg
;
960 netdev_warn(nic
->netdev
,
961 "Qset is still not allocated, don't init queues\n");
966 qs
->vnic_id
= nic
->vf_id
;
968 /* Send a mailbox msg to PF to config Qset */
969 mbx
.qs
.msg
= NIC_MBOX_MSG_QS_CFG
;
970 mbx
.qs
.num
= qs
->vnic_id
;
971 mbx
.qs
.sqs_count
= nic
->sqs_count
;
974 qs_cfg
= (struct qs_cfg
*)&mbx
.qs
.cfg
;
980 qs_cfg
->vnic
= qs
->vnic_id
;
981 /* Enable Tx timestamping capability */
983 qs_cfg
->send_tstmp_ena
= 1;
985 nicvf_send_msg_to_pf(nic
, &mbx
);
988 static void nicvf_free_resources(struct nicvf
*nic
)
991 struct queue_set
*qs
= nic
->qs
;
993 /* Free receive buffer descriptor ring */
994 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++)
995 nicvf_free_rbdr(nic
, &qs
->rbdr
[qidx
]);
997 /* Free completion queue */
998 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++)
999 nicvf_free_cmp_queue(nic
, &qs
->cq
[qidx
]);
1001 /* Free send queue */
1002 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++)
1003 nicvf_free_snd_queue(nic
, &qs
->sq
[qidx
]);
1006 static int nicvf_alloc_resources(struct nicvf
*nic
)
1009 struct queue_set
*qs
= nic
->qs
;
1011 /* Alloc receive buffer descriptor ring */
1012 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++) {
1013 if (nicvf_init_rbdr(nic
, &qs
->rbdr
[qidx
], qs
->rbdr_len
,
1018 /* Alloc send queue */
1019 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++) {
1020 if (nicvf_init_snd_queue(nic
, &qs
->sq
[qidx
], qs
->sq_len
, qidx
))
1024 /* Alloc completion queue */
1025 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++) {
1026 if (nicvf_init_cmp_queue(nic
, &qs
->cq
[qidx
], qs
->cq_len
))
1032 nicvf_free_resources(nic
);
1036 int nicvf_set_qset_resources(struct nicvf
*nic
)
1038 struct queue_set
*qs
;
1040 qs
= devm_kzalloc(&nic
->pdev
->dev
, sizeof(*qs
), GFP_KERNEL
);
1045 /* Set count of each queue */
1046 qs
->rbdr_cnt
= DEFAULT_RBDR_CNT
;
1047 qs
->rq_cnt
= min_t(u8
, MAX_RCV_QUEUES_PER_QS
, num_online_cpus());
1048 qs
->sq_cnt
= min_t(u8
, MAX_SND_QUEUES_PER_QS
, num_online_cpus());
1049 qs
->cq_cnt
= max_t(u8
, qs
->rq_cnt
, qs
->sq_cnt
);
1051 /* Set queue lengths */
1052 qs
->rbdr_len
= RCV_BUF_COUNT
;
1053 qs
->sq_len
= SND_QUEUE_LEN
;
1054 qs
->cq_len
= CMP_QUEUE_LEN
;
1056 nic
->rx_queues
= qs
->rq_cnt
;
1057 nic
->tx_queues
= qs
->sq_cnt
;
1058 nic
->xdp_tx_queues
= 0;
1063 int nicvf_config_data_transfer(struct nicvf
*nic
, bool enable
)
1065 bool disable
= false;
1066 struct queue_set
*qs
= nic
->qs
;
1067 struct queue_set
*pqs
= nic
->pnicvf
->qs
;
1073 /* Take primary VF's queue lengths.
1074 * This is needed to take queue lengths set from ethtool
1075 * into consideration.
1077 if (nic
->sqs_mode
&& pqs
) {
1078 qs
->cq_len
= pqs
->cq_len
;
1079 qs
->sq_len
= pqs
->sq_len
;
1083 if (nicvf_alloc_resources(nic
))
1086 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++)
1087 nicvf_snd_queue_config(nic
, qs
, qidx
, enable
);
1088 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++)
1089 nicvf_cmp_queue_config(nic
, qs
, qidx
, enable
);
1090 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++)
1091 nicvf_rbdr_config(nic
, qs
, qidx
, enable
);
1092 for (qidx
= 0; qidx
< qs
->rq_cnt
; qidx
++)
1093 nicvf_rcv_queue_config(nic
, qs
, qidx
, enable
);
1095 for (qidx
= 0; qidx
< qs
->rq_cnt
; qidx
++)
1096 nicvf_rcv_queue_config(nic
, qs
, qidx
, disable
);
1097 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++)
1098 nicvf_rbdr_config(nic
, qs
, qidx
, disable
);
1099 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++)
1100 nicvf_snd_queue_config(nic
, qs
, qidx
, disable
);
1101 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++)
1102 nicvf_cmp_queue_config(nic
, qs
, qidx
, disable
);
1104 nicvf_free_resources(nic
);
1107 /* Reset RXQ's stats.
1108 * SQ's stats will get reset automatically once SQ is reset.
1110 nicvf_reset_rcv_queue_stats(nic
);
1115 /* Get a free desc from SQ
1116 * returns descriptor pointer & descriptor number
1118 static inline int nicvf_get_sq_desc(struct snd_queue
*sq
, int desc_cnt
)
1124 atomic_sub(desc_cnt
, &sq
->free_cnt
);
1126 sq
->xdp_free_cnt
-= desc_cnt
;
1127 sq
->tail
+= desc_cnt
;
1128 sq
->tail
&= (sq
->dmem
.q_len
- 1);
1133 /* Rollback to previous tail pointer when descriptors not used */
1134 static inline void nicvf_rollback_sq_desc(struct snd_queue
*sq
,
1135 int qentry
, int desc_cnt
)
1138 atomic_add(desc_cnt
, &sq
->free_cnt
);
1141 /* Free descriptor back to SQ for future use */
1142 void nicvf_put_sq_desc(struct snd_queue
*sq
, int desc_cnt
)
1145 atomic_add(desc_cnt
, &sq
->free_cnt
);
1147 sq
->xdp_free_cnt
+= desc_cnt
;
1148 sq
->head
+= desc_cnt
;
1149 sq
->head
&= (sq
->dmem
.q_len
- 1);
1152 static inline int nicvf_get_nxt_sqentry(struct snd_queue
*sq
, int qentry
)
1155 qentry
&= (sq
->dmem
.q_len
- 1);
1159 void nicvf_sq_enable(struct nicvf
*nic
, struct snd_queue
*sq
, int qidx
)
1163 sq_cfg
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
);
1164 sq_cfg
|= NICVF_SQ_EN
;
1165 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, sq_cfg
);
1166 /* Ring doorbell so that H/W restarts processing SQEs */
1167 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_DOOR
, qidx
, 0);
1170 void nicvf_sq_disable(struct nicvf
*nic
, int qidx
)
1174 sq_cfg
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
);
1175 sq_cfg
&= ~NICVF_SQ_EN
;
1176 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, sq_cfg
);
1179 void nicvf_sq_free_used_descs(struct net_device
*netdev
, struct snd_queue
*sq
,
1183 struct sk_buff
*skb
;
1184 struct nicvf
*nic
= netdev_priv(netdev
);
1185 struct sq_hdr_subdesc
*hdr
;
1187 head
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_HEAD
, qidx
) >> 4;
1188 tail
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_TAIL
, qidx
) >> 4;
1189 while (sq
->head
!= head
) {
1190 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, sq
->head
);
1191 if (hdr
->subdesc_type
!= SQ_DESC_TYPE_HEADER
) {
1192 nicvf_put_sq_desc(sq
, 1);
1195 skb
= (struct sk_buff
*)sq
->skbuff
[sq
->head
];
1197 dev_kfree_skb_any(skb
);
1198 atomic64_add(1, (atomic64_t
*)&netdev
->stats
.tx_packets
);
1199 atomic64_add(hdr
->tot_len
,
1200 (atomic64_t
*)&netdev
->stats
.tx_bytes
);
1201 nicvf_put_sq_desc(sq
, hdr
->subdesc_cnt
+ 1);
1205 /* XDP Transmit APIs */
1206 void nicvf_xdp_sq_doorbell(struct nicvf
*nic
,
1207 struct snd_queue
*sq
, int sq_num
)
1209 if (!sq
->xdp_desc_cnt
)
1212 /* make sure all memory stores are done before ringing doorbell */
1215 /* Inform HW to xmit all queued XDP descriptors */
1216 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_DOOR
,
1217 sq_num
, sq
->xdp_desc_cnt
);
1218 sq
->xdp_desc_cnt
= 0;
1222 nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue
*sq
, int qentry
,
1223 int subdesc_cnt
, u64 data
, int len
)
1225 struct sq_hdr_subdesc
*hdr
;
1227 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1228 memset(hdr
, 0, SND_QUEUE_DESC_SIZE
);
1229 hdr
->subdesc_type
= SQ_DESC_TYPE_HEADER
;
1230 hdr
->subdesc_cnt
= subdesc_cnt
;
1233 sq
->xdp_page
[qentry
] = (u64
)virt_to_page((void *)data
);
1236 int nicvf_xdp_sq_append_pkt(struct nicvf
*nic
, struct snd_queue
*sq
,
1237 u64 bufaddr
, u64 dma_addr
, u16 len
)
1239 int subdesc_cnt
= MIN_SQ_DESC_PER_PKT_XMIT
;
1242 if (subdesc_cnt
> sq
->xdp_free_cnt
)
1245 qentry
= nicvf_get_sq_desc(sq
, subdesc_cnt
);
1247 nicvf_xdp_sq_add_hdr_subdesc(sq
, qentry
, subdesc_cnt
- 1, bufaddr
, len
);
1249 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1250 nicvf_sq_add_gather_subdesc(sq
, qentry
, len
, dma_addr
);
1252 sq
->xdp_desc_cnt
+= subdesc_cnt
;
1257 /* Calculate no of SQ subdescriptors needed to transmit all
1258 * segments of this TSO packet.
1259 * Taken from 'Tilera network driver' with a minor modification.
1261 static int nicvf_tso_count_subdescs(struct sk_buff
*skb
)
1263 struct skb_shared_info
*sh
= skb_shinfo(skb
);
1264 unsigned int sh_len
= skb_transport_offset(skb
) + tcp_hdrlen(skb
);
1265 unsigned int data_len
= skb
->len
- sh_len
;
1266 unsigned int p_len
= sh
->gso_size
;
1267 long f_id
= -1; /* id of the current fragment */
1268 long f_size
= skb_headlen(skb
) - sh_len
; /* current fragment size */
1269 long f_used
= 0; /* bytes used from the current fragment */
1270 long n
; /* size of the current piece of payload */
1274 for (segment
= 0; segment
< sh
->gso_segs
; segment
++) {
1275 unsigned int p_used
= 0;
1277 /* One edesc for header and for each piece of the payload. */
1278 for (num_edescs
++; p_used
< p_len
; num_edescs
++) {
1279 /* Advance as needed. */
1280 while (f_used
>= f_size
) {
1282 f_size
= skb_frag_size(&sh
->frags
[f_id
]);
1286 /* Use bytes from the current fragment. */
1288 if (n
> f_size
- f_used
)
1289 n
= f_size
- f_used
;
1294 /* The last segment may be less than gso_size. */
1296 if (data_len
< p_len
)
1300 /* '+ gso_segs' for SQ_HDR_SUBDESCs for each segment */
1301 return num_edescs
+ sh
->gso_segs
;
1304 #define POST_CQE_DESC_COUNT 2
1306 /* Get the number of SQ descriptors needed to xmit this skb.
 *
 * @nic: VF private data; its 'hw_tso' and 't88' flags select the accounting.
 * @skb: packet about to be queued.
 *
 * The count starts at MIN_SQ_DESC_PER_PKT_XMIT. For a GSO skb on a NIC
 * without 'hw_tso' it is replaced by nicvf_tso_count_subdescs(skb).
 * POST_CQE_DESC_COUNT is added when 't88', 'hw_tso' and a non-zero gso_size
 * are all present, and nr_frags is added when the skb has page fragments.
 *
 * NOTE(review): what follows the nicvf_tso_count_subdescs() call, and the
 * final return, are not visible here - confirm whether that path returns
 * early.
 */
1307 static int nicvf_sq_subdesc_required(struct nicvf
*nic
, struct sk_buff
*skb
)
1309 int subdesc_cnt
= MIN_SQ_DESC_PER_PKT_XMIT
;
1311 if (skb_shinfo(skb
)->gso_size
&& !nic
->hw_tso
) {
1312 subdesc_cnt
= nicvf_tso_count_subdescs(skb
);
1316 /* Dummy descriptors to get TSO pkt completion notification */
1317 if (nic
->t88
&& nic
->hw_tso
&& skb_shinfo(skb
)->gso_size
)
1318 subdesc_cnt
+= POST_CQE_DESC_COUNT
;
1320 if (skb_shinfo(skb
)->nr_frags
)
1321 subdesc_cnt
+= skb_shinfo(skb
)->nr_frags
;
/* nicvf_sq_add_hdr_subdesc() - fill the HEADER subdescriptor at @qentry.
 *
 * @nic:         VF private data ('t88', 'hw_tso', pnicvf stats and PTP state).
 * @sq:          send queue that owns the descriptor ring and skbuff[] table.
 * @qentry:      ring index of the descriptor to write.
 * @subdesc_cnt: number of subdescriptors that follow this header.
 * @skb:         packet being described.
 * @len:         NOTE(review): its use is not visible here - presumably the
 *               total length programmed into the header; confirm.
 *
 * Visible steps, in order:
 *  - the descriptor is zeroed and typed SQ_DESC_TYPE_HEADER;
 *  - for a GSO skb with 't88' and 'hw_tso' the header's subdesc_cnt is
 *    written as subdesc_cnt - POST_CQE_DESC_COUNT; the other visible path
 *    stores the skb in sq->skbuff[qentry] and writes subdesc_cnt unchanged
 *    (presumably the else branch - the keyword itself is not visible);
 *  - for CHECKSUM_PARTIAL the L3/L4 offsets are set, csum_l3 is enabled for
 *    IPv4 and csum_l4 gets one of SEND_L4_CSUM_TCP/UDP/SCTP (the selecting
 *    conditions are not visible);
 *  - for hardware TSO, tso_start, tso_max_paysize and inner_l3_offset are
 *    programmed and the per-CPU tx_tso counter is incremented;
 *  - timestamping: without SKBTX_HW_TSTAMP, skb_tx_timestamp() is called;
 *    otherwise the request is subject to a GSO check and to claiming the
 *    single tx_ptp_skbs slot with atomic_add_unless(..., 1, 1), after which
 *    the skb is flagged SKBTX_IN_PROGRESS.
 */
1326 /* Add SQ HEADER subdescriptor.
1327 * First subdescriptor for every send descriptor.
1330 nicvf_sq_add_hdr_subdesc(struct nicvf
*nic
, struct snd_queue
*sq
, int qentry
,
1331 int subdesc_cnt
, struct sk_buff
*skb
, int len
)
1334 struct sq_hdr_subdesc
*hdr
;
1341 ip
.hdr
= skb_network_header(skb
);
1342 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1343 memset(hdr
, 0, SND_QUEUE_DESC_SIZE
);
1344 hdr
->subdesc_type
= SQ_DESC_TYPE_HEADER
;
1346 if (nic
->t88
&& nic
->hw_tso
&& skb_shinfo(skb
)->gso_size
) {
1347 /* post_cqe = 0, to avoid HW posting a CQE for every TSO
1348 * segment transmitted on 88xx.
1350 hdr
->subdesc_cnt
= subdesc_cnt
- POST_CQE_DESC_COUNT
;
1352 sq
->skbuff
[qentry
] = (u64
)skb
;
1353 /* Enable notification via CQE after processing SQE */
1355 /* No of subdescriptors following this */
1356 hdr
->subdesc_cnt
= subdesc_cnt
;
1360 /* Offload checksum calculation to HW */
1361 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
1362 if (ip
.v4
->version
== 4)
1363 hdr
->csum_l3
= 1; /* Enable IP csum calculation */
1364 hdr
->l3_offset
= skb_network_offset(skb
);
1365 hdr
->l4_offset
= skb_transport_offset(skb
);
1367 proto
= (ip
.v4
->version
== 4) ? ip
.v4
->protocol
:
1372 hdr
->csum_l4
= SEND_L4_CSUM_TCP
;
1375 hdr
->csum_l4
= SEND_L4_CSUM_UDP
;
1378 hdr
->csum_l4
= SEND_L4_CSUM_SCTP
;
1383 if (nic
->hw_tso
&& skb_shinfo(skb
)->gso_size
) {
1385 hdr
->tso_start
= skb_transport_offset(skb
) + tcp_hdrlen(skb
);
1386 hdr
->tso_max_paysize
= skb_shinfo(skb
)->gso_size
;
1387 /* For non-tunneled pkts, point this to L2 ethertype */
1388 hdr
->inner_l3_offset
= skb_network_offset(skb
) - 2;
1389 this_cpu_inc(nic
->pnicvf
->drv_stats
->tx_tso
);
1392 /* Check if timestamp is requested */
1393 if (!(skb_shinfo(skb
)->tx_flags
& SKBTX_HW_TSTAMP
)) {
1394 skb_tx_timestamp(skb
);
1398 /* Tx timestamping not supported along with TSO, so ignore request */
1399 if (skb_shinfo(skb
)->gso_size
)
1402 /* HW supports only a single outstanding packet to timestamp */
1403 if (!atomic_add_unless(&nic
->pnicvf
->tx_ptp_skbs
, 1, 1))
1406 /* Mark the SKB for later reference */
1407 skb_shinfo(skb
)->tx_flags
|= SKBTX_IN_PROGRESS
;
1409 /* Finally enable timestamp generation
1410 * Since 'post_cqe' is also set, two CQEs will be posted
1411 * for this packet i.e CQE_TYPE_SEND and CQE_TYPE_SEND_PTP.
/* nicvf_sq_add_gather_subdesc() - write one GATHER subdescriptor.
 *
 * @sq:     send queue owning the descriptor ring.
 * @qentry: ring index; it is masked with (sq->dmem.q_len - 1) before use.
 *
 * The descriptor is zeroed, typed SQ_DESC_TYPE_GATHER with load type
 * NIC_SEND_LD_TYPE_E_LDD, and then receives 'size' and 'data' as its length
 * and address.
 *
 * NOTE(review): the declarations of 'size' and 'data' are not visible here;
 * callers in this file pass (length, DMA or physical address) in that order.
 * The mask assumes q_len is a power of two - confirm.
 */
1416 /* SQ GATHER subdescriptor
1417 * Must follow HDR descriptor
1419 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue
*sq
, int qentry
,
1422 struct sq_gather_subdesc
*gather
;
1424 qentry
&= (sq
->dmem
.q_len
- 1);
1425 gather
= (struct sq_gather_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1427 memset(gather
, 0, SND_QUEUE_DESC_SIZE
);
1428 gather
->subdesc_type
= SQ_DESC_TYPE_GATHER
;
1429 gather
->ld_type
= NIC_SEND_LD_TYPE_E_LDD
;
1430 gather
->size
= size
;
1431 gather
->addr
= data
;
/* nicvf_sq_add_cqe_subdesc() - append the HEADER + IMMEDIATE pair that
 * follows a TSO packet.
 *
 * @sq:      send queue owning the descriptor ring and skbuff[] table.
 * @qentry:  ring index for the HEADER; the IMMEDIATE goes in the next entry.
 * @tso_sqe: index of the packet's real TSO header entry, saved in hdr->rsvd2.
 * @skb:     packet, recorded in sq->skbuff[qentry].
 *
 * The HEADER is zeroed, typed SQ_DESC_TYPE_HEADER and given
 * subdesc_cnt = POST_CQE_DESC_COUNT - 1; the following entry is zeroed and
 * typed SQ_DESC_TYPE_IMMEDIATE.
 *
 * NOTE(review): the statements behind the "Enable notification" and
 * "no packet to transmit" comments are not visible here.
 */
1434 /* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
1435 * packet so that a CQE is posted as a notification for transmission of
1438 static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue
*sq
, int qentry
,
1439 int tso_sqe
, struct sk_buff
*skb
)
1441 struct sq_imm_subdesc
*imm
;
1442 struct sq_hdr_subdesc
*hdr
;
1444 sq
->skbuff
[qentry
] = (u64
)skb
;
1446 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1447 memset(hdr
, 0, SND_QUEUE_DESC_SIZE
);
1448 hdr
->subdesc_type
= SQ_DESC_TYPE_HEADER
;
1449 /* Enable notification via CQE after processing SQE */
1451 /* There is no packet to transmit here */
1453 hdr
->subdesc_cnt
= POST_CQE_DESC_COUNT
- 1;
1455 /* Actual TSO header SQE index, needed for cleanup */
1456 hdr
->rsvd2
= tso_sqe
;
1458 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1459 imm
= (struct sq_imm_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1460 memset(imm
, 0, SND_QUEUE_DESC_SIZE
);
1461 imm
->subdesc_type
= SQ_DESC_TYPE_IMMEDIATE
;
/* nicvf_sq_doorbell() - account a queued skb and notify the hardware.
 *
 * @nic:      VF private data; the tx queue is looked up on nic->pnicvf->netdev.
 * @skb:      packet just placed on the send queue.
 * @sq_num:   send queue number.
 * @desc_cnt: number of descriptors that were filled for this packet.
 *
 * The netdev tx queue matching skb_get_queue_mapping(skb) is charged with
 * skb->len through netdev_tx_sent_queue(), then NIC_QSET_SQ_0_7_DOOR is
 * written.
 *
 * NOTE(review): the barrier announced by the "memory stores" comment and the
 * remaining arguments of the register write are not visible here;
 * presumably sq_num and desc_cnt are what is written - confirm.
 */
1465 static inline void nicvf_sq_doorbell(struct nicvf
*nic
, struct sk_buff
*skb
,
1466 int sq_num
, int desc_cnt
)
1468 struct netdev_queue
*txq
;
1470 txq
= netdev_get_tx_queue(nic
->pnicvf
->netdev
,
1471 skb_get_queue_mapping(skb
));
1473 netdev_tx_sent_queue(txq
, skb
->len
);
1475 /* make sure all memory stores are done before ringing doorbell */
1478 /* Inform HW to xmit all TSO segments */
1479 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_DOOR
,
/* nicvf_sq_append_tso() - software-segment a GSO skb onto the send queue.
 *
 * @nic:    VF private data.
 * @sq:     send queue; sq->tso_hdrs provides one TSO_HEADER_SIZE slot per
 *          ring entry for the rebuilt packet headers.
 * @sq_num: send queue number, forwarded to nicvf_sq_doorbell().
 * @qentry: first ring entry reserved for this packet.
 * @skb:    packet to segment.
 *
 * Per segment (outer loop runs while payload remains):
 *  - the current entry is remembered in hdr_qentry for the HEADER
 *    subdescriptor, which is written last;
 *  - the next entry gets a GATHER pointing at a header built by
 *    tso_build_hdr() in the tso_hdrs slot of that entry;
 *  - each payload piece (min of tso.size and the bytes left) gets its own
 *    GATHER addressed with virt_to_phys(tso.data), followed by
 *    tso_build_data();
 *  - the HEADER is then written at hdr_qentry with seg_subdescs - 1 and its
 *    skbuff[] slot is cleared.
 * After the loop the skb is stored in the skbuff[] slot of the last
 * segment's header so it can be freed on completion, the doorbell is rung
 * with the accumulated desc_cnt and the per-CPU tx_tso counter is bumped.
 *
 * NOTE(review): the updates of seg_subdescs, seg_len and data_left and the
 * return value are not visible here.
 */
1483 /* Segment a TSO packet into 'gso_size' segments and append
1484 * them to SQ for transfer
1486 static int nicvf_sq_append_tso(struct nicvf
*nic
, struct snd_queue
*sq
,
1487 int sq_num
, int qentry
, struct sk_buff
*skb
)
1490 int seg_subdescs
= 0, desc_cnt
= 0;
1491 int seg_len
, total_len
, data_left
;
1492 int hdr_qentry
= qentry
;
1493 int hdr_len
= skb_transport_offset(skb
) + tcp_hdrlen(skb
);
1495 tso_start(skb
, &tso
);
1496 total_len
= skb
->len
- hdr_len
;
1497 while (total_len
> 0) {
1500 /* Save Qentry for adding HDR_SUBDESC at the end */
1501 hdr_qentry
= qentry
;
1503 data_left
= min_t(int, skb_shinfo(skb
)->gso_size
, total_len
);
1504 total_len
-= data_left
;
1506 /* Add segment's header */
1507 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1508 hdr
= sq
->tso_hdrs
+ qentry
* TSO_HEADER_SIZE
;
1509 tso_build_hdr(skb
, hdr
, &tso
, data_left
, total_len
== 0);
1510 nicvf_sq_add_gather_subdesc(sq
, qentry
, hdr_len
,
1512 qentry
* TSO_HEADER_SIZE
);
1513 /* HDR_SUBDESC + GATHER */
1517 /* Add segment's payload fragments */
1518 while (data_left
> 0) {
1521 size
= min_t(int, tso
.size
, data_left
);
1523 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1524 nicvf_sq_add_gather_subdesc(sq
, qentry
, size
,
1525 virt_to_phys(tso
.data
));
1530 tso_build_data(skb
, &tso
, size
);
1532 nicvf_sq_add_hdr_subdesc(nic
, sq
, hdr_qentry
,
1533 seg_subdescs
- 1, skb
, seg_len
);
1534 sq
->skbuff
[hdr_qentry
] = (u64
)NULL
;
1535 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1537 desc_cnt
+= seg_subdescs
;
1539 /* Save SKB in the last segment for freeing */
1540 sq
->skbuff
[hdr_qentry
] = (u64
)skb
;
1542 nicvf_sq_doorbell(nic
, skb
, sq_num
, desc_cnt
);
1544 this_cpu_inc(nic
->pnicvf
->drv_stats
->tx_tso
);
1548 /* Append an skb to a SQ for packet transfer.
 *
 * @nic:    VF private data.
 * @sq:     send queue to place the packet on.
 * @skb:    packet to transmit.
 * @sq_num: send queue number, forwarded to the doorbell / TSO helpers.
 *
 * Flow as visible here:
 *  - the descriptor need from nicvf_sq_subdesc_required() is compared with
 *    sq->free_cnt, then that many entries are taken with nicvf_get_sq_desc();
 *  - a GSO skb on a NIC without 'hw_tso' is handed to nicvf_sq_append_tso()
 *    and its result returned;
 *  - otherwise a HEADER subdescriptor is written, the linear data
 *    (skb_headlen() for non-linear skbs, skb->len otherwise) is DMA-mapped
 *    and described by one GATHER, and every page fragment is mapped and
 *    described by a further GATHER;
 *  - all mappings use DMA_ATTR_SKIP_CPU_SYNC; on a mapping error the
 *    reserved descriptors are rolled back, and in the fragment loop the
 *    buffers mapped so far are released via nicvf_unmap_sndq_buffers();
 *  - on 't88' with a GSO skb the HEADER + IMMEDIATE pair is appended, then
 *    the doorbell is rung with subdesc_cnt.
 *
 * NOTE(review): return statements, jump labels, the assignment of 'hdr_sqe'
 * after its zero initialisation and the branch that leads to the
 * "Not enough SQ descriptors" message are not visible here - confirm the
 * return values and that hdr_sqe holds the header entry on the unmap path.
 */
1549 int nicvf_sq_append_skb(struct nicvf
*nic
, struct snd_queue
*sq
,
1550 struct sk_buff
*skb
, u8 sq_num
)
1553 int subdesc_cnt
, hdr_sqe
= 0;
1557 subdesc_cnt
= nicvf_sq_subdesc_required(nic
, skb
);
1558 if (subdesc_cnt
> atomic_read(&sq
->free_cnt
))
1561 qentry
= nicvf_get_sq_desc(sq
, subdesc_cnt
);
1563 /* Check if it's a TSO packet */
1564 if (skb_shinfo(skb
)->gso_size
&& !nic
->hw_tso
)
1565 return nicvf_sq_append_tso(nic
, sq
, sq_num
, qentry
, skb
);
1567 /* Add SQ header subdesc */
1568 nicvf_sq_add_hdr_subdesc(nic
, sq
, qentry
, subdesc_cnt
- 1,
1572 /* Add SQ gather subdescs */
1573 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1574 size
= skb_is_nonlinear(skb
) ? skb_headlen(skb
) : skb
->len
;
1575 /* HW will ensure data coherency, CPU sync not required */
1576 dma_addr
= dma_map_page_attrs(&nic
->pdev
->dev
, virt_to_page(skb
->data
),
1577 offset_in_page(skb
->data
), size
,
1578 DMA_TO_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
1579 if (dma_mapping_error(&nic
->pdev
->dev
, dma_addr
)) {
1580 nicvf_rollback_sq_desc(sq
, qentry
, subdesc_cnt
);
1584 nicvf_sq_add_gather_subdesc(sq
, qentry
, size
, dma_addr
);
1586 /* Check for scattered buffer */
1587 if (!skb_is_nonlinear(skb
))
1590 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1591 const skb_frag_t
*frag
= &skb_shinfo(skb
)->frags
[i
];
1593 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1594 size
= skb_frag_size(frag
);
1595 dma_addr
= dma_map_page_attrs(&nic
->pdev
->dev
,
1596 skb_frag_page(frag
),
1597 skb_frag_off(frag
), size
,
1599 DMA_ATTR_SKIP_CPU_SYNC
);
1600 if (dma_mapping_error(&nic
->pdev
->dev
, dma_addr
)) {
1601 /* Free entire chain of mapped buffers
1602 * here 'i' = frags mapped + above mapped skb->data
1604 nicvf_unmap_sndq_buffers(nic
, sq
, hdr_sqe
, i
);
1605 nicvf_rollback_sq_desc(sq
, qentry
, subdesc_cnt
);
1608 nicvf_sq_add_gather_subdesc(sq
, qentry
, size
, dma_addr
);
1612 if (nic
->t88
&& skb_shinfo(skb
)->gso_size
) {
1613 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1614 nicvf_sq_add_cqe_subdesc(sq
, qentry
, hdr_sqe
, skb
);
1617 nicvf_sq_doorbell(nic
, skb
, sq_num
, subdesc_cnt
);
1622 /* Use original PCI dev for debug log */
1624 netdev_dbg(nic
->netdev
, "Not enough SQ descriptors to xmit pkt\n");
/* frag_num() - map index @i to the mirrored slot inside its group of four.
 *
 * (i & ~3) keeps the base of the aligned group and 3 - (i & 3) reverses the
 * position within it, so 0, 1, 2, 3 become 3, 2, 1, 0 and 4 becomes 7.
 * nicvf_get_rcv_skb() uses the result to index the 16-bit rb_lens[] array.
 *
 * NOTE(review): lines surrounding this return are not visible here, so the
 * mapping may be conditional (e.g. on byte order) - confirm.
 */
1628 static inline unsigned frag_num(unsigned i
)
1631 return (i
& ~3) + 3 - (i
& 3);
/* nicvf_unmap_rcv_buffer() - release the DMA mapping of one receive buffer.
 *
 * @nic:      VF private data (supplies the PCI device).
 * @dma_addr: DMA address of the buffer.
 * @buf_addr: physical address of the same buffer, used to find its page.
 * @xdp:      NOTE(review): its test is not visible here; presumably it
 *            guards the page lookup, refcount check, headroom and
 *            PAGE_MASK adjustments - confirm.
 *
 * The unmap length starts at RCV_FRAG_LEN. In the path that looks up the
 * page, a page whose reference count is not 1 is treated as recycled (the
 * action taken is not visible), XDP_PACKET_HEADROOM is added to the length
 * and the DMA address is rounded down to the page start. The mapping is
 * released with dma_unmap_page_attrs(DMA_FROM_DEVICE,
 * DMA_ATTR_SKIP_CPU_SYNC).
 */
1637 static void nicvf_unmap_rcv_buffer(struct nicvf
*nic
, u64 dma_addr
,
1638 u64 buf_addr
, bool xdp
)
1640 struct page
*page
= NULL
;
1641 int len
= RCV_FRAG_LEN
;
1644 page
= virt_to_page(phys_to_virt(buf_addr
));
1645 /* Check if it's a recycled page, if not
1646 * unmap the DMA mapping.
1648 * Recycled page holds an extra reference.
1650 if (page_ref_count(page
) != 1)
1653 len
+= XDP_PACKET_HEADROOM
;
1654 /* Receive buffers in XDP mode are mapped from page start */
1655 dma_addr
&= PAGE_MASK
;
1657 dma_unmap_page_attrs(&nic
->pdev
->dev
, dma_addr
, len
,
1658 DMA_FROM_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
1661 /* Returns SKB for a received packet.
 *
 * @nic:    VF private data.
 * @cqe_rx: receive completion entry describing rb_cnt buffers.
 * @xdp:    forwarded to nicvf_unmap_rcv_buffer().
 *
 * The buffer lengths are read as a u16 array starting three 64-bit words
 * into the CQE and indexed through frag_num(); the buffer pointers start at
 * word six or word seven (the selecting condition is not visible - the
 * comment below says the 'hw_tso' flag is used).
 *
 * For each buffer the IOVA is translated with nicvf_iova_to_phys(). The
 * first buffer is unmapped and turned into the skb head at
 * (address - align_pad), after which align_pad bytes are reserved and
 * payload_len bytes put. Every later buffer is unmapped and attached as a
 * page fragment with truesize RCV_FRAG_LEN.
 *
 * NOTE(review): the failure check that leads to dev_kfree_skb_any(), the
 * first-vs-later fragment test, the rb_ptrs advance and the return
 * statements are not visible here.
 */
1662 struct sk_buff
*nicvf_get_rcv_skb(struct nicvf
*nic
,
1663 struct cqe_rx_t
*cqe_rx
, bool xdp
)
1666 int payload_len
= 0;
1667 struct sk_buff
*skb
= NULL
;
1670 u16
*rb_lens
= NULL
;
1671 u64
*rb_ptrs
= NULL
;
1674 rb_lens
= (void *)cqe_rx
+ (3 * sizeof(u64
));
1675 /* Except 88xx pass1 on all other chips CQE_RX2_S is added to
1676 * CQE_RX at word6, hence buffer pointers move by word
1678 * Use existing 'hw_tso' flag which will be set for all chips
1679 * except 88xx pass1 instead of an additional cache line
1680 * access (or miss) by using pci dev's revision.
1683 rb_ptrs
= (void *)cqe_rx
+ (6 * sizeof(u64
));
1685 rb_ptrs
= (void *)cqe_rx
+ (7 * sizeof(u64
));
1687 for (frag
= 0; frag
< cqe_rx
->rb_cnt
; frag
++) {
1688 payload_len
= rb_lens
[frag_num(frag
)];
1689 phys_addr
= nicvf_iova_to_phys(nic
, *rb_ptrs
);
1692 dev_kfree_skb_any(skb
);
1697 /* First fragment */
1698 nicvf_unmap_rcv_buffer(nic
,
1699 *rb_ptrs
- cqe_rx
->align_pad
,
1701 skb
= nicvf_rb_ptr_to_skb(nic
,
1702 phys_addr
- cqe_rx
->align_pad
,
1706 skb_reserve(skb
, cqe_rx
->align_pad
);
1707 skb_put(skb
, payload_len
);
1710 nicvf_unmap_rcv_buffer(nic
, *rb_ptrs
, phys_addr
, xdp
);
1711 page
= virt_to_page(phys_to_virt(phys_addr
));
1712 offset
= phys_to_virt(phys_addr
) - page_address(page
);
1713 skb_add_rx_frag(skb
, skb_shinfo(skb
)->nr_frags
, page
,
1714 offset
, payload_len
, RCV_FRAG_LEN
);
1716 /* Next buffer pointer */
/* nicvf_int_type_to_mask() - build the interrupt register bit for a source.
 *
 * @int_type: one of the NICVF_INTR_* interrupt types.
 * @q_idx:    queue index; only used by the per-queue types.
 *
 * Per-queue sources (the CQ, SQ and RBDR shifts) yield
 * (1ULL << q_idx) << shift; the PKT_DROP, TCP_TIMER, MBOX and QS_ERR
 * sources yield a single bit at their shift and ignore @q_idx.
 *
 * NOTE(review): the switch header, the first two case labels, the default
 * branch and the return are not visible here; the callers' "unknown type"
 * messages suggest an unknown type yields 0 - confirm.
 */
1722 static u64
nicvf_int_type_to_mask(int int_type
, int q_idx
)
1728 reg_val
= ((1ULL << q_idx
) << NICVF_INTR_CQ_SHIFT
);
1731 reg_val
= ((1ULL << q_idx
) << NICVF_INTR_SQ_SHIFT
);
1733 case NICVF_INTR_RBDR
:
1734 reg_val
= ((1ULL << q_idx
) << NICVF_INTR_RBDR_SHIFT
);
1736 case NICVF_INTR_PKT_DROP
:
1737 reg_val
= (1ULL << NICVF_INTR_PKT_DROP_SHIFT
);
1739 case NICVF_INTR_TCP_TIMER
:
1740 reg_val
= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT
);
1742 case NICVF_INTR_MBOX
:
1743 reg_val
= (1ULL << NICVF_INTR_MBOX_SHIFT
);
1745 case NICVF_INTR_QS_ERR
:
1746 reg_val
= (1ULL << NICVF_INTR_QS_ERR_SHIFT
);
1755 /* Enable interrupt.
 *
 * @nic:      VF private data.
 * @int_type: NICVF_INTR_* type, translated by nicvf_int_type_to_mask().
 * @q_idx:    queue index for per-queue interrupt types.
 *
 * Writes NIC_VF_ENA_W1S with its current value OR-ed with the mask.
 * NOTE(review): the check that triggers the "unknown type" debug message is
 * not visible here; presumably it fires on a zero mask and returns early.
 */
1756 void nicvf_enable_intr(struct nicvf
*nic
, int int_type
, int q_idx
)
1758 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1761 netdev_dbg(nic
->netdev
,
1762 "Failed to enable interrupt: unknown type\n");
1765 nicvf_reg_write(nic
, NIC_VF_ENA_W1S
,
1766 nicvf_reg_read(nic
, NIC_VF_ENA_W1S
) | mask
);
1769 /* Disable interrupt.
 *
 * @nic:      VF private data.
 * @int_type: NICVF_INTR_* type, translated by nicvf_int_type_to_mask().
 * @q_idx:    queue index for per-queue interrupt types.
 *
 * Writes the mask to NIC_VF_ENA_W1C; unlike the enable path no
 * read-modify-write is done.
 * NOTE(review): the check that triggers the "unknown type" debug message is
 * not visible here; presumably it fires on a zero mask and returns early.
 */
1770 void nicvf_disable_intr(struct nicvf
*nic
, int int_type
, int q_idx
)
1772 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1775 netdev_dbg(nic
->netdev
,
1776 "Failed to disable interrupt: unknown type\n");
1780 nicvf_reg_write(nic
, NIC_VF_ENA_W1C
, mask
);
1783 /* Clear interrupt.
 *
 * @nic:      VF private data.
 * @int_type: NICVF_INTR_* type, translated by nicvf_int_type_to_mask().
 * @q_idx:    queue index for per-queue interrupt types.
 *
 * Writes the mask to NIC_VF_INT.
 * NOTE(review): the check that triggers the "unknown type" debug message is
 * not visible here; presumably it fires on a zero mask and returns early.
 */
1784 void nicvf_clear_intr(struct nicvf
*nic
, int int_type
, int q_idx
)
1786 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1789 netdev_dbg(nic
->netdev
,
1790 "Failed to clear interrupt: unknown type\n");
1794 nicvf_reg_write(nic
, NIC_VF_INT
, mask
);
1797 /* Check if interrupt is enabled.
 *
 * @nic:      VF private data.
 * @int_type: NICVF_INTR_* type, translated by nicvf_int_type_to_mask().
 * @q_idx:    queue index for per-queue interrupt types.
 *
 * Return: the mask AND-ed with the current NIC_VF_ENA_W1S value, i.e.
 * non-zero when the interrupt's enable bit is set.
 *
 * NOTE(review): the u64 result is narrowed to int on return, so a mask bit
 * at position 32 or above would read back as "disabled" - confirm all
 * NICVF_INTR_*_SHIFT values (plus q_idx) stay below bit 31. The check that
 * triggers the "unknown type" message is not visible here.
 */
1798 int nicvf_is_intr_enabled(struct nicvf
*nic
, int int_type
, int q_idx
)
1800 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1801 /* If interrupt type is unknown, we treat it disabled. */
1803 netdev_dbg(nic
->netdev
,
1804 "Failed to check interrupt enable: unknown type\n");
1808 return mask
& nicvf_reg_read(nic
, NIC_VF_ENA_W1S
);
/* nicvf_update_rq_stats() - refresh byte/packet counters of a receive queue.
 *
 * @nic:    VF private data.
 * @rq_idx: index of the receive queue in nic->qs->rq[].
 *
 * GET_RQ_STATS(reg) reads the register at NIC_QSET_RQ_0_7_STAT_0_1 combined
 * with the queue index (shifted by NIC_Q_NUM_SHIFT) and the counter number
 * times eight (reg << 3). The values read for RQ_SQ_STATS_OCTS and
 * RQ_SQ_STATS_PKTS overwrite rq->stats.bytes and rq->stats.pkts.
 *
 * The macro is defined inside the function body and expands 'reg' and
 * 'rq_idx' without extra parentheses, so it should only be given simple
 * constants as done here.
 */
1811 void nicvf_update_rq_stats(struct nicvf
*nic
, int rq_idx
)
1813 struct rcv_queue
*rq
;
1815 #define GET_RQ_STATS(reg) \
1816 nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
1817 (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1819 rq
= &nic
->qs
->rq
[rq_idx
];
1820 rq
->stats
.bytes
= GET_RQ_STATS(RQ_SQ_STATS_OCTS
);
1821 rq
->stats
.pkts
= GET_RQ_STATS(RQ_SQ_STATS_PKTS
);
/* nicvf_update_sq_stats() - refresh byte/packet counters of a send queue.
 *
 * @nic:    VF private data.
 * @sq_idx: index of the send queue in nic->qs->sq[].
 *
 * GET_SQ_STATS(reg) reads the register at NIC_QSET_SQ_0_7_STAT_0_1 combined
 * with the queue index (shifted by NIC_Q_NUM_SHIFT) and the counter number
 * times eight (reg << 3). The values read for RQ_SQ_STATS_OCTS and
 * RQ_SQ_STATS_PKTS overwrite sq->stats.bytes and sq->stats.pkts.
 *
 * The macro is defined inside the function body and expands 'reg' and
 * 'sq_idx' without extra parentheses, so it should only be given simple
 * constants as done here.
 */
1824 void nicvf_update_sq_stats(struct nicvf
*nic
, int sq_idx
)
1826 struct snd_queue
*sq
;
1828 #define GET_SQ_STATS(reg) \
1829 nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
1830 (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1832 sq
= &nic
->qs
->sq
[sq_idx
];
1833 sq
->stats
.bytes
= GET_SQ_STATS(RQ_SQ_STATS_OCTS
);
1834 sq
->stats
.pkts
= GET_SQ_STATS(RQ_SQ_STATS_PKTS
);
1837 /* Check for errors in the receive cmp.queue entry.
 *
 * @nic:    VF private data; nic->drv_stats holds the per-CPU error counters.
 * @cqe_rx: receive completion entry whose err_level / err_opcode are read.
 *
 * Logs the error level and opcode through netif_err() (rx_err message
 * class), then increments the per-CPU driver counter that corresponds to
 * cqe_rx->err_opcode: BGX/receive errors (RE_*), pre-L2 and L2 errors, IP
 * and L3 errors, L4/TCP/UDP errors and RBDR truncation.
 *
 * NOTE(review): any condition ahead of the log call, the break statements,
 * a default branch and the return value are not visible here.
 */
1838 int nicvf_check_cqe_rx_errs(struct nicvf
*nic
, struct cqe_rx_t
*cqe_rx
)
1840 netif_err(nic
, rx_err
, nic
->netdev
,
1841 "RX error CQE err_level 0x%x err_opcode 0x%x\n",
1842 cqe_rx
->err_level
, cqe_rx
->err_opcode
);
1844 switch (cqe_rx
->err_opcode
) {
1845 case CQ_RX_ERROP_RE_PARTIAL
:
1846 this_cpu_inc(nic
->drv_stats
->rx_bgx_truncated_pkts
);
1848 case CQ_RX_ERROP_RE_JABBER
:
1849 this_cpu_inc(nic
->drv_stats
->rx_jabber_errs
);
1851 case CQ_RX_ERROP_RE_FCS
:
1852 this_cpu_inc(nic
->drv_stats
->rx_fcs_errs
);
1854 case CQ_RX_ERROP_RE_RX_CTL
:
1855 this_cpu_inc(nic
->drv_stats
->rx_bgx_errs
);
1857 case CQ_RX_ERROP_PREL2_ERR
:
1858 this_cpu_inc(nic
->drv_stats
->rx_prel2_errs
);
1860 case CQ_RX_ERROP_L2_MAL
:
1861 this_cpu_inc(nic
->drv_stats
->rx_l2_hdr_malformed
);
1863 case CQ_RX_ERROP_L2_OVERSIZE
:
1864 this_cpu_inc(nic
->drv_stats
->rx_oversize
);
1866 case CQ_RX_ERROP_L2_UNDERSIZE
:
1867 this_cpu_inc(nic
->drv_stats
->rx_undersize
);
1869 case CQ_RX_ERROP_L2_LENMISM
:
1870 this_cpu_inc(nic
->drv_stats
->rx_l2_len_mismatch
);
1872 case CQ_RX_ERROP_L2_PCLP
:
1873 this_cpu_inc(nic
->drv_stats
->rx_l2_pclp
);
1875 case CQ_RX_ERROP_IP_NOT
:
1876 this_cpu_inc(nic
->drv_stats
->rx_ip_ver_errs
);
1878 case CQ_RX_ERROP_IP_CSUM_ERR
:
1879 this_cpu_inc(nic
->drv_stats
->rx_ip_csum_errs
);
1881 case CQ_RX_ERROP_IP_MAL
:
1882 this_cpu_inc(nic
->drv_stats
->rx_ip_hdr_malformed
);
1884 case CQ_RX_ERROP_IP_MALD
:
1885 this_cpu_inc(nic
->drv_stats
->rx_ip_payload_malformed
);
1887 case CQ_RX_ERROP_IP_HOP
:
1888 this_cpu_inc(nic
->drv_stats
->rx_ip_ttl_errs
);
1890 case CQ_RX_ERROP_L3_PCLP
:
1891 this_cpu_inc(nic
->drv_stats
->rx_l3_pclp
);
1893 case CQ_RX_ERROP_L4_MAL
:
1894 this_cpu_inc(nic
->drv_stats
->rx_l4_malformed
);
1896 case CQ_RX_ERROP_L4_CHK
:
1897 this_cpu_inc(nic
->drv_stats
->rx_l4_csum_errs
);
1899 case CQ_RX_ERROP_UDP_LEN
:
1900 this_cpu_inc(nic
->drv_stats
->rx_udp_len_errs
);
1902 case CQ_RX_ERROP_L4_PORT
:
1903 this_cpu_inc(nic
->drv_stats
->rx_l4_port_errs
);
1905 case CQ_RX_ERROP_TCP_FLAG
:
1906 this_cpu_inc(nic
->drv_stats
->rx_tcp_flag_errs
);
1908 case CQ_RX_ERROP_TCP_OFFSET
:
1909 this_cpu_inc(nic
->drv_stats
->rx_tcp_offset_errs
);
1911 case CQ_RX_ERROP_L4_PCLP
:
1912 this_cpu_inc(nic
->drv_stats
->rx_l4_pclp
);
1914 case CQ_RX_ERROP_RBDR_TRUNC
:
1915 this_cpu_inc(nic
->drv_stats
->rx_truncated_pkts
);
1922 /* Check for errors in the send cmp.queue entry */
1923 int nicvf_check_cqe_tx_errs(struct nicvf
*nic
, struct cqe_send_t
*cqe_tx
)
1925 switch (cqe_tx
->send_status
) {
1926 case CQ_TX_ERROP_DESC_FAULT
:
1927 this_cpu_inc(nic
->drv_stats
->tx_desc_fault
);
1929 case CQ_TX_ERROP_HDR_CONS_ERR
:
1930 this_cpu_inc(nic
->drv_stats
->tx_hdr_cons_err
);
1932 case CQ_TX_ERROP_SUBDC_ERR
:
1933 this_cpu_inc(nic
->drv_stats
->tx_subdesc_err
);
1935 case CQ_TX_ERROP_MAX_SIZE_VIOL
:
1936 this_cpu_inc(nic
->drv_stats
->tx_max_size_exceeded
);
1938 case CQ_TX_ERROP_IMM_SIZE_OFLOW
:
1939 this_cpu_inc(nic
->drv_stats
->tx_imm_size_oflow
);
1941 case CQ_TX_ERROP_DATA_SEQUENCE_ERR
:
1942 this_cpu_inc(nic
->drv_stats
->tx_data_seq_err
);
1944 case CQ_TX_ERROP_MEM_SEQUENCE_ERR
:
1945 this_cpu_inc(nic
->drv_stats
->tx_mem_seq_err
);
1947 case CQ_TX_ERROP_LOCK_VIOL
:
1948 this_cpu_inc(nic
->drv_stats
->tx_lock_viol
);
1950 case CQ_TX_ERROP_DATA_FAULT
:
1951 this_cpu_inc(nic
->drv_stats
->tx_data_fault
);
1953 case CQ_TX_ERROP_TSTMP_CONFLICT
:
1954 this_cpu_inc(nic
->drv_stats
->tx_tstmp_conflict
);
1956 case CQ_TX_ERROP_TSTMP_TIMEOUT
:
1957 this_cpu_inc(nic
->drv_stats
->tx_tstmp_timeout
);
1959 case CQ_TX_ERROP_MEM_FAULT
:
1960 this_cpu_inc(nic
->drv_stats
->tx_mem_fault
);
1962 case CQ_TX_ERROP_CK_OVERLAP
:
1963 this_cpu_inc(nic
->drv_stats
->tx_csum_overlap
);
1965 case CQ_TX_ERROP_CK_OFLOW
:
1966 this_cpu_inc(nic
->drv_stats
->tx_csum_overflow
);