drivers/net/cxgb3/sge.c
1 /*
2 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
36 #include <linux/ip.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
39 #include "common.h"
40 #include "regs.h"
41 #include "sge_defs.h"
42 #include "t3_cpl.h"
43 #include "firmware_exports.h"
45 #define USE_GTS 0
47 #define SGE_RX_SM_BUF_SIZE 1536
49 #define SGE_RX_COPY_THRES 256
50 #define SGE_RX_PULL_LEN 128
53 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
54 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
55 * directly.
57 #define FL0_PG_CHUNK_SIZE 2048
59 #define SGE_RX_DROP_THRES 16
62 * Period of the Tx buffer reclaim timer. This timer does not need to run
63 * frequently as Tx buffers are usually reclaimed by new Tx packets.
65 #define TX_RECLAIM_PERIOD (HZ / 4)
67 /* WR size in bytes */
68 #define WR_LEN (WR_FLITS * 8)
71 * Types of Tx queues in each queue set. Order here matters, do not change.
73 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
75 /* Values for sge_txq.flags */
76 enum {
77 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
78 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
81 struct tx_desc {
82 u64 flit[TX_DESC_FLITS];
85 struct rx_desc {
86 __be32 addr_lo;
87 __be32 len_gen;
88 __be32 gen2;
89 __be32 addr_hi;
92 struct tx_sw_desc { /* SW state per Tx descriptor */
93 struct sk_buff *skb;
96 struct rx_sw_desc { /* SW state per Rx descriptor */
97 union {
98 struct sk_buff *skb;
99 struct fl_pg_chunk pg_chunk;
101 DECLARE_PCI_UNMAP_ADDR(dma_addr);
104 struct rsp_desc { /* response queue descriptor */
105 struct rss_header rss_hdr;
106 __be32 flags;
107 __be32 len_cq;
108 u8 imm_data[47];
109 u8 intr_gen;
112 struct unmap_info { /* packet unmapping info, overlays skb->cb */
113 int sflit; /* start flit of first SGL entry in Tx descriptor */
114 u16 fragidx; /* first page fragment in current Tx descriptor */
115 u16 addr_idx; /* buffer index of first SGL entry in descriptor */
116 u32 len; /* mapped length of skb main body */
120 * Holds unmapping information for Tx packets that need deferred unmapping.
121 * This structure lives at skb->head and must be allocated by callers.
123 struct deferred_unmap_info {
124 struct pci_dev *pdev;
125 dma_addr_t addr[MAX_SKB_FRAGS + 1];
129 * Maps a number of flits to the number of Tx descriptors that can hold them.
130 * The formula is
132 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
134 * HW allows up to 4 descriptors to be combined into a WR.
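 *
 * As a worked example (illustrative only, assuming WR_FLITS == 15 when
 * SGE_NUM_GENBITS == 2 because the last flit of each descriptor carries
 * the generation bits, see wr_gen2() below): a 20-flit WR needs
 * 1 + (20 - 2) / 14 = 2 descriptors, in agreement with the table below.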
136 static u8 flit_desc_map[] = {
137 0,
138 #if SGE_NUM_GENBITS == 1
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
143 #elif SGE_NUM_GENBITS == 2
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
148 #else
149 # error "SGE_NUM_GENBITS must be 1 or 2"
150 #endif
153 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
155 return container_of(q, struct sge_qset, fl[qidx]);
158 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
160 return container_of(q, struct sge_qset, rspq);
163 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
165 return container_of(q, struct sge_qset, txq[qidx]);
169 * refill_rspq - replenish an SGE response queue
170 * @adapter: the adapter
171 * @q: the response queue to replenish
172 * @credits: how many new responses to make available
174 * Replenishes a response queue by making the supplied number of responses
175 * available to HW.
177 static inline void refill_rspq(struct adapter *adapter,
178 const struct sge_rspq *q, unsigned int credits)
180 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
181 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
185 * need_skb_unmap - does the platform need unmapping of sk_buffs?
187 * Returns true if the platform needs sk_buff unmapping. As the result
188 * is a compile-time constant, unnecessary unmapping code is optimized away.
190 static inline int need_skb_unmap(void)
193 * This structure is used to tell if the platform needs buffer
194 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
196 struct dummy {
197 DECLARE_PCI_UNMAP_ADDR(addr);
200 return sizeof(struct dummy) != 0;
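/*
 * Background sketch (assumes the usual <asm/pci.h> definitions of this era,
 * not guaranteed for every architecture): on platforms that need unmapping
 * state, DECLARE_PCI_UNMAP_ADDR(addr) typically expands to "dma_addr_t addr;",
 * making sizeof(struct dummy) non-zero; on platforms that do not, it expands
 * to nothing, the function returns a compile-time 0, and the unmap paths
 * below are eliminated as dead code.
 */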
204 * unmap_skb - unmap a packet main body and its page fragments
205 * @skb: the packet
206 * @q: the Tx queue containing Tx descriptors for the packet
207 * @cidx: index of Tx descriptor
208 * @pdev: the PCI device
210 * Unmap the main body of an sk_buff and its page fragments, if any.
211 * Because of the fairly complicated structure of our SGLs and the desire
212 * to conserve space for metadata, we keep the information necessary to
213 * unmap an sk_buff partly in the sk_buff itself (in its cb), and partly
214 * in the Tx descriptors (the physical addresses of the various data
215 * buffers). The send functions initialize the state in skb->cb so we
216 * can unmap the buffers held in the first Tx descriptor here, and we
217 * have enough information at this point to update the state for the next
218 * Tx descriptor.
220 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
221 unsigned int cidx, struct pci_dev *pdev)
223 const struct sg_ent *sgp;
224 struct unmap_info *ui = (struct unmap_info *)skb->cb;
225 int nfrags, frag_idx, curflit, j = ui->addr_idx;
227 sgp = (struct sg_ent *)&q->desc[cidx].flit[ui->sflit];
229 if (ui->len) {
230 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]), ui->len,
231 PCI_DMA_TODEVICE);
232 ui->len = 0; /* so we know for next descriptor for this skb */
233 j = 1;
236 frag_idx = ui->fragidx;
237 curflit = ui->sflit + 1 + j;
238 nfrags = skb_shinfo(skb)->nr_frags;
240 while (frag_idx < nfrags && curflit < WR_FLITS) {
241 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
242 skb_shinfo(skb)->frags[frag_idx].size,
243 PCI_DMA_TODEVICE);
244 j ^= 1;
245 if (j == 0) {
246 sgp++;
247 curflit++;
249 curflit++;
250 frag_idx++;
253 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
254 ui->fragidx = frag_idx;
255 ui->addr_idx = j;
256 ui->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
261 * free_tx_desc - reclaims Tx descriptors and their buffers
262 * @adapter: the adapter
263 * @q: the Tx queue to reclaim descriptors from
264 * @n: the number of descriptors to reclaim
266 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
267 * Tx buffers. Called with the Tx queue lock held.
269 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
270 unsigned int n)
272 struct tx_sw_desc *d;
273 struct pci_dev *pdev = adapter->pdev;
274 unsigned int cidx = q->cidx;
276 const int need_unmap = need_skb_unmap() &&
277 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
279 d = &q->sdesc[cidx];
280 while (n--) {
281 if (d->skb) { /* an SGL is present */
282 if (need_unmap)
283 unmap_skb(d->skb, q, cidx, pdev);
284 if (d->skb->priority == cidx)
285 kfree_skb(d->skb);
287 ++d;
288 if (++cidx == q->size) {
289 cidx = 0;
290 d = q->sdesc;
293 q->cidx = cidx;
297 * reclaim_completed_tx - reclaims completed Tx descriptors
298 * @adapter: the adapter
299 * @q: the Tx queue to reclaim completed descriptors from
301 * Reclaims Tx descriptors that the SGE has indicated it has processed,
302 * and frees the associated buffers if possible. Called with the Tx
303 * queue's lock held.
305 static inline void reclaim_completed_tx(struct adapter *adapter,
306 struct sge_txq *q)
308 unsigned int reclaim = q->processed - q->cleaned;
310 if (reclaim) {
311 free_tx_desc(adapter, q, reclaim);
312 q->cleaned += reclaim;
313 q->in_use -= reclaim;
318 * should_restart_tx - are there enough resources to restart a Tx queue?
319 * @q: the Tx queue
321 * Checks if there are enough descriptors to restart a suspended Tx queue.
323 static inline int should_restart_tx(const struct sge_txq *q)
325 unsigned int r = q->processed - q->cleaned;
327 return q->in_use - r < (q->size >> 1);
331 * free_rx_bufs - free the Rx buffers on an SGE free list
332 * @pdev: the PCI device associated with the adapter
333 * @rxq: the SGE free list to clean up
335 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
336 * this queue should be stopped before calling this function.
338 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
340 unsigned int cidx = q->cidx;
342 while (q->credits--) {
343 struct rx_sw_desc *d = &q->sdesc[cidx];
345 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
346 q->buf_size, PCI_DMA_FROMDEVICE);
347 if (q->use_pages) {
348 put_page(d->pg_chunk.page);
349 d->pg_chunk.page = NULL;
350 } else {
351 kfree_skb(d->skb);
352 d->skb = NULL;
354 if (++cidx == q->size)
355 cidx = 0;
358 if (q->pg_chunk.page) {
359 __free_page(q->pg_chunk.page);
360 q->pg_chunk.page = NULL;
365 * add_one_rx_buf - add a packet buffer to a free-buffer list
366 * @va: buffer start VA
367 * @len: the buffer length
368 * @d: the HW Rx descriptor to write
369 * @sd: the SW Rx descriptor to write
370 * @gen: the generation bit value
371 * @pdev: the PCI device associated with the adapter
373 * Add a buffer of the given length to the supplied HW and SW Rx
374 * descriptors.
376 static inline void add_one_rx_buf(void *va, unsigned int len,
377 struct rx_desc *d, struct rx_sw_desc *sd,
378 unsigned int gen, struct pci_dev *pdev)
380 dma_addr_t mapping;
382 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
383 pci_unmap_addr_set(sd, dma_addr, mapping);
385 d->addr_lo = cpu_to_be32(mapping);
386 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
387 wmb();
388 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
389 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
392 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
394 if (!q->pg_chunk.page) {
395 q->pg_chunk.page = alloc_page(gfp);
396 if (unlikely(!q->pg_chunk.page))
397 return -ENOMEM;
398 q->pg_chunk.va = page_address(q->pg_chunk.page);
399 q->pg_chunk.offset = 0;
401 sd->pg_chunk = q->pg_chunk;
403 q->pg_chunk.offset += q->buf_size;
404 if (q->pg_chunk.offset == PAGE_SIZE)
405 q->pg_chunk.page = NULL;
406 else {
407 q->pg_chunk.va += q->buf_size;
408 get_page(q->pg_chunk.page);
410 return 0;
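/*
 * Worked example (assuming a 4KB PAGE_SIZE, for illustration only): with
 * FL0_PG_CHUNK_SIZE == 2048 each page yields two Rx buffers.  The first
 * chunk keeps the page's original reference and the second takes the extra
 * get_page() above, so the page is freed only after both chunks have been
 * released, either in free_rx_bufs() or by the network stack.
 */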
414 * refill_fl - refill an SGE free-buffer list
415 * @adapter: the adapter
416 * @q: the free-list to refill
417 * @n: the number of new buffers to allocate
418 * @gfp: the gfp flags for allocating new buffers
420 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
421 * allocated with the supplied gfp flags. The caller must assure that
422 * @n does not exceed the queue's capacity.
424 static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
426 void *buf_start;
427 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
428 struct rx_desc *d = &q->desc[q->pidx];
430 while (n--) {
431 if (q->use_pages) {
432 if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
433 nomem: q->alloc_failed++;
434 break;
436 buf_start = sd->pg_chunk.va;
437 } else {
438 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
440 if (!skb)
441 goto nomem;
443 sd->skb = skb;
444 buf_start = skb->data;
447 add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
448 adap->pdev);
449 d++;
450 sd++;
451 if (++q->pidx == q->size) {
452 q->pidx = 0;
453 q->gen ^= 1;
454 sd = q->sdesc;
455 d = q->desc;
457 q->credits++;
460 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
463 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
465 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
469 * recycle_rx_buf - recycle a receive buffer
470 * @adapter: the adapter
471 * @q: the SGE free list
472 * @idx: index of buffer to recycle
474 * Recycles the specified buffer on the given free list by adding it at
475 * the next available slot on the list.
477 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
478 unsigned int idx)
480 struct rx_desc *from = &q->desc[idx];
481 struct rx_desc *to = &q->desc[q->pidx];
483 q->sdesc[q->pidx] = q->sdesc[idx];
484 to->addr_lo = from->addr_lo; /* already big endian */
485 to->addr_hi = from->addr_hi; /* likewise */
486 wmb();
487 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
488 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
489 q->credits++;
491 if (++q->pidx == q->size) {
492 q->pidx = 0;
493 q->gen ^= 1;
495 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
499 * alloc_ring - allocate resources for an SGE descriptor ring
500 * @pdev: the PCI device
501 * @nelem: the number of descriptors
502 * @elem_size: the size of each descriptor
503 * @sw_size: the size of the SW state associated with each ring element
504 * @phys: the physical address of the allocated ring
505 * @metadata: address of the array holding the SW state for the ring
507 * Allocates resources for an SGE descriptor ring, such as Tx queues,
508 * free buffer lists, or response queues. Each SGE ring requires
509 * space for its HW descriptors plus, optionally, space for the SW state
510 * associated with each HW entry (the metadata). The function returns
511 * three values: the virtual address for the HW ring (the return value
512 * of the function), the physical address of the HW ring, and the address
513 * of the SW ring.
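 *
 * A typical caller (sketched here for illustration, modeled on how the
 * queue-set setup code uses this helper) looks like:
 *
 *	q->desc = alloc_ring(adap->pdev, q->size, sizeof(struct rx_desc),
 *			     sizeof(struct rx_sw_desc), &q->phys_addr,
 *			     &q->sdesc);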
515 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
516 size_t sw_size, dma_addr_t * phys, void *metadata)
518 size_t len = nelem * elem_size;
519 void *s = NULL;
520 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
522 if (!p)
523 return NULL;
524 if (sw_size) {
525 s = kcalloc(nelem, sw_size, GFP_KERNEL);
527 if (!s) {
528 dma_free_coherent(&pdev->dev, len, p, *phys);
529 return NULL;
532 if (metadata)
533 *(void **)metadata = s;
534 memset(p, 0, len);
535 return p;
539 * free_qset - free the resources of an SGE queue set
540 * @adapter: the adapter owning the queue set
541 * @q: the queue set
543 * Release the HW and SW resources associated with an SGE queue set, such
544 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
545 * queue set must be quiesced prior to calling this.
547 void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
549 int i;
550 struct pci_dev *pdev = adapter->pdev;
552 if (q->tx_reclaim_timer.function)
553 del_timer_sync(&q->tx_reclaim_timer);
555 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
556 if (q->fl[i].desc) {
557 spin_lock(&adapter->sge.reg_lock);
558 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
559 spin_unlock(&adapter->sge.reg_lock);
560 free_rx_bufs(pdev, &q->fl[i]);
561 kfree(q->fl[i].sdesc);
562 dma_free_coherent(&pdev->dev,
563 q->fl[i].size *
564 sizeof(struct rx_desc), q->fl[i].desc,
565 q->fl[i].phys_addr);
568 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
569 if (q->txq[i].desc) {
570 spin_lock(&adapter->sge.reg_lock);
571 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
572 spin_unlock(&adapter->sge.reg_lock);
573 if (q->txq[i].sdesc) {
574 free_tx_desc(adapter, &q->txq[i],
575 q->txq[i].in_use);
576 kfree(q->txq[i].sdesc);
578 dma_free_coherent(&pdev->dev,
579 q->txq[i].size *
580 sizeof(struct tx_desc),
581 q->txq[i].desc, q->txq[i].phys_addr);
582 __skb_queue_purge(&q->txq[i].sendq);
585 if (q->rspq.desc) {
586 spin_lock(&adapter->sge.reg_lock);
587 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
588 spin_unlock(&adapter->sge.reg_lock);
589 dma_free_coherent(&pdev->dev,
590 q->rspq.size * sizeof(struct rsp_desc),
591 q->rspq.desc, q->rspq.phys_addr);
594 if (q->netdev)
595 q->netdev->atalk_ptr = NULL;
597 memset(q, 0, sizeof(*q));
601 * init_qset_cntxt - initialize an SGE queue set context info
602 * @qs: the queue set
603 * @id: the queue set id
605 * Initializes the TIDs and context ids for the queues of a queue set.
607 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
609 qs->rspq.cntxt_id = id;
610 qs->fl[0].cntxt_id = 2 * id;
611 qs->fl[1].cntxt_id = 2 * id + 1;
612 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
613 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
614 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
615 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
616 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
620 * sgl_len - calculates the size of an SGL of the given capacity
621 * @n: the number of SGL entries
623 * Calculates the number of flits needed for a scatter/gather list that
624 * can hold the given number of entries.
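 *
 * Each SGL entry effectively takes 12 bytes (an 8-byte address plus a
 * 4-byte length in struct sg_ent) and entries are packed in pairs into
 * groups of 3 flits, hence the formula; for example, sgl_len(1) == 2,
 * sgl_len(2) == 3 and sgl_len(3) == 5.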
626 static inline unsigned int sgl_len(unsigned int n)
628 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
629 return (3 * n) / 2 + (n & 1);
633 * flits_to_desc - returns the num of Tx descriptors for the given flits
634 * @n: the number of flits
636 * Calculates the number of Tx descriptors needed for the supplied number
637 * of flits.
639 static inline unsigned int flits_to_desc(unsigned int n)
641 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
642 return flit_desc_map[n];
646 * get_packet - return the next ingress packet buffer from a free list
647 * @adap: the adapter that received the packet
648 * @fl: the SGE free list holding the packet
649 * @len: the packet length including any SGE padding
650 * @drop_thres: # of remaining buffers before we start dropping packets
652 * Get the next packet from a free list and complete setup of the
653 * sk_buff. If the packet is small we make a copy and recycle the
654 * original buffer, otherwise we use the original buffer itself. If a
655 * positive drop threshold is supplied packets are dropped and their
656 * buffers recycled if (a) the number of remaining buffers is under the
657 * threshold and the packet is too big to copy, or (b) the packet should
658 * be copied but there is no memory for the copy.
660 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
661 unsigned int len, unsigned int drop_thres)
663 struct sk_buff *skb = NULL;
664 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
666 prefetch(sd->skb->data);
667 fl->credits--;
669 if (len <= SGE_RX_COPY_THRES) {
670 skb = alloc_skb(len, GFP_ATOMIC);
671 if (likely(skb != NULL)) {
672 __skb_put(skb, len);
673 pci_dma_sync_single_for_cpu(adap->pdev,
674 pci_unmap_addr(sd, dma_addr), len,
675 PCI_DMA_FROMDEVICE);
676 memcpy(skb->data, sd->skb->data, len);
677 pci_dma_sync_single_for_device(adap->pdev,
678 pci_unmap_addr(sd, dma_addr), len,
679 PCI_DMA_FROMDEVICE);
680 } else if (!drop_thres)
681 goto use_orig_buf;
682 recycle:
683 recycle_rx_buf(adap, fl, fl->cidx);
684 return skb;
687 if (unlikely(fl->credits < drop_thres))
688 goto recycle;
690 use_orig_buf:
691 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
692 fl->buf_size, PCI_DMA_FROMDEVICE);
693 skb = sd->skb;
694 skb_put(skb, len);
695 __refill_fl(adap, fl);
696 return skb;
700 * get_packet_pg - return the next ingress packet buffer from a free list
701 * @adap: the adapter that received the packet
702 * @fl: the SGE free list holding the packet
703 * @len: the packet length including any SGE padding
704 * @drop_thres: # of remaining buffers before we start dropping packets
706 * Get the next packet from a free list populated with page chunks.
707 * If the packet is small we make a copy and recycle the original buffer,
708 * otherwise we attach the original buffer as a page fragment to a fresh
709 * sk_buff. If a positive drop threshold is supplied packets are dropped
710 * and their buffers recycled if (a) the number of remaining buffers is
711 * under the threshold and the packet is too big to copy, or (b) there's
712 * no system memory.
714 * Note: this function is similar to @get_packet but deals with Rx buffers
715 * that are page chunks rather than sk_buffs.
717 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
718 unsigned int len, unsigned int drop_thres)
720 struct sk_buff *skb = NULL;
721 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
723 if (len <= SGE_RX_COPY_THRES) {
724 skb = alloc_skb(len, GFP_ATOMIC);
725 if (likely(skb != NULL)) {
726 __skb_put(skb, len);
727 pci_dma_sync_single_for_cpu(adap->pdev,
728 pci_unmap_addr(sd, dma_addr), len,
729 PCI_DMA_FROMDEVICE);
730 memcpy(skb->data, sd->pg_chunk.va, len);
731 pci_dma_sync_single_for_device(adap->pdev,
732 pci_unmap_addr(sd, dma_addr), len,
733 PCI_DMA_FROMDEVICE);
734 } else if (!drop_thres)
735 return NULL;
736 recycle:
737 fl->credits--;
738 recycle_rx_buf(adap, fl, fl->cidx);
739 return skb;
742 if (unlikely(fl->credits <= drop_thres))
743 goto recycle;
745 skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
746 if (unlikely(!skb)) {
747 if (!drop_thres)
748 return NULL;
749 goto recycle;
752 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
753 fl->buf_size, PCI_DMA_FROMDEVICE);
754 __skb_put(skb, SGE_RX_PULL_LEN);
755 memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
756 skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
757 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
758 len - SGE_RX_PULL_LEN);
759 skb->len = len;
760 skb->data_len = len - SGE_RX_PULL_LEN;
761 skb->truesize += skb->data_len;
763 fl->credits--;
765 * We do not refill FLs here; we let the caller do it to overlap a
766 * prefetch.
768 return skb;
772 * get_imm_packet - return the next ingress packet buffer from a response
773 * @resp: the response descriptor containing the packet data
775 * Return a packet containing the immediate data of the given response.
777 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
779 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
781 if (skb) {
782 __skb_put(skb, IMMED_PKT_SIZE);
783 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
785 return skb;
789 * calc_tx_descs - calculate the number of Tx descriptors for a packet
790 * @skb: the packet
792 * Returns the number of Tx descriptors needed for the given Ethernet
793 * packet. Ethernet packets require addition of WR and CPL headers.
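 *
 * For example (illustrative only), a packet with linear data and two page
 * fragments needs sgl_len(2 + 1) + 2 = 7 flits, or 8 with TSO, which in
 * either case fits in a single Tx descriptor per flits_to_desc() above.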
795 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
797 unsigned int flits;
799 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
800 return 1;
802 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
803 if (skb_shinfo(skb)->gso_size)
804 flits++;
805 return flits_to_desc(flits);
809 * make_sgl - populate a scatter/gather list for a packet
810 * @skb: the packet
811 * @sgp: the SGL to populate
812 * @start: start address of skb main body data to include in the SGL
813 * @len: length of skb main body data to include in the SGL
814 * @pdev: the PCI device
816 * Generates a scatter/gather list for the buffers that make up a packet
817 * and returns the SGL size in 8-byte words. The caller must size the SGL
818 * appropriately.
820 static inline unsigned int make_sgl(const struct sk_buff *skb,
821 struct sg_ent *sgp, unsigned char *start,
822 unsigned int len, struct pci_dev *pdev)
824 dma_addr_t mapping;
825 unsigned int i, j = 0, nfrags;
827 if (len) {
828 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
829 sgp->len[0] = cpu_to_be32(len);
830 sgp->addr[0] = cpu_to_be64(mapping);
831 j = 1;
834 nfrags = skb_shinfo(skb)->nr_frags;
835 for (i = 0; i < nfrags; i++) {
836 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
838 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
839 frag->size, PCI_DMA_TODEVICE);
840 sgp->len[j] = cpu_to_be32(frag->size);
841 sgp->addr[j] = cpu_to_be64(mapping);
842 j ^= 1;
843 if (j == 0)
844 ++sgp;
846 if (j)
847 sgp->len[j] = 0;
848 return ((nfrags + (len != 0)) * 3) / 2 + j;
852 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
853 * @adap: the adapter
854 * @q: the Tx queue
856 * Ring the doorbell if a Tx queue is asleep. There is a natural race
857 * where the HW goes to sleep just after we check; in that case the
858 * interrupt handler will detect the outstanding TX packet and ring the
859 * doorbell for us.
861 * When GTS is disabled we unconditionally ring the doorbell.
863 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
865 #if USE_GTS
866 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
867 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
868 set_bit(TXQ_LAST_PKT_DB, &q->flags);
869 t3_write_reg(adap, A_SG_KDOORBELL,
870 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
872 #else
873 wmb(); /* write descriptors before telling HW */
874 t3_write_reg(adap, A_SG_KDOORBELL,
875 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
876 #endif
879 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
881 #if SGE_NUM_GENBITS == 2
882 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
883 #endif
887 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
888 * @ndesc: number of Tx descriptors spanned by the SGL
889 * @skb: the packet corresponding to the WR
890 * @d: first Tx descriptor to be written
891 * @pidx: index of above descriptors
892 * @q: the SGE Tx queue
893 * @sgl: the SGL
894 * @flits: number of flits to the start of the SGL in the first descriptor
895 * @sgl_flits: the SGL size in flits
896 * @gen: the Tx descriptor generation
897 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
898 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
900 * Write a work request header and an associated SGL. If the SGL is
901 * small enough to fit into one Tx descriptor it has already been written
902 * and we just need to write the WR header. Otherwise we distribute the
903 * SGL across the number of descriptors it spans.
905 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
906 struct tx_desc *d, unsigned int pidx,
907 const struct sge_txq *q,
908 const struct sg_ent *sgl,
909 unsigned int flits, unsigned int sgl_flits,
910 unsigned int gen, unsigned int wr_hi,
911 unsigned int wr_lo)
913 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
914 struct tx_sw_desc *sd = &q->sdesc[pidx];
916 sd->skb = skb;
917 if (need_skb_unmap()) {
918 struct unmap_info *ui = (struct unmap_info *)skb->cb;
920 ui->fragidx = 0;
921 ui->addr_idx = 0;
922 ui->sflit = flits;
925 if (likely(ndesc == 1)) {
926 skb->priority = pidx;
927 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
928 V_WR_SGLSFLT(flits)) | wr_hi;
929 wmb();
930 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
931 V_WR_GEN(gen)) | wr_lo;
932 wr_gen2(d, gen);
933 } else {
934 unsigned int ogen = gen;
935 const u64 *fp = (const u64 *)sgl;
936 struct work_request_hdr *wp = wrp;
938 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
939 V_WR_SGLSFLT(flits)) | wr_hi;
941 while (sgl_flits) {
942 unsigned int avail = WR_FLITS - flits;
944 if (avail > sgl_flits)
945 avail = sgl_flits;
946 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
947 sgl_flits -= avail;
948 ndesc--;
949 if (!sgl_flits)
950 break;
952 fp += avail;
953 d++;
954 sd++;
955 if (++pidx == q->size) {
956 pidx = 0;
957 gen ^= 1;
958 d = q->desc;
959 sd = q->sdesc;
962 sd->skb = skb;
963 wrp = (struct work_request_hdr *)d;
964 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
965 V_WR_SGLSFLT(1)) | wr_hi;
966 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
967 sgl_flits + 1)) |
968 V_WR_GEN(gen)) | wr_lo;
969 wr_gen2(d, gen);
970 flits = 1;
972 skb->priority = pidx;
973 wrp->wr_hi |= htonl(F_WR_EOP);
974 wmb();
975 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
976 wr_gen2((struct tx_desc *)wp, ogen);
977 WARN_ON(ndesc != 0);
982 * write_tx_pkt_wr - write a TX_PKT work request
983 * @adap: the adapter
984 * @skb: the packet to send
985 * @pi: the egress interface
986 * @pidx: index of the first Tx descriptor to write
987 * @gen: the generation value to use
988 * @q: the Tx queue
989 * @ndesc: number of descriptors the packet will occupy
990 * @compl: the value of the COMPL bit to use
992 * Generate a TX_PKT work request to send the supplied packet.
994 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
995 const struct port_info *pi,
996 unsigned int pidx, unsigned int gen,
997 struct sge_txq *q, unsigned int ndesc,
998 unsigned int compl)
1000 unsigned int flits, sgl_flits, cntrl, tso_info;
1001 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1002 struct tx_desc *d = &q->desc[pidx];
1003 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1005 cpl->len = htonl(skb->len | 0x80000000);
1006 cntrl = V_TXPKT_INTF(pi->port_id);
1008 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1009 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1011 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1012 if (tso_info) {
1013 int eth_type;
1014 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1016 d->flit[2] = 0;
1017 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1018 hdr->cntrl = htonl(cntrl);
1019 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1020 CPL_ETH_II : CPL_ETH_II_VLAN;
1021 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1022 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1023 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1024 hdr->lso_info = htonl(tso_info);
1025 flits = 3;
1026 } else {
1027 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1028 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1029 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1030 cpl->cntrl = htonl(cntrl);
1032 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1033 q->sdesc[pidx].skb = NULL;
1034 if (!skb->data_len)
1035 skb_copy_from_linear_data(skb, &d->flit[2],
1036 skb->len);
1037 else
1038 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1040 flits = (skb->len + 7) / 8 + 2;
1041 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1042 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1043 | F_WR_SOP | F_WR_EOP | compl);
1044 wmb();
1045 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1046 V_WR_TID(q->token));
1047 wr_gen2(d, gen);
1048 kfree_skb(skb);
1049 return;
1052 flits = 2;
1055 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1056 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1057 if (need_skb_unmap())
1058 ((struct unmap_info *)skb->cb)->len = skb_headlen(skb);
1060 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1061 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1062 htonl(V_WR_TID(q->token)));
1066 * eth_xmit - add a packet to the Ethernet Tx queue
1067 * @skb: the packet
1068 * @dev: the egress net device
1070 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1072 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1074 unsigned int ndesc, pidx, credits, gen, compl;
1075 const struct port_info *pi = netdev_priv(dev);
1076 struct adapter *adap = dev->priv;
1077 struct sge_qset *qs = dev2qset(dev);
1078 struct sge_txq *q = &qs->txq[TXQ_ETH];
1081 * The chip's minimum packet length is 9 octets, but play it safe and reject
1082 * anything shorter than an Ethernet header.
1084 if (unlikely(skb->len < ETH_HLEN)) {
1085 dev_kfree_skb(skb);
1086 return NETDEV_TX_OK;
1089 spin_lock(&q->lock);
1090 reclaim_completed_tx(adap, q);
1092 credits = q->size - q->in_use;
1093 ndesc = calc_tx_descs(skb);
1095 if (unlikely(credits < ndesc)) {
1096 if (!netif_queue_stopped(dev)) {
1097 netif_stop_queue(dev);
1098 set_bit(TXQ_ETH, &qs->txq_stopped);
1099 q->stops++;
1100 dev_err(&adap->pdev->dev,
1101 "%s: Tx ring %u full while queue awake!\n",
1102 dev->name, q->cntxt_id & 7);
1104 spin_unlock(&q->lock);
1105 return NETDEV_TX_BUSY;
1108 q->in_use += ndesc;
1109 if (unlikely(credits - ndesc < q->stop_thres)) {
1110 q->stops++;
1111 netif_stop_queue(dev);
1112 set_bit(TXQ_ETH, &qs->txq_stopped);
1113 #if !USE_GTS
1114 if (should_restart_tx(q) &&
1115 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1116 q->restarts++;
1117 netif_wake_queue(dev);
1119 #endif
1122 gen = q->gen;
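/*
 * Completion requests: roughly once every 8 descriptors, bit 3 of
 * q->unacked is shifted into the WR COMPL position so the SGE reports
 * completion credits back; the counter is then kept modulo 8.
 */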
1123 q->unacked += ndesc;
1124 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1125 q->unacked &= 7;
1126 pidx = q->pidx;
1127 q->pidx += ndesc;
1128 if (q->pidx >= q->size) {
1129 q->pidx -= q->size;
1130 q->gen ^= 1;
1133 /* update port statistics */
1134 if (skb->ip_summed == CHECKSUM_COMPLETE)
1135 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1136 if (skb_shinfo(skb)->gso_size)
1137 qs->port_stats[SGE_PSTAT_TSO]++;
1138 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1139 qs->port_stats[SGE_PSTAT_VLANINS]++;
1141 dev->trans_start = jiffies;
1142 spin_unlock(&q->lock);
1145 * We do not use Tx completion interrupts to free DMAd Tx packets.
1146 * This is good for performance but means that we rely on new Tx
1147 * packets arriving to run the destructors of completed packets,
1148 * which open up space in their sockets' send queues. Sometimes
1150 * we do not get such new packets, causing Tx to stall. A single
1150 * UDP transmitter is a good example of this situation. We have
1151 * a clean up timer that periodically reclaims completed packets
1152 * but it doesn't run often enough (nor do we want it to) to prevent
1153 * lengthy stalls. A solution to this problem is to run the
1154 * destructor early, after the packet is queued but before it's DMAd.
1155 * A downside is that we lie to socket memory accounting, but the amount
1156 * of extra memory is reasonable (limited by the number of Tx
1157 * descriptors), the packets do actually get freed quickly by new
1158 * packets almost always, and for protocols like TCP that wait for
1159 * acks to really free up the data the extra memory is even less.
1160 * On the positive side we run the destructors on the sending CPU
1161 * rather than on a potentially different completing CPU, usually a
1162 * good thing. We also run them without holding our Tx queue lock,
1163 * unlike what reclaim_completed_tx() would otherwise do.
1165 * Run the destructor before telling the DMA engine about the packet
1166 * to make sure it doesn't complete and get freed prematurely.
1168 if (likely(!skb_shared(skb)))
1169 skb_orphan(skb);
1171 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1172 check_ring_tx_db(adap, q);
1173 return NETDEV_TX_OK;
1177 * write_imm - write a packet into a Tx descriptor as immediate data
1178 * @d: the Tx descriptor to write
1179 * @skb: the packet
1180 * @len: the length of packet data to write as immediate data
1181 * @gen: the generation bit value to write
1183 * Writes a packet as immediate data into a Tx descriptor. The packet
1184 * contains a work request at its beginning. We must write the packet
1185 * carefully so the SGE doesn't accidentally read it before it has been
1186 * written in its entirety.
1188 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1189 unsigned int len, unsigned int gen)
1191 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1192 struct work_request_hdr *to = (struct work_request_hdr *)d;
1194 memcpy(&to[1], &from[1], len - sizeof(*from));
1195 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1196 V_WR_BCNTLFLT(len & 7));
1197 wmb();
1198 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1199 V_WR_LEN((len + 7) / 8));
1200 wr_gen2(d, gen);
1201 kfree_skb(skb);
1205 * check_desc_avail - check descriptor availability on a send queue
1206 * @adap: the adapter
1207 * @q: the send queue
1208 * @skb: the packet needing the descriptors
1209 * @ndesc: the number of Tx descriptors needed
1210 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1212 * Checks if the requested number of Tx descriptors is available on an
1213 * SGE send queue. If the queue is already suspended or not enough
1214 * descriptors are available the packet is queued for later transmission.
1215 * Must be called with the Tx queue locked.
1217 * Returns 0 if enough descriptors are available, 1 if there aren't
1218 * enough descriptors and the packet has been queued, and 2 if the caller
1219 * needs to retry because there weren't enough descriptors at the
1220 * beginning of the call but some freed up in the meantime.
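 *
 * Callers handle a return of 2 by reclaiming completed descriptors and
 * retrying, as ctrl_xmit() and ofld_xmit() below do with their
 * "goto again" loops.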
1222 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1223 struct sk_buff *skb, unsigned int ndesc,
1224 unsigned int qid)
1226 if (unlikely(!skb_queue_empty(&q->sendq))) {
1227 addq_exit:__skb_queue_tail(&q->sendq, skb);
1228 return 1;
1230 if (unlikely(q->size - q->in_use < ndesc)) {
1231 struct sge_qset *qs = txq_to_qset(q, qid);
1233 set_bit(qid, &qs->txq_stopped);
1234 smp_mb__after_clear_bit();
1236 if (should_restart_tx(q) &&
1237 test_and_clear_bit(qid, &qs->txq_stopped))
1238 return 2;
1240 q->stops++;
1241 goto addq_exit;
1243 return 0;
1247 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1248 * @q: the SGE control Tx queue
1250 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1251 * that send only immediate data (presently just the control queues) and
1252 * thus do not have any sk_buffs to release.
1254 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1256 unsigned int reclaim = q->processed - q->cleaned;
1258 q->in_use -= reclaim;
1259 q->cleaned += reclaim;
1262 static inline int immediate(const struct sk_buff *skb)
1264 return skb->len <= WR_LEN && !skb->data_len;
1268 * ctrl_xmit - send a packet through an SGE control Tx queue
1269 * @adap: the adapter
1270 * @q: the control queue
1271 * @skb: the packet
1273 * Send a packet through an SGE control Tx queue. Packets sent through
1274 * a control queue must fit entirely as immediate data in a single Tx
1275 * descriptor and have no page fragments.
1277 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1278 struct sk_buff *skb)
1280 int ret;
1281 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1283 if (unlikely(!immediate(skb))) {
1284 WARN_ON(1);
1285 dev_kfree_skb(skb);
1286 return NET_XMIT_SUCCESS;
1289 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1290 wrp->wr_lo = htonl(V_WR_TID(q->token));
1292 spin_lock(&q->lock);
1293 again:reclaim_completed_tx_imm(q);
1295 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1296 if (unlikely(ret)) {
1297 if (ret == 1) {
1298 spin_unlock(&q->lock);
1299 return NET_XMIT_CN;
1301 goto again;
1304 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1306 q->in_use++;
1307 if (++q->pidx >= q->size) {
1308 q->pidx = 0;
1309 q->gen ^= 1;
1311 spin_unlock(&q->lock);
1312 wmb();
1313 t3_write_reg(adap, A_SG_KDOORBELL,
1314 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1315 return NET_XMIT_SUCCESS;
1319 * restart_ctrlq - restart a suspended control queue
1320 * @qs: the queue set containing the control queue
1322 * Resumes transmission on a suspended Tx control queue.
1324 static void restart_ctrlq(unsigned long data)
1326 struct sk_buff *skb;
1327 struct sge_qset *qs = (struct sge_qset *)data;
1328 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1329 struct adapter *adap = qs->netdev->priv;
1331 spin_lock(&q->lock);
1332 again:reclaim_completed_tx_imm(q);
1334 while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) {
1336 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1338 if (++q->pidx >= q->size) {
1339 q->pidx = 0;
1340 q->gen ^= 1;
1342 q->in_use++;
1345 if (!skb_queue_empty(&q->sendq)) {
1346 set_bit(TXQ_CTRL, &qs->txq_stopped);
1347 smp_mb__after_clear_bit();
1349 if (should_restart_tx(q) &&
1350 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1351 goto again;
1352 q->stops++;
1355 spin_unlock(&q->lock);
1356 t3_write_reg(adap, A_SG_KDOORBELL,
1357 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1361 * Send a management message through control queue 0
1363 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1365 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1369 * deferred_unmap_destructor - unmap a packet when it is freed
1370 * @skb: the packet
1372 * This is the packet destructor used for Tx packets that need to remain
1373 * mapped until they are freed rather than until their Tx descriptors are
1374 * freed.
1376 static void deferred_unmap_destructor(struct sk_buff *skb)
1378 int i;
1379 const dma_addr_t *p;
1380 const struct skb_shared_info *si;
1381 const struct deferred_unmap_info *dui;
1382 const struct unmap_info *ui = (struct unmap_info *)skb->cb;
1384 dui = (struct deferred_unmap_info *)skb->head;
1385 p = dui->addr;
1387 if (ui->len)
1388 pci_unmap_single(dui->pdev, *p++, ui->len, PCI_DMA_TODEVICE);
1390 si = skb_shinfo(skb);
1391 for (i = 0; i < si->nr_frags; i++)
1392 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1393 PCI_DMA_TODEVICE);
1396 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1397 const struct sg_ent *sgl, int sgl_flits)
1399 dma_addr_t *p;
1400 struct deferred_unmap_info *dui;
1402 dui = (struct deferred_unmap_info *)skb->head;
1403 dui->pdev = pdev;
1404 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1405 *p++ = be64_to_cpu(sgl->addr[0]);
1406 *p++ = be64_to_cpu(sgl->addr[1]);
1408 if (sgl_flits)
1409 *p = be64_to_cpu(sgl->addr[0]);
1413 * write_ofld_wr - write an offload work request
1414 * @adap: the adapter
1415 * @skb: the packet to send
1416 * @q: the Tx queue
1417 * @pidx: index of the first Tx descriptor to write
1418 * @gen: the generation value to use
1419 * @ndesc: number of descriptors the packet will occupy
1421 * Write an offload work request to send the supplied packet. The packet
1422 * data already carry the work request with most fields populated.
1424 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1425 struct sge_txq *q, unsigned int pidx,
1426 unsigned int gen, unsigned int ndesc)
1428 unsigned int sgl_flits, flits;
1429 struct work_request_hdr *from;
1430 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1431 struct tx_desc *d = &q->desc[pidx];
1433 if (immediate(skb)) {
1434 q->sdesc[pidx].skb = NULL;
1435 write_imm(d, skb, skb->len, gen);
1436 return;
1439 /* Only TX_DATA builds SGLs */
1441 from = (struct work_request_hdr *)skb->data;
1442 memcpy(&d->flit[1], &from[1],
1443 skb_transport_offset(skb) - sizeof(*from));
1445 flits = skb_transport_offset(skb) / 8;
1446 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1447 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1448 skb->tail - skb->transport_header,
1449 adap->pdev);
1450 if (need_skb_unmap()) {
1451 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1452 skb->destructor = deferred_unmap_destructor;
1453 ((struct unmap_info *)skb->cb)->len = (skb->tail -
1454 skb->transport_header);
1457 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1458 gen, from->wr_hi, from->wr_lo);
1462 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1463 * @skb: the packet
1465 * Returns the number of Tx descriptors needed for the given offload
1466 * packet. These packets are already fully constructed.
1468 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1470 unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;
1472 if (skb->len <= WR_LEN && cnt == 0)
1473 return 1; /* packet fits as immediate data */
1475 flits = skb_transport_offset(skb) / 8; /* headers */
1476 if (skb->tail != skb->transport_header)
1477 cnt++;
1478 return flits_to_desc(flits + sgl_len(cnt));
1482 * ofld_xmit - send a packet through an offload queue
1483 * @adap: the adapter
1484 * @q: the Tx offload queue
1485 * @skb: the packet
1487 * Send an offload packet through an SGE offload queue.
1489 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1490 struct sk_buff *skb)
1492 int ret;
1493 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1495 spin_lock(&q->lock);
1496 again:reclaim_completed_tx(adap, q);
1498 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1499 if (unlikely(ret)) {
1500 if (ret == 1) {
1501 skb->priority = ndesc; /* save for restart */
1502 spin_unlock(&q->lock);
1503 return NET_XMIT_CN;
1505 goto again;
1508 gen = q->gen;
1509 q->in_use += ndesc;
1510 pidx = q->pidx;
1511 q->pidx += ndesc;
1512 if (q->pidx >= q->size) {
1513 q->pidx -= q->size;
1514 q->gen ^= 1;
1516 spin_unlock(&q->lock);
1518 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1519 check_ring_tx_db(adap, q);
1520 return NET_XMIT_SUCCESS;
1524 * restart_offloadq - restart a suspended offload queue
1525 * @qs: the queue set containing the offload queue
1527 * Resumes transmission on a suspended Tx offload queue.
1529 static void restart_offloadq(unsigned long data)
1531 struct sk_buff *skb;
1532 struct sge_qset *qs = (struct sge_qset *)data;
1533 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1534 struct adapter *adap = qs->netdev->priv;
1536 spin_lock(&q->lock);
1537 again:reclaim_completed_tx(adap, q);
1539 while ((skb = skb_peek(&q->sendq)) != NULL) {
1540 unsigned int gen, pidx;
1541 unsigned int ndesc = skb->priority;
1543 if (unlikely(q->size - q->in_use < ndesc)) {
1544 set_bit(TXQ_OFLD, &qs->txq_stopped);
1545 smp_mb__after_clear_bit();
1547 if (should_restart_tx(q) &&
1548 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1549 goto again;
1550 q->stops++;
1551 break;
1554 gen = q->gen;
1555 q->in_use += ndesc;
1556 pidx = q->pidx;
1557 q->pidx += ndesc;
1558 if (q->pidx >= q->size) {
1559 q->pidx -= q->size;
1560 q->gen ^= 1;
1562 __skb_unlink(skb, &q->sendq);
1563 spin_unlock(&q->lock);
1565 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1566 spin_lock(&q->lock);
1568 spin_unlock(&q->lock);
1570 #if USE_GTS
1571 set_bit(TXQ_RUNNING, &q->flags);
1572 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1573 #endif
1574 t3_write_reg(adap, A_SG_KDOORBELL,
1575 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1579 * queue_set - return the queue set a packet should use
1580 * @skb: the packet
1582 * Maps a packet to the SGE queue set it should use. The desired queue
1583 * set is carried in bits 1-3 in the packet's priority.
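 *
 * For example, a priority of 5 (binary 101) selects queue set 2 here and,
 * per is_ctrl_pkt() below, the control rather than the offload Tx queue.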
1585 static inline int queue_set(const struct sk_buff *skb)
1587 return skb->priority >> 1;
1591 * is_ctrl_pkt - return whether an offload packet is a control packet
1592 * @skb: the packet
1594 * Determines whether an offload packet should use an OFLD or a CTRL
1595 * Tx queue. This is indicated by bit 0 in the packet's priority.
1597 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1599 return skb->priority & 1;
1603 * t3_offload_tx - send an offload packet
1604 * @tdev: the offload device to send to
1605 * @skb: the packet
1607 * Sends an offload packet. We use the packet priority to select the
1608 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1609 * should be sent as regular or control, bits 1-3 select the queue set.
1611 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1613 struct adapter *adap = tdev2adap(tdev);
1614 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1616 if (unlikely(is_ctrl_pkt(skb)))
1617 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1619 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1623 * offload_enqueue - add an offload packet to an SGE offload receive queue
1624 * @q: the SGE response queue
1625 * @skb: the packet
1627 * Add a new offload packet to an SGE response queue's offload packet
1628 * queue. If the packet is the first on the queue it schedules the RX
1629 * softirq to process the queue.
1631 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1633 skb->next = skb->prev = NULL;
1634 if (q->rx_tail)
1635 q->rx_tail->next = skb;
1636 else {
1637 struct sge_qset *qs = rspq_to_qset(q);
1639 if (__netif_rx_schedule_prep(qs->netdev))
1640 __netif_rx_schedule(qs->netdev);
1641 q->rx_head = skb;
1643 q->rx_tail = skb;
1647 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1648 * @tdev: the offload device that will be receiving the packets
1649 * @q: the SGE response queue that assembled the bundle
1650 * @skbs: the partial bundle
1651 * @n: the number of packets in the bundle
1653 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1655 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1656 struct sge_rspq *q,
1657 struct sk_buff *skbs[], int n)
1659 if (n) {
1660 q->offload_bundles++;
1661 tdev->recv(tdev, skbs, n);
1666 * ofld_poll - NAPI handler for offload packets in interrupt mode
1667 * @dev: the network device doing the polling
1668 * @budget: polling budget
1670 * The NAPI handler for offload packets when a response queue is serviced
1671 * by the hard interrupt handler, i.e., when it's operating in non-polling
1672 * mode. Creates small packet batches and sends them through the offload
1673 * receive handler. Batches need to be of modest size as we do prefetches
1674 * on the packets in each.
1676 static int ofld_poll(struct net_device *dev, int *budget)
1678 struct adapter *adapter = dev->priv;
1679 struct sge_qset *qs = dev2qset(dev);
1680 struct sge_rspq *q = &qs->rspq;
1681 int work_done, limit = min(*budget, dev->quota), avail = limit;
1683 while (avail) {
1684 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1685 int ngathered;
1687 spin_lock_irq(&q->lock);
1688 head = q->rx_head;
1689 if (!head) {
1690 work_done = limit - avail;
1691 *budget -= work_done;
1692 dev->quota -= work_done;
1693 __netif_rx_complete(dev);
1694 spin_unlock_irq(&q->lock);
1695 return 0;
1698 tail = q->rx_tail;
1699 q->rx_head = q->rx_tail = NULL;
1700 spin_unlock_irq(&q->lock);
1702 for (ngathered = 0; avail && head; avail--) {
1703 prefetch(head->data);
1704 skbs[ngathered] = head;
1705 head = head->next;
1706 skbs[ngathered]->next = NULL;
1707 if (++ngathered == RX_BUNDLE_SIZE) {
1708 q->offload_bundles++;
1709 adapter->tdev.recv(&adapter->tdev, skbs,
1710 ngathered);
1711 ngathered = 0;
1714 if (head) { /* splice remaining packets back onto Rx queue */
1715 spin_lock_irq(&q->lock);
1716 tail->next = q->rx_head;
1717 if (!q->rx_head)
1718 q->rx_tail = tail;
1719 q->rx_head = head;
1720 spin_unlock_irq(&q->lock);
1722 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1724 work_done = limit - avail;
1725 *budget -= work_done;
1726 dev->quota -= work_done;
1727 return 1;
1731 * rx_offload - process a received offload packet
1732 * @tdev: the offload device receiving the packet
1733 * @rq: the response queue that received the packet
1734 * @skb: the packet
1735 * @rx_gather: a gather list of packets if we are building a bundle
1736 * @gather_idx: index of the next available slot in the bundle
1738 * Process an ingress offload packet and add it to the offload ingress
1739 * queue. Returns the index of the next available slot in the bundle.
1741 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1742 struct sk_buff *skb, struct sk_buff *rx_gather[],
1743 unsigned int gather_idx)
1745 rq->offload_pkts++;
1746 skb_reset_mac_header(skb);
1747 skb_reset_network_header(skb);
1748 skb_reset_transport_header(skb);
1750 if (rq->polling) {
1751 rx_gather[gather_idx++] = skb;
1752 if (gather_idx == RX_BUNDLE_SIZE) {
1753 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1754 gather_idx = 0;
1755 rq->offload_bundles++;
1757 } else
1758 offload_enqueue(rq, skb);
1760 return gather_idx;
1764 * restart_tx - check whether to restart suspended Tx queues
1765 * @qs: the queue set to resume
1767 * Restarts suspended Tx queues of an SGE queue set if they have enough
1768 * free resources to resume operation.
1770 static void restart_tx(struct sge_qset *qs)
1772 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1773 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1774 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1775 qs->txq[TXQ_ETH].restarts++;
1776 if (netif_running(qs->netdev))
1777 netif_wake_queue(qs->netdev);
1780 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1781 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1782 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1783 qs->txq[TXQ_OFLD].restarts++;
1784 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1786 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1787 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1788 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1789 qs->txq[TXQ_CTRL].restarts++;
1790 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1795 * rx_eth - process an ingress ethernet packet
1796 * @adap: the adapter
1797 * @rq: the response queue that received the packet
1798 * @skb: the packet
1799 * @pad: amount of padding at the start of the buffer
1801 * Process an ingress Ethernet packet and deliver it to the stack.
1802 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1803 * if it was immediate data in a response.
1805 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1806 struct sk_buff *skb, int pad)
1808 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1809 struct port_info *pi;
1811 skb_pull(skb, sizeof(*p) + pad);
1812 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1813 skb->dev->last_rx = jiffies;
1814 pi = netdev_priv(skb->dev);
1815 if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
1816 !p->fragment) {
1817 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1818 skb->ip_summed = CHECKSUM_UNNECESSARY;
1819 } else
1820 skb->ip_summed = CHECKSUM_NONE;
1822 if (unlikely(p->vlan_valid)) {
1823 struct vlan_group *grp = pi->vlan_grp;
1825 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1826 if (likely(grp))
1827 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1828 rq->polling);
1829 else
1830 dev_kfree_skb_any(skb);
1831 } else if (rq->polling)
1832 netif_receive_skb(skb);
1833 else
1834 netif_rx(skb);
1838 * handle_rsp_cntrl_info - handles control information in a response
1839 * @qs: the queue set corresponding to the response
1840 * @flags: the response control flags
1842 * Handles the control information of an SGE response, such as GTS
1843 * indications and completion credits for the queue set's Tx queues.
1844 * HW coalesces credits, we don't do any extra SW coalescing.
1846 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
1848 unsigned int credits;
1850 #if USE_GTS
1851 if (flags & F_RSPD_TXQ0_GTS)
1852 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1853 #endif
1855 credits = G_RSPD_TXQ0_CR(flags);
1856 if (credits)
1857 qs->txq[TXQ_ETH].processed += credits;
1859 credits = G_RSPD_TXQ2_CR(flags);
1860 if (credits)
1861 qs->txq[TXQ_CTRL].processed += credits;
1863 # if USE_GTS
1864 if (flags & F_RSPD_TXQ1_GTS)
1865 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1866 # endif
1867 credits = G_RSPD_TXQ1_CR(flags);
1868 if (credits)
1869 qs->txq[TXQ_OFLD].processed += credits;
1873 * check_ring_db - check if we need to ring any doorbells
1874 * @adapter: the adapter
1875 * @qs: the queue set whose Tx queues are to be examined
1876 * @sleeping: indicates which Tx queue sent GTS
1878 * Checks if some of a queue set's Tx queues need to ring their doorbells
1879 * to resume transmission after idling while they still have unprocessed
1880 * descriptors.
1882 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1883 unsigned int sleeping)
1885 if (sleeping & F_RSPD_TXQ0_GTS) {
1886 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1888 if (txq->cleaned + txq->in_use != txq->processed &&
1889 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1890 set_bit(TXQ_RUNNING, &txq->flags);
1891 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1892 V_EGRCNTX(txq->cntxt_id));
1896 if (sleeping & F_RSPD_TXQ1_GTS) {
1897 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1899 if (txq->cleaned + txq->in_use != txq->processed &&
1900 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1901 set_bit(TXQ_RUNNING, &txq->flags);
1902 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1903 V_EGRCNTX(txq->cntxt_id));
1909 * is_new_response - check if a response is newly written
1910 * @r: the response descriptor
1911 * @q: the response queue
1913 * Returns true if a response descriptor contains a yet unprocessed
1914 * response.
1916 static inline int is_new_response(const struct rsp_desc *r,
1917 const struct sge_rspq *q)
1919 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
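/*
 * A short illustration of the generation handshake, assuming the hardware
 * flips the generation value it writes on every pass over the ring (which
 * is what the wrap handling in process_responses() below relies on):
 *
 *	pass 1: HW writes responses with gen == 1; SW consumes them while
 *		q->gen == 1, then flips q->gen to 0 when cidx wraps.
 *	pass 2: slots still holding gen == 1 are stale and fail the test
 *		above; once HW rewrites a slot with gen == 0 it is new again.
 */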
1922 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1923 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1924 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1925 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1926 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1928 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1929 #define NOMEM_INTR_DELAY 2500
1932 * process_responses - process responses from an SGE response queue
1933 * @adap: the adapter
1934 * @qs: the queue set to which the response queue belongs
1935 * @budget: how many responses can be processed in this round
1937 * Process responses from an SGE response queue up to the supplied budget.
1938 * Responses include received packets as well as credits and other events
1939 * for the queues that belong to the response queue's queue set.
1940 * A negative budget is effectively unlimited.
1942 * Additionally, choose the interrupt holdoff time for the next interrupt
1943 * on this queue. If the system is under memory shortage, use a fairly
1944 * long delay to help recovery.
1946 static int process_responses(struct adapter *adap, struct sge_qset *qs,
1947 int budget)
1949 struct sge_rspq *q = &qs->rspq;
1950 struct rsp_desc *r = &q->desc[q->cidx];
1951 int budget_left = budget;
1952 unsigned int sleeping = 0;
1953 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
1954 int ngathered = 0;
1956 q->next_holdoff = q->holdoff_tmr;
1958 while (likely(budget_left && is_new_response(r, q))) {
1959 int eth, ethpad = 2;
1960 struct sk_buff *skb = NULL;
1961 u32 len, flags = ntohl(r->flags);
1962 u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
1964 eth = r->rss_hdr.opcode == CPL_RX_PKT;
1966 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
1967 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
1968 if (!skb)
1969 goto no_mem;
1971 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
1972 skb->data[0] = CPL_ASYNC_NOTIF;
1973 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
1974 q->async_notif++;
1975 } else if (flags & F_RSPD_IMM_DATA_VALID) {
1976 skb = get_imm_packet(r);
1977 if (unlikely(!skb)) {
1978 no_mem:
1979 q->next_holdoff = NOMEM_INTR_DELAY;
1980 q->nomem++;
1981 /* consume one credit since we tried */
1982 budget_left--;
1983 break;
1985 q->imm_data++;
1986 ethpad = 0;
1987 } else if ((len = ntohl(r->len_cq)) != 0) {
1988 struct sge_fl *fl;
1990 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
1991 if (fl->use_pages) {
1992 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
1994 prefetch(addr);
1995 #if L1_CACHE_BYTES < 128
1996 prefetch(addr + L1_CACHE_BYTES);
1997 #endif
1998 __refill_fl(adap, fl);
2000 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
2001 eth ? SGE_RX_DROP_THRES : 0);
2002 } else
2003 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2004 eth ? SGE_RX_DROP_THRES : 0);
2005 if (unlikely(!skb)) {
2006 if (!eth)
2007 goto no_mem;
2008 q->rx_drops++;
2009 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2010 __skb_pull(skb, 2);
2012 if (++fl->cidx == fl->size)
2013 fl->cidx = 0;
2014 } else
2015 q->pure_rsps++;
2017 if (flags & RSPD_CTRL_MASK) {
2018 sleeping |= flags & RSPD_GTS_MASK;
2019 handle_rsp_cntrl_info(qs, flags);
2022 r++;
2023 if (unlikely(++q->cidx == q->size)) {
2024 q->cidx = 0;
2025 q->gen ^= 1;
2026 r = q->desc;
2028 prefetch(r);
2030 if (++q->credits >= (q->size / 4)) {
2031 refill_rspq(adap, q, q->credits);
2032 q->credits = 0;
2035 if (likely(skb != NULL)) {
2036 if (eth)
2037 rx_eth(adap, q, skb, ethpad);
2038 else {
2039 /* Preserve the RSS info in csum & priority */
2040 skb->csum = rss_hi;
2041 skb->priority = rss_lo;
2042 ngathered = rx_offload(&adap->tdev, q, skb,
2043 offload_skbs,
2044 ngathered);
2047 --budget_left;
2050 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2051 if (sleeping)
2052 check_ring_db(adap, qs, sleeping);
2054 smp_mb(); /* commit Tx queue .processed updates */
2055 if (unlikely(qs->txq_stopped != 0))
2056 restart_tx(qs);
2058 budget -= budget_left;
2059 return budget;
2062 static inline int is_pure_response(const struct rsp_desc *r)
2064 u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2066 return (n | r->len_cq) == 0;
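/*
 * Note that r->len_cq does not need ntohl() in the test above: it only
 * matters whether the field is zero, and zero is the same in either byte
 * order.
 */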
2070 * napi_rx_handler - the NAPI handler for Rx processing
2071 * @dev: the net device
2072 * @budget: how many packets we can process in this round
2074 * Handler for new data events when using NAPI.
2076 static int napi_rx_handler(struct net_device *dev, int *budget)
2078 struct adapter *adap = dev->priv;
2079 struct sge_qset *qs = dev2qset(dev);
2080 int effective_budget = min(*budget, dev->quota);
2082 int work_done = process_responses(adap, qs, effective_budget);
2083 *budget -= work_done;
2084 dev->quota -= work_done;
2086 if (work_done >= effective_budget)
2087 return 1;
2089 netif_rx_complete(dev);
2092 * Because we don't atomically flush the following write it is
2093 * possible that in very rare cases it can reach the device in a way
2094 * that races with a new response being written plus an error interrupt
2095 * causing the NAPI interrupt handler below to return unhandled status
2096 * to the OS. To protect against this would require flushing the write
2097 * and doing both the write and the flush with interrupts off. Way too
2098 * expensive and unjustifiable given the rarity of the race.
2100 * The race cannot happen at all with MSI-X.
2102 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2103 V_NEWTIMER(qs->rspq.next_holdoff) |
2104 V_NEWINDEX(qs->rspq.cidx));
2105 return 0;
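/*
 * How the handler above gets invoked under the old per-netdev NAPI model,
 * as a rough sketch; the weight value is only an assumed placeholder, the
 * rest mirrors t3_update_qset_coalesce() and the interrupt paths below:
 *
 *	dev->poll   = napi_rx_handler;		(t3_update_qset_coalesce)
 *	dev->weight = 64;			(assumed; set elsewhere in the driver)
 *
 *	if (__netif_rx_schedule_prep(dev))	(from hard interrupt context)
 *		__netif_rx_schedule(dev);	(net_rx_action later calls dev->poll)
 */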
2109 * Returns true if the device is already scheduled for polling.
2111 static inline int napi_is_scheduled(struct net_device *dev)
2113 return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
2117 * process_pure_responses - process pure responses from a response queue
2118 * @adap: the adapter
2119 * @qs: the queue set owning the response queue
2120 * @r: the first pure response to process
2122 * A simpler version of process_responses() that handles only pure (i.e.,
2123 * non data-carrying) responses. Such responses are too lightweight to
2124 * justify calling a softirq under NAPI, so we handle them specially in
2125 * the interrupt handler. The function is called with a pointer to a
2126 * response, which the caller must ensure is a valid pure response.
2128 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2130 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2131 struct rsp_desc *r)
2133 struct sge_rspq *q = &qs->rspq;
2134 unsigned int sleeping = 0;
2136 do {
2137 u32 flags = ntohl(r->flags);
2139 r++;
2140 if (unlikely(++q->cidx == q->size)) {
2141 q->cidx = 0;
2142 q->gen ^= 1;
2143 r = q->desc;
2145 prefetch(r);
2147 if (flags & RSPD_CTRL_MASK) {
2148 sleeping |= flags & RSPD_GTS_MASK;
2149 handle_rsp_cntrl_info(qs, flags);
2152 q->pure_rsps++;
2153 if (++q->credits >= (q->size / 4)) {
2154 refill_rspq(adap, q, q->credits);
2155 q->credits = 0;
2157 } while (is_new_response(r, q) && is_pure_response(r));
2159 if (sleeping)
2160 check_ring_db(adap, qs, sleeping);
2162 smp_mb(); /* commit Tx queue .processed updates */
2163 if (unlikely(qs->txq_stopped != 0))
2164 restart_tx(qs);
2166 return is_new_response(r, q);
2170 * handle_responses - decide what to do with new responses in NAPI mode
2171 * @adap: the adapter
2172 * @q: the response queue
2174 * This is used by the NAPI interrupt handlers to decide what to do with
2175 * new SGE responses. If there are no new responses it returns -1. If
2176 * there are new responses and they are pure (i.e., non-data carrying)
2177 * it handles them straight in hard interrupt context as they are very
2178 * cheap and don't deliver any packets. Finally, if there are any data
2179 * signaling responses it schedules the NAPI handler. Returns 1 if it
2180 * schedules NAPI, 0 if all new responses were pure.
2182 * The caller must ascertain NAPI is not already running.
2184 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2186 struct sge_qset *qs = rspq_to_qset(q);
2187 struct rsp_desc *r = &q->desc[q->cidx];
2189 if (!is_new_response(r, q))
2190 return -1;
2191 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2192 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2193 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2194 return 0;
2196 if (likely(__netif_rx_schedule_prep(qs->netdev)))
2197 __netif_rx_schedule(qs->netdev);
2198 return 1;
2202 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2203 * (i.e., response queue serviced in hard interrupt).
2205 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2207 struct sge_qset *qs = cookie;
2208 struct adapter *adap = qs->netdev->priv;
2209 struct sge_rspq *q = &qs->rspq;
2211 spin_lock(&q->lock);
2212 if (process_responses(adap, qs, -1) == 0)
2213 q->unhandled_irqs++;
2214 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2215 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2216 spin_unlock(&q->lock);
2217 return IRQ_HANDLED;
2221 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2222 * (i.e., response queue serviced by NAPI polling).
2224 irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2226 struct sge_qset *qs = cookie;
2227 struct adapter *adap = qs->netdev->priv;
2228 struct sge_rspq *q = &qs->rspq;
2230 spin_lock(&q->lock);
2232 if (handle_responses(adap, q) < 0)
2233 q->unhandled_irqs++;
2234 spin_unlock(&q->lock);
2235 return IRQ_HANDLED;
2239 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2240 * SGE response queues as well as error and other async events as they all use
2241 * the same MSI vector. We use one SGE response queue per port in this mode
2242 * and protect all response queues with queue 0's lock.
2244 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2246 int new_packets = 0;
2247 struct adapter *adap = cookie;
2248 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2250 spin_lock(&q->lock);
2252 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2253 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2254 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2255 new_packets = 1;
2258 if (adap->params.nports == 2 &&
2259 process_responses(adap, &adap->sge.qs[1], -1)) {
2260 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2262 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2263 V_NEWTIMER(q1->next_holdoff) |
2264 V_NEWINDEX(q1->cidx));
2265 new_packets = 1;
2268 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2269 q->unhandled_irqs++;
2271 spin_unlock(&q->lock);
2272 return IRQ_HANDLED;
2275 static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
2277 if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
2278 if (likely(__netif_rx_schedule_prep(dev)))
2279 __netif_rx_schedule(dev);
2280 return 1;
2282 return 0;
2286 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2287 * by NAPI polling). Handles data events from SGE response queues as well as
2288 * error and other async events as they all use the same MSI vector. We use
2289 * one SGE response queue per port in this mode and protect all response
2290 * queues with queue 0's lock.
2292 irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2294 int new_packets;
2295 struct adapter *adap = cookie;
2296 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2298 spin_lock(&q->lock);
2300 new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
2301 if (adap->params.nports == 2)
2302 new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
2303 &adap->sge.qs[1].rspq);
2304 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2305 q->unhandled_irqs++;
2307 spin_unlock(&q->lock);
2308 return IRQ_HANDLED;
2312 * A helper function that processes responses and issues GTS.
2314 static inline int process_responses_gts(struct adapter *adap,
2315 struct sge_rspq *rq)
2317 int work;
2319 work = process_responses(adap, rspq_to_qset(rq), -1);
2320 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2321 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2322 return work;
2326 * The legacy INTx interrupt handler. This needs to handle data events from
2327 * SGE response queues as well as error and other async events as they all use
2328 * the same interrupt pin. We use one SGE response queue per port in this mode
2329 * and protect all response queues with queue 0's lock.
2331 static irqreturn_t t3_intr(int irq, void *cookie)
2333 int work_done, w0, w1;
2334 struct adapter *adap = cookie;
2335 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2336 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2338 spin_lock(&q0->lock);
2340 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2341 w1 = adap->params.nports == 2 &&
2342 is_new_response(&q1->desc[q1->cidx], q1);
2344 if (likely(w0 | w1)) {
2345 t3_write_reg(adap, A_PL_CLI, 0);
2346 t3_read_reg(adap, A_PL_CLI); /* flush */
2348 if (likely(w0))
2349 process_responses_gts(adap, q0);
2351 if (w1)
2352 process_responses_gts(adap, q1);
2354 work_done = w0 | w1;
2355 } else
2356 work_done = t3_slow_intr_handler(adap);
2358 spin_unlock(&q0->lock);
2359 return IRQ_RETVAL(work_done != 0);
2363 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2364 * Handles data events from SGE response queues as well as error and other
2365 * async events as they all use the same interrupt pin. We use one SGE
2366 * response queue per port in this mode and protect all response queues with
2367 * queue 0's lock.
2369 static irqreturn_t t3b_intr(int irq, void *cookie)
2371 u32 map;
2372 struct adapter *adap = cookie;
2373 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2375 t3_write_reg(adap, A_PL_CLI, 0);
2376 map = t3_read_reg(adap, A_SG_DATA_INTR);
2378 if (unlikely(!map)) /* shared interrupt, most likely */
2379 return IRQ_NONE;
2381 spin_lock(&q0->lock);
2383 if (unlikely(map & F_ERRINTR))
2384 t3_slow_intr_handler(adap);
2386 if (likely(map & 1))
2387 process_responses_gts(adap, q0);
2389 if (map & 2)
2390 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2392 spin_unlock(&q0->lock);
2393 return IRQ_HANDLED;
2397 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2398 * Handles data events from SGE response queues as well as error and other
2399 * async events as they all use the same interrupt pin. We use one SGE
2400 * response queue per port in this mode and protect all response queues with
2401 * queue 0's lock.
2403 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2405 u32 map;
2406 struct net_device *dev;
2407 struct adapter *adap = cookie;
2408 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2410 t3_write_reg(adap, A_PL_CLI, 0);
2411 map = t3_read_reg(adap, A_SG_DATA_INTR);
2413 if (unlikely(!map)) /* shared interrupt, most likely */
2414 return IRQ_NONE;
2416 spin_lock(&q0->lock);
2418 if (unlikely(map & F_ERRINTR))
2419 t3_slow_intr_handler(adap);
2421 if (likely(map & 1)) {
2422 dev = adap->sge.qs[0].netdev;
2424 if (likely(__netif_rx_schedule_prep(dev)))
2425 __netif_rx_schedule(dev);
2427 if (map & 2) {
2428 dev = adap->sge.qs[1].netdev;
2430 if (likely(__netif_rx_schedule_prep(dev)))
2431 __netif_rx_schedule(dev);
2434 spin_unlock(&q0->lock);
2435 return IRQ_HANDLED;
2439 * t3_intr_handler - select the top-level interrupt handler
2440 * @adap: the adapter
2441 * @polling: whether using NAPI to service response queues
2443 * Selects the top-level interrupt handler based on the type of interrupts
2444 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2445 * response queues.
2447 intr_handler_t t3_intr_handler(struct adapter *adap, int polling)
2449 if (adap->flags & USING_MSIX)
2450 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2451 if (adap->flags & USING_MSI)
2452 return polling ? t3_intr_msi_napi : t3_intr_msi;
2453 if (adap->params.rev > 0)
2454 return polling ? t3b_intr_napi : t3b_intr;
2455 return t3_intr;
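/*
 * Illustrative call site for the selector above, with placeholder flags,
 * name, and polling argument rather than the real ones from cxgb3_main.c:
 *
 *	err = request_irq(adap->pdev->irq,
 *			  t3_intr_handler(adap, rspq_polling),
 *			  IRQF_SHARED, "cxgb3", adap);
 */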
2459 * t3_sge_err_intr_handler - SGE async event interrupt handler
2460 * @adapter: the adapter
2462 * Interrupt handler for SGE asynchronous (non-data) events.
2464 void t3_sge_err_intr_handler(struct adapter *adapter)
2466 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2468 if (status & F_RSPQCREDITOVERFOW)
2469 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2471 if (status & F_RSPQDISABLED) {
2472 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2474 CH_ALERT(adapter,
2475 "packet delivered to disabled response queue "
2476 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2479 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2480 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
2481 t3_fatal_err(adapter);
2485 * sge_timer_cb - perform periodic maintenance of an SGE qset
2486 * @data: the SGE queue set to maintain
2488 * Runs periodically from a timer to perform maintenance of an SGE queue
2489 * set. It performs two tasks:
2491 * a) Cleans up any completed Tx descriptors that may still be pending.
2492 * Normal descriptor cleanup happens when new packets are added to a Tx
2493 * queue so this timer is relatively infrequent and does any cleanup only
2494 * if the Tx queue has not seen any new packets in a while. We make a
2495 * best effort attempt to reclaim descriptors, in that we don't wait
2496 * around if we cannot get a queue's lock (which most likely is because
2497 * someone else is queueing new packets and so will also handle the clean
2498 * up). Since control queues use immediate data exclusively, we don't
2499 * bother cleaning them up here.
2501 * b) Replenishes Rx queues that have run out due to memory shortage.
2502 * Normally new Rx buffers are added when existing ones are consumed but
2503 * when out of memory a queue can become empty. We try to add only a few
2504 * buffers here; the queue will be replenished fully as these new buffers
2505 * are used up if memory shortage has subsided.
2507 static void sge_timer_cb(unsigned long data)
2509 spinlock_t *lock;
2510 struct sge_qset *qs = (struct sge_qset *)data;
2511 struct adapter *adap = qs->netdev->priv;
2513 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2514 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2515 spin_unlock(&qs->txq[TXQ_ETH].lock);
2517 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2518 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2519 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2521 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2522 &adap->sge.qs[0].rspq.lock;
2523 if (spin_trylock_irq(lock)) {
2524 if (!napi_is_scheduled(qs->netdev)) {
2525 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2527 if (qs->fl[0].credits < qs->fl[0].size)
2528 __refill_fl(adap, &qs->fl[0]);
2529 if (qs->fl[1].credits < qs->fl[1].size)
2530 __refill_fl(adap, &qs->fl[1]);
2532 if (status & (1 << qs->rspq.cntxt_id)) {
2533 qs->rspq.starved++;
2534 if (qs->rspq.credits) {
2535 refill_rspq(adap, &qs->rspq, 1);
2536 qs->rspq.credits--;
2537 qs->rspq.restarted++;
2538 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2539 1 << qs->rspq.cntxt_id);
2543 spin_unlock_irq(lock);
2545 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2549 * t3_update_qset_coalesce - update coalescing settings for a queue set
2550 * @qs: the SGE queue set
2551 * @p: new queue set parameters
2553 * Update the coalescing settings for an SGE queue set. Nothing is done
2554 * if the queue set is not initialized yet.
2556 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2558 if (!qs->netdev)
2559 return;
2561 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2562 qs->rspq.polling = p->polling;
2563 qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
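/*
 * Worked example for the conversion above: t3_sge_init() programs
 * A_SG_TIMER_TICK with core_ticks_per_usec()/10, so one holdoff unit is
 * 0.1 us.  A coalesce_usecs of 5 therefore becomes holdoff_tmr = 50, and
 * a setting of 0 is clamped to the 1-unit (0.1 us) minimum.
 */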
2567 * t3_sge_alloc_qset - initialize an SGE queue set
2568 * @adapter: the adapter
2569 * @id: the queue set id
2570 * @nports: how many Ethernet ports will be using this queue set
2571 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2572 * @p: configuration parameters for this queue set
2573 * @ntxq: number of Tx queues for the queue set
2574 * @netdev: net device associated with this queue set
2576 * Allocate resources and initialize an SGE queue set. A queue set
2577 * comprises a response queue, two Rx free-buffer queues, and up to 3
2578 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2579 * queue, offload queue, and control queue.
2581 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2582 int irq_vec_idx, const struct qset_params *p,
2583 int ntxq, struct net_device *netdev)
2585 int i, ret = -ENOMEM;
2586 struct sge_qset *q = &adapter->sge.qs[id];
2588 init_qset_cntxt(q, id);
2589 init_timer(&q->tx_reclaim_timer);
2590 q->tx_reclaim_timer.data = (unsigned long)q;
2591 q->tx_reclaim_timer.function = sge_timer_cb;
2593 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2594 sizeof(struct rx_desc),
2595 sizeof(struct rx_sw_desc),
2596 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2597 if (!q->fl[0].desc)
2598 goto err;
2600 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2601 sizeof(struct rx_desc),
2602 sizeof(struct rx_sw_desc),
2603 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2604 if (!q->fl[1].desc)
2605 goto err;
2607 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2608 sizeof(struct rsp_desc), 0,
2609 &q->rspq.phys_addr, NULL);
2610 if (!q->rspq.desc)
2611 goto err;
2613 for (i = 0; i < ntxq; ++i) {
2615 * The control queue always uses immediate data, so it does not
2616 * need to keep track of any sk_buffs.
2618 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2620 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2621 sizeof(struct tx_desc), sz,
2622 &q->txq[i].phys_addr,
2623 &q->txq[i].sdesc);
2624 if (!q->txq[i].desc)
2625 goto err;
2627 q->txq[i].gen = 1;
2628 q->txq[i].size = p->txq_size[i];
2629 spin_lock_init(&q->txq[i].lock);
2630 skb_queue_head_init(&q->txq[i].sendq);
2633 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2634 (unsigned long)q);
2635 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2636 (unsigned long)q);
2638 q->fl[0].gen = q->fl[1].gen = 1;
2639 q->fl[0].size = p->fl_size;
2640 q->fl[1].size = p->jumbo_size;
2642 q->rspq.gen = 1;
2643 q->rspq.size = p->rspq_size;
2644 spin_lock_init(&q->rspq.lock);
2646 q->txq[TXQ_ETH].stop_thres = nports *
2647 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2649 #if FL0_PG_CHUNK_SIZE > 0
2650 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2651 #else
2652 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2653 #endif
2654 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2655 q->fl[1].buf_size = is_offload(adapter) ?
2656 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2657 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2659 spin_lock(&adapter->sge.reg_lock);
2661 /* FL threshold comparison uses < */
2662 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2663 q->rspq.phys_addr, q->rspq.size,
2664 q->fl[0].buf_size, 1, 0);
2665 if (ret)
2666 goto err_unlock;
2668 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2669 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2670 q->fl[i].phys_addr, q->fl[i].size,
2671 q->fl[i].buf_size, p->cong_thres, 1,
2673 if (ret)
2674 goto err_unlock;
2677 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2678 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2679 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2680 1, 0);
2681 if (ret)
2682 goto err_unlock;
2684 if (ntxq > 1) {
2685 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2686 USE_GTS, SGE_CNTXT_OFLD, id,
2687 q->txq[TXQ_OFLD].phys_addr,
2688 q->txq[TXQ_OFLD].size, 0, 1, 0);
2689 if (ret)
2690 goto err_unlock;
2693 if (ntxq > 2) {
2694 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2695 SGE_CNTXT_CTRL, id,
2696 q->txq[TXQ_CTRL].phys_addr,
2697 q->txq[TXQ_CTRL].size,
2698 q->txq[TXQ_CTRL].token, 1, 0);
2699 if (ret)
2700 goto err_unlock;
2703 spin_unlock(&adapter->sge.reg_lock);
2704 q->netdev = netdev;
2705 t3_update_qset_coalesce(q, p);
2708 * We use atalk_ptr as a backpointer to a qset. In case a device is
2709 * associated with multiple queue sets, only the first one sets
2710 * atalk_ptr.
2712 if (netdev->atalk_ptr == NULL)
2713 netdev->atalk_ptr = q;
2715 refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2716 refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2717 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2719 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2720 V_NEWTIMER(q->rspq.holdoff_tmr));
2722 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2723 return 0;
2725 err_unlock:
2726 spin_unlock(&adapter->sge.reg_lock);
2727 err:
2728 t3_free_qset(adapter, q);
2729 return ret;
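/*
 * Sketch of a typical call, assuming one queue set per port with all three
 * Tx queues; variable names are placeholders and the argument meanings
 * follow the kernel-doc above:
 *
 *	err = t3_sge_alloc_qset(adap, qset_id, 1, irq_vec_idx,
 *				&sge_params->qset[qset_id], 3, port_dev);
 */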
2733 * t3_free_sge_resources - free SGE resources
2734 * @adap: the adapter
2736 * Frees resources used by the SGE queue sets.
2738 void t3_free_sge_resources(struct adapter *adap)
2740 int i;
2742 for (i = 0; i < SGE_QSETS; ++i)
2743 t3_free_qset(adap, &adap->sge.qs[i]);
2747 * t3_sge_start - enable SGE
2748 * @adap: the adapter
2750 * Enables the SGE for DMAs. This is the last step in starting packet
2751 * transfers.
2753 void t3_sge_start(struct adapter *adap)
2755 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2759 * t3_sge_stop - disable SGE operation
2760 * @adap: the adapter
2762 * Disables the DMA engine. This can be called in emergencies (e.g.,
2763 * from error interrupts) or from normal process context. In the latter
2764 * case it also disables any pending queue restart tasklets. Note that
2765 * if it is called in interrupt context it cannot disable the restart
2766 * tasklets as it cannot wait; however, the tasklets will have no effect
2767 * since the doorbells are disabled and the driver will call this again
2768 * later from process context, at which time the tasklets will be stopped
2769 * if they are still running.
2771 void t3_sge_stop(struct adapter *adap)
2773 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2774 if (!in_interrupt()) {
2775 int i;
2777 for (i = 0; i < SGE_QSETS; ++i) {
2778 struct sge_qset *qs = &adap->sge.qs[i];
2780 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2781 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2787 * t3_sge_init - initialize SGE
2788 * @adap: the adapter
2789 * @p: the SGE parameters
2791 * Performs SGE initialization needed every time after a chip reset.
2792 * We do not initialize any of the queue sets here; instead, the driver
2793 * top-level must request those individually. We also do not enable DMA
2794 * here; that should be done after the queues have been set up.
2796 void t3_sge_init(struct adapter *adap, struct sge_params *p)
2798 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2800 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
2801 F_CQCRDTCTRL |
2802 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2803 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
2804 #if SGE_NUM_GENBITS == 1
2805 ctrl |= F_EGRGENCTRL;
2806 #endif
2807 if (adap->params.rev > 0) {
2808 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2809 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
2810 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
2812 t3_write_reg(adap, A_SG_CONTROL, ctrl);
2813 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2814 V_LORCQDRBTHRSH(512));
2815 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2816 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
2817 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
2818 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
2819 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2820 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2821 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2822 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2823 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2827 * t3_sge_prep - one-time SGE initialization
2828 * @adap: the associated adapter
2829 * @p: SGE parameters
2831 * Performs one-time initialization of SGE SW state. Includes determining
2832 * defaults for the assorted SGE parameters, which admins can change until
2833 * they are used to initialize the SGE.
2835 void __devinit t3_sge_prep(struct adapter *adap, struct sge_params *p)
2837 int i;
2839 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2840 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2842 for (i = 0; i < SGE_QSETS; ++i) {
2843 struct qset_params *q = p->qset + i;
2845 q->polling = adap->params.rev > 0;
2846 q->coalesce_usecs = 5;
2847 q->rspq_size = 1024;
2848 q->fl_size = 1024;
2849 q->jumbo_size = 512;
2850 q->txq_size[TXQ_ETH] = 1024;
2851 q->txq_size[TXQ_OFLD] = 1024;
2852 q->txq_size[TXQ_CTRL] = 256;
2853 q->cong_thres = 0;
2856 spin_lock_init(&adap->sge.reg_lock);
2860 * t3_get_desc - dump an SGE descriptor for debugging purposes
2861 * @qs: the queue set
2862 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2863 * @idx: the descriptor index in the queue
2864 * @data: where to dump the descriptor contents
2866 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2867 * size of the descriptor.
2869 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2870 unsigned char *data)
2872 if (qnum >= 6)
2873 return -EINVAL;
2875 if (qnum < 3) {
2876 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2877 return -EINVAL;
2878 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2879 return sizeof(struct tx_desc);
2882 if (qnum == 3) {
2883 if (!qs->rspq.desc || idx >= qs->rspq.size)
2884 return -EINVAL;
2885 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2886 return sizeof(struct rsp_desc);
2889 qnum -= 4;
2890 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2891 return -EINVAL;
2892 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2893 return sizeof(struct rx_desc);
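/*
 * Example of dumping one Ethernet Tx descriptor for debugging; qnum 0
 * selects the TXQ_ETH ring per the kernel-doc above.  Only a sketch:
 *
 *	struct tx_desc d;
 *	int i, len = t3_get_desc(qs, 0, idx, (unsigned char *)&d);
 *
 *	if (len == sizeof(d))
 *		for (i = 0; i < TX_DESC_FLITS; i++)
 *			printk(KERN_DEBUG "flit %d: 0x%016llx\n", i,
 *			       (unsigned long long)d.flit[i]);
 */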