/*
 * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>

#include "qib.h"
#include "qib_user_sdma.h"
/* minimum size of header */
#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
/* expected size of headers (for dma_pool) */
#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
/* attempt to drain the queue for 5secs */
#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
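
/*
 * Note: the drain loop in qib_user_sdma_queue_drain() below sleeps
 * roughly 10ms per iteration, so 500 iterations gives the ~5 second
 * drain window mentioned above.
 */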
struct qib_user_sdma_pkt {
        struct list_head list;  /* list element */

        u8  tiddma;             /* if this is NEW tid-sdma */
        u8  largepkt;           /* this is large pkt from kmalloc */
        u16 frag_size;          /* frag size used by PSM */
        u16 index;              /* last header index or push index */
        u16 naddr;              /* number of entries used in addr[] */
        u16 addrlimit;          /* addr array size */
        u16 tidsmidx;           /* current tidsm index */
        u16 tidsmcount;         /* tidsm array item count */
        u16 payload_size;       /* payload size so far for header */
        u32 bytes_togo;         /* bytes for processing */
        u32 counter;            /* sdma pkts queued counter for this entry */
        struct qib_tid_session_member *tidsm;   /* tid session member array */
        struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
        u64 added;              /* global descq number of entries */

        struct {
                u16 offset;                     /* offset for kvaddr, addr */
                u16 length;                     /* length in page */
                u16 first_desc;                 /* first desc */
                u16 last_desc;                  /* last desc */
                u16 put_page;                   /* should we put_page? */
                u16 dma_mapped;                 /* is page dma_mapped? */
                u16 dma_length;                 /* for dma_unmap_page() */
                struct page *page;              /* may be NULL (coherent mem) */
                void *kvaddr;                   /* FIXME: only for pio hack */
                dma_addr_t addr;
        } addr[4];   /* max pages, any more and we coalesce */
};
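
/*
 * Each addr[] entry describes one fragment that is handed to the send
 * DMA engine as a descriptor pair (see qib_user_sdma_send_frag()).
 * Small packets use the inline addr[4] array; when the user payload
 * spans more pages than fit here, it is either coalesced into a single
 * page (qib_user_sdma_coalesce()) or, for large/tid-sdma packets, the
 * packet is kmalloc'ed with a bigger array and addrlimit records its
 * size.
 */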
struct qib_user_sdma_queue {
        /*
         * pkts sent to dma engine are queued on this
         * list head.  the type of the elements of this
         * list are struct qib_user_sdma_pkt...
         */
        struct list_head sent;

        /*
         * Because the above list will be accessed by both process and
         * interrupt context, we need a spinlock for it.
         */
        spinlock_t sent_lock ____cacheline_aligned_in_smp;

        /* headers with expected length are allocated from here... */
        char header_cache_name[64];
        struct dma_pool *header_cache;

        /* packets are allocated from the slab cache... */
        char pkt_slab_name[64];
        struct kmem_cache *pkt_slab;

        /* as packets go on the queued queue, they are counted... */
        u32 counter;
        u32 sent_counter;
        /* pending packets, not sending yet */
        u32 num_pending;
        /* sending packets, not complete yet */
        u32 num_sending;
        /* global descq number of entry of last sending packet */
        u64 added;

        /* dma page info */
        struct rb_root dma_pages_root;

        /* protect everything above... */
        struct mutex lock;
};
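
/*
 * counter advances as packets are queued and sent_counter advances as
 * they complete, so the difference is effectively the number of packets
 * still in flight (see qib_user_sdma_complete_counter() and
 * qib_user_sdma_inflight_counter() at the bottom of this file).
 */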
struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
        struct qib_user_sdma_queue *pq =
                kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);

        if (!pq)
                goto done;

        pq->counter = 0;
        pq->sent_counter = 0;
        pq->num_pending = 0;
        pq->num_sending = 0;
        pq->added = 0;

        INIT_LIST_HEAD(&pq->sent);
        spin_lock_init(&pq->sent_lock);
        mutex_init(&pq->lock);

        snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
                 "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
        pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
                                         sizeof(struct qib_user_sdma_pkt),
                                         0, 0, NULL);
        if (!pq->pkt_slab)
                goto err_kfree;

        snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
                 "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
        pq->header_cache = dma_pool_create(pq->header_cache_name,
                                           dev,
                                           QIB_USER_SDMA_EXP_HEADER_LENGTH,
                                           4, 0);
        if (!pq->header_cache)
                goto err_slab;

        pq->dma_pages_root = RB_ROOT;

        goto done;

err_slab:
        kmem_cache_destroy(pq->pkt_slab);
err_kfree:
        kfree(pq);
        pq = NULL;

done:
        return pq;
}
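
/*
 * A queue created here is torn down with qib_user_sdma_queue_destroy();
 * a NULL return means one of the allocations above failed.
 */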
static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
                                    int i, u16 offset, u16 len,
                                    u16 first_desc, u16 last_desc,
                                    u16 put_page, u16 dma_mapped,
                                    struct page *page, void *kvaddr,
                                    dma_addr_t dma_addr, u16 dma_length)
{
        pkt->addr[i].offset = offset;
        pkt->addr[i].length = len;
        pkt->addr[i].first_desc = first_desc;
        pkt->addr[i].last_desc = last_desc;
        pkt->addr[i].put_page = put_page;
        pkt->addr[i].dma_mapped = dma_mapped;
        pkt->addr[i].page = page;
        pkt->addr[i].kvaddr = kvaddr;
        pkt->addr[i].addr = dma_addr;
        pkt->addr[i].dma_length = dma_length;
}
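
/*
 * This only records the fragment in pkt->addr[]; nothing is written to
 * the hardware descriptor queue until qib_user_sdma_send_frag() runs.
 */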
static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
                                        size_t len, dma_addr_t *dma_addr)
{
        void *hdr;

        if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
                hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
                                     dma_addr);
        else
                hdr = NULL;

        if (!hdr) {
                hdr = kmalloc(len, GFP_KERNEL);
                if (!hdr)
                        return NULL;

                *dma_addr = 0;
        }

        return hdr;
}
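
/*
 * Headers of exactly QIB_USER_SDMA_EXP_HEADER_LENGTH bytes come out of
 * the per-queue dma_pool already DMA-mapped (*dma_addr is filled in);
 * other sizes fall back to kmalloc() and are mapped later, which is why
 * callers test pkt->addr[...].addr for zero before dma_map_single().
 */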
static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
                                       struct qib_user_sdma_queue *pq,
                                       struct qib_user_sdma_pkt *pkt,
                                       struct page *page, u16 put,
                                       u16 offset, u16 len, void *kvaddr)
{
        __le16 *pbc16;
        void *pbcvaddr = NULL;
        struct qib_message_header *hdr;
        u16 newlen, pbclen, lastdesc, dma_mapped;
        u32 vcto;
        union qib_seqnum seqnum;
        dma_addr_t pbcdaddr;
        dma_addr_t dma_addr =
                dma_map_page(&dd->pcidev->dev,
                             page, offset, len, DMA_TO_DEVICE);
        int ret = 0;

        if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
                /*
                 * dma mapping error, pkt has not managed
                 * this page yet, return the page here so
                 * the caller can ignore this page.
                 */
                if (put) {
                        put_page(page);
                } else {
                        /* coalesce case */
                        kunmap(page);
                        __free_page(page);
                }
                ret = -ENOMEM;
                goto done;
        }
        offset = 0;
        dma_mapped = 1;

next_fragment:

        /*
         * In tid-sdma, the transfer length is restricted by
         * the receiver side's current tid page length.
         */
        if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
                newlen = pkt->tidsm[pkt->tidsmidx].length;
        else
                newlen = len;

        /*
         * Then the transfer length is restricted by MTU.
         * The last descriptor flag is determined by:
         * 1. the current packet is at frag size length.
         * 2. the current tid page is done if tid-sdma.
         * 3. there are no more bytes to go if plain sdma.
         */
        lastdesc = 0;
        if ((pkt->payload_size + newlen) >= pkt->frag_size) {
                newlen = pkt->frag_size - pkt->payload_size;
                lastdesc = 1;
        } else if (pkt->tiddma) {
                if (newlen == pkt->tidsm[pkt->tidsmidx].length)
                        lastdesc = 1;
        } else {
                if (newlen == pkt->bytes_togo)
                        lastdesc = 1;
        }

        /* fill the next fragment in this page */
        qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
                offset, newlen,         /* offset, len */
                0, lastdesc,            /* first last desc */
                put, dma_mapped,        /* put page, dma mapped */
                page, kvaddr,           /* struct page, virt addr */
                dma_addr, len);         /* dma addr, dma length */
        pkt->bytes_togo -= newlen;
        pkt->payload_size += newlen;
        pkt->naddr++;
        if (pkt->naddr == pkt->addrlimit) {
                ret = -EFAULT;
                goto done;
        }

        /* If there are no more bytes to go. (lastdesc==1) */
        if (pkt->bytes_togo == 0) {
                /* The packet is done, header is not dma mapped yet.
                 * it should be from kmalloc */
                if (!pkt->addr[pkt->index].addr) {
                        pkt->addr[pkt->index].addr =
                                dma_map_single(&dd->pcidev->dev,
                                        pkt->addr[pkt->index].kvaddr,
                                        pkt->addr[pkt->index].dma_length,
                                        DMA_TO_DEVICE);
                        if (dma_mapping_error(&dd->pcidev->dev,
                                        pkt->addr[pkt->index].addr)) {
                                ret = -ENOMEM;
                                goto done;
                        }
                        pkt->addr[pkt->index].dma_mapped = 1;
                }

                goto done;
        }

        /* If tid-sdma, advance tid info. */
        if (pkt->tiddma) {
                pkt->tidsm[pkt->tidsmidx].length -= newlen;
                if (pkt->tidsm[pkt->tidsmidx].length) {
                        pkt->tidsm[pkt->tidsmidx].offset += newlen;
                } else {
                        pkt->tidsmidx++;
                        if (pkt->tidsmidx == pkt->tidsmcount) {
                                ret = -EFAULT;
                                goto done;
                        }
                }
        }

        /*
         * If this is NOT the last descriptor. (newlen==len)
         * the current packet is not done yet, but the current
         * send side page is done.
         */
        if (lastdesc == 0)
                goto done;

        /*
         * If running this driver under PSM with message size
         * fitting into one transfer unit, it is not possible
         * to pass this line.  Otherwise, it is a bug.
         */

        /*
         * Since the current packet is done, and there are more
         * bytes to go, we need to create a new sdma header, copying
         * from the previous sdma header and modifying both.
         */
        pbclen = pkt->addr[pkt->index].length;
        pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
        if (!pbcvaddr) {
                ret = -ENOMEM;
                goto done;
        }
        /* Copy the previous sdma header to new sdma header */
        pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
        memcpy(pbcvaddr, pbc16, pbclen);

        /* Modify the previous sdma header */
        hdr = (struct qib_message_header *)&pbc16[4];

        /* New pbc length */
        pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));

        /* New packet length */
        hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

        if (pkt->tiddma) {
                /* turn on the header suppression */
                hdr->iph.pkt_flags =
                        cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
                /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
                hdr->flags &= ~(0x04|0x20);
        } else {
                /* turn off extra bytes: 20-21 bits */
                hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
                /* turn off ACK_REQ: 0x04 */
                hdr->flags &= ~(0x04);
        }

        /* New kdeth checksum */
        vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
        hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
                be16_to_cpu(hdr->lrh[2]) -
                ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
                le16_to_cpu(hdr->iph.pkt_flags));

        /* The packet is done, header is not dma mapped yet.
         * it should be from kmalloc */
        if (!pkt->addr[pkt->index].addr) {
                pkt->addr[pkt->index].addr =
                        dma_map_single(&dd->pcidev->dev,
                                pkt->addr[pkt->index].kvaddr,
                                pkt->addr[pkt->index].dma_length,
                                DMA_TO_DEVICE);
                if (dma_mapping_error(&dd->pcidev->dev,
                                pkt->addr[pkt->index].addr)) {
                        ret = -ENOMEM;
                        goto done;
                }
                pkt->addr[pkt->index].dma_mapped = 1;
        }

        /* Modify the new sdma header */
        pbc16 = (__le16 *)pbcvaddr;
        hdr = (struct qib_message_header *)&pbc16[4];

        /* New pbc length */
        pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));

        /* New packet length */
        hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

        if (pkt->tiddma) {
                /* Set new tid and offset for new sdma header */
                hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
                        (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
                        (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
                        (pkt->tidsm[pkt->tidsmidx].offset>>2));
        } else {
                /* Middle protocol new packet offset */
                hdr->uwords[2] += pkt->payload_size;
        }

        /* New kdeth checksum */
        vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
        hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
                be16_to_cpu(hdr->lrh[2]) -
                ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
                le16_to_cpu(hdr->iph.pkt_flags));

        /* Next sequence number in new sdma header */
        seqnum.val = be32_to_cpu(hdr->bth[2]);
        if (pkt->tiddma)
                seqnum.seq++;
        else
                seqnum.pkt++;
        hdr->bth[2] = cpu_to_be32(seqnum.val);

        /* Init new sdma header. */
        qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
                0, pbclen,              /* offset, len */
                1, 0,                   /* first last desc */
                0, 0,                   /* put page, dma mapped */
                NULL, pbcvaddr,         /* struct page, virt addr */
                pbcdaddr, pbclen);      /* dma addr, dma length */
        pkt->index = pkt->naddr;
        pkt->payload_size = 0;
        pkt->naddr++;
        if (pkt->naddr == pkt->addrlimit) {
                ret = -EFAULT;
                goto done;
        }

        /* Prepare for next fragment in this page */
        if (newlen != len) {
                if (dma_mapped) {
                        put = 0;
                        dma_mapped = 0;
                        page = NULL;
                        kvaddr = NULL;
                }
                len -= newlen;
                offset += newlen;

                goto next_fragment;
        }

done:
        return ret;
}
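
/*
 * Summary of the header-splitting scheme above: when a fragment fills
 * the current packet (frag_size reached) but bytes remain in the user
 * buffer, the previous PBC+header is copied into a freshly allocated
 * header.  The old copy is trimmed to the bytes already described; the
 * new copy gets the remaining length, an updated tid/offset (or middle
 * protocol offset), a recomputed kdeth checksum and the next sequence
 * number, and becomes the first descriptor of the next packet.
 */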
/* we have too many pages in the iovec, coalesce to a single page */
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
                                  struct qib_user_sdma_queue *pq,
                                  struct qib_user_sdma_pkt *pkt,
                                  const struct iovec *iov,
                                  unsigned long niov)
{
        int ret = 0;
        struct page *page = alloc_page(GFP_KERNEL);
        void *mpage_save;
        char *mpage;
        int i;
        int len = 0;

        if (!page) {
                ret = -ENOMEM;
                goto done;
        }

        mpage = kmap(page);
        mpage_save = mpage;
        for (i = 0; i < niov; i++) {
                int cfur;

                cfur = copy_from_user(mpage,
                                      iov[i].iov_base, iov[i].iov_len);
                if (cfur) {
                        ret = -EFAULT;
                        goto free_unmap;
                }

                mpage += iov[i].iov_len;
                len += iov[i].iov_len;
        }

        ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
                        page, 0, 0, len, mpage_save);
        goto done;

free_unmap:
        kunmap(page);
        __free_page(page);
done:
        return ret;
}
/*
 * How many pages in this iovec element?
 */
static int qib_user_sdma_num_pages(const struct iovec *iov)
{
        const unsigned long addr  = (unsigned long) iov->iov_base;
        const unsigned long  len  = iov->iov_len;
        const unsigned long spage = addr & PAGE_MASK;
        const unsigned long epage = (addr + len - 1) & PAGE_MASK;

        return 1 + ((epage - spage) >> PAGE_SHIFT);
}
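
/*
 * Example: with 4KB pages, iov_base = 0x1ffc and iov_len = 8 touches
 * two pages (spage = 0x1000, epage = 0x2000), and the formula returns
 * 1 + ((0x2000 - 0x1000) >> 12) = 2.
 */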
static void qib_user_sdma_free_pkt_frag(struct device *dev,
                                        struct qib_user_sdma_queue *pq,
                                        struct qib_user_sdma_pkt *pkt,
                                        int frag)
{
        const int i = frag;

        if (pkt->addr[i].page) {
                /* only user data has page */
                if (pkt->addr[i].dma_mapped)
                        dma_unmap_page(dev,
                                       pkt->addr[i].addr,
                                       pkt->addr[i].dma_length,
                                       DMA_TO_DEVICE);

                if (pkt->addr[i].kvaddr)
                        kunmap(pkt->addr[i].page);

                if (pkt->addr[i].put_page)
                        put_page(pkt->addr[i].page);
                else
                        __free_page(pkt->addr[i].page);
        } else if (pkt->addr[i].kvaddr) {
                /* for headers */
                if (pkt->addr[i].dma_mapped) {
                        /* from kmalloc & dma mapped */
                        dma_unmap_single(dev,
                                         pkt->addr[i].addr,
                                         pkt->addr[i].dma_length,
                                         DMA_TO_DEVICE);
                        kfree(pkt->addr[i].kvaddr);
                } else if (pkt->addr[i].addr) {
                        /* free coherent mem from cache... */
                        dma_pool_free(pq->header_cache,
                                      pkt->addr[i].kvaddr, pkt->addr[i].addr);
                } else {
                        /* from kmalloc but not dma mapped */
                        kfree(pkt->addr[i].kvaddr);
                }
        }
}
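
/*
 * Three kinds of fragment memory are released above: page-backed user
 * data (unmapped, kunmap'ed and either put or freed), kmalloc'ed
 * headers (unmapped and kfree'd), and headers from the dma_pool
 * (returned with dma_pool_free()).
 */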
/* return number of pages pinned... */
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
                                   struct qib_user_sdma_queue *pq,
                                   struct qib_user_sdma_pkt *pkt,
                                   unsigned long addr, int tlen, int npages)
{
        struct page *pages[8];
        int i, j;
        int ret = 0;

        while (npages) {
                if (npages > 8)
                        j = 8;
                else
                        j = npages;

                ret = get_user_pages_fast(addr, j, 0, pages);
                if (ret != j) {
                        i = 0;
                        j = ret;
                        ret = -ENOMEM;
                        goto free_pages;
                }

                for (i = 0; i < j; i++) {
                        /* map the pages... */
                        unsigned long fofs = addr & ~PAGE_MASK;
                        int flen = ((fofs + tlen) > PAGE_SIZE) ?
                                (PAGE_SIZE - fofs) : tlen;

                        ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
                                pages[i], 1, fofs, flen, NULL);
                        if (ret < 0) {
                                /* current page has been taken
                                 * care of inside the above call.
                                 */
                                i++;
                                goto free_pages;
                        }

                        addr += flen;
                        tlen -= flen;
                }

                npages -= j;
        }

        goto done;

        /* if error, return all pages not managed by pkt */
free_pages:
        while (i < j)
                put_page(pages[i++]);

done:
        return ret;
}
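
/*
 * Pages are pinned in batches of at most 8 (the size of the on-stack
 * pages[] array); on a partial pin or a fragment error, the pages not
 * yet owned by the packet are released with put_page() above.
 */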
static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
                                 struct qib_user_sdma_queue *pq,
                                 struct qib_user_sdma_pkt *pkt,
                                 const struct iovec *iov,
                                 unsigned long niov)
{
        int ret = 0;
        unsigned long idx;

        for (idx = 0; idx < niov; idx++) {
                const int npages = qib_user_sdma_num_pages(iov + idx);
                const unsigned long addr = (unsigned long) iov[idx].iov_base;

                ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
                                              iov[idx].iov_len, npages);
                if (ret < 0)
                        goto free_pkt;
        }

        goto done;

free_pkt:
        /* we need to ignore the first entry here */
        for (idx = 1; idx < pkt->naddr; idx++)
                qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);

        /* need to dma unmap the first entry, this is to restore to
         * the original state so that the caller can free the memory in
         * the error condition.  The caller does not know whether it
         * was dma mapped or not. */
        if (pkt->addr[0].dma_mapped) {
                dma_unmap_single(&dd->pcidev->dev,
                       pkt->addr[0].addr,
                       pkt->addr[0].dma_length,
                       DMA_TO_DEVICE);
                pkt->addr[0].addr = 0;
                pkt->addr[0].dma_mapped = 0;
        }

done:
        return ret;
}
static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
                                      struct qib_user_sdma_queue *pq,
                                      struct qib_user_sdma_pkt *pkt,
                                      const struct iovec *iov,
                                      unsigned long niov, int npages)
{
        int ret = 0;

        if (pkt->frag_size == pkt->bytes_togo &&
                        npages >= ARRAY_SIZE(pkt->addr))
                ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
        else
                ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);

        return ret;
}
/* free a packet list... */
static void qib_user_sdma_free_pkt_list(struct device *dev,
                                        struct qib_user_sdma_queue *pq,
                                        struct list_head *list)
{
        struct qib_user_sdma_pkt *pkt, *pkt_next;

        list_for_each_entry_safe(pkt, pkt_next, list, list) {
                int i;

                for (i = 0; i < pkt->naddr; i++)
                        qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);

                if (pkt->largepkt)
                        kfree(pkt);
                else
                        kmem_cache_free(pq->pkt_slab, pkt);
        }
        INIT_LIST_HEAD(list);
}
/*
 * copy headers, coalesce etc -- pq->lock must be held
 *
 * we queue all the packets to list, returning the
 * number of bytes total.  list must be empty initially,
 * as we clean it up if there is an error...
 */
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
                                    struct qib_pportdata *ppd,
                                    struct qib_user_sdma_queue *pq,
                                    const struct iovec *iov,
                                    unsigned long niov,
                                    struct list_head *list,
                                    int *maxpkts, int *ndesc)
{
        unsigned long idx = 0;
        int ret = 0;
        int npkts = 0;
        __le32 *pbc;
        dma_addr_t dma_addr;
        struct qib_user_sdma_pkt *pkt = NULL;
        size_t len;
        size_t nw;
        u32 counter = pq->counter;
        u16 frag_size;

        while (idx < niov && npkts < *maxpkts) {
                const unsigned long addr = (unsigned long) iov[idx].iov_base;
                const unsigned long idx_save = idx;
                unsigned pktnw;
                unsigned pktnwc;
                int nfrags = 0;
                int npages = 0;
                int bytes_togo = 0;
                int tiddma = 0;
                int cfur;

                len = iov[idx].iov_len;
                nw = len >> 2;

                if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
                    len > PAGE_SIZE || len & 3 || addr & 3) {
                        ret = -EINVAL;
                        goto free_list;
                }

                pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
                if (!pbc) {
                        ret = -ENOMEM;
                        goto free_list;
                }

                cfur = copy_from_user(pbc, iov[idx].iov_base, len);
                if (cfur) {
                        ret = -EFAULT;
                        goto free_pbc;
                }

                /*
                 * This assignment is a bit strange.  It's because the
                 * pbc counts the number of 32 bit words in the full
                 * packet _except_ the first word of the pbc itself...
                 */
                pktnwc = nw - 1;

                /*
                 * pktnw computation yields the number of 32 bit words
                 * that the caller has indicated in the PBC.  Note that
                 * this is one less than the total number of words that
                 * goes to the send DMA engine as the first 32 bit word
                 * of the PBC itself is not counted.  Armed with this count,
                 * we can verify that the packet is consistent with the
                 * iovec lengths.
                 */
                pktnw = le32_to_cpu(*pbc) & 0xFFFF;
                if (pktnw < pktnwc) {
                        ret = -EINVAL;
                        goto free_pbc;
                }

                idx++;
                while (pktnwc < pktnw && idx < niov) {
                        const size_t slen = iov[idx].iov_len;
                        const unsigned long faddr =
                                (unsigned long) iov[idx].iov_base;

                        if (slen & 3 || faddr & 3 || !slen) {
                                ret = -EINVAL;
                                goto free_pbc;
                        }

                        npages += qib_user_sdma_num_pages(&iov[idx]);

                        bytes_togo += slen;
                        pktnwc += slen >> 2;
                        idx++;
                        nfrags++;
                }

                if (pktnwc != pktnw) {
                        ret = -EINVAL;
                        goto free_pbc;
                }

                frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
                if (((frag_size ? frag_size : bytes_togo) + len) >
                                                ppd->ibmaxlen) {
                        ret = -EINVAL;
                        goto free_pbc;
                }

                if (frag_size) {
                        int pktsize, tidsmsize, n;

                        n = npages*((2*PAGE_SIZE/frag_size)+1);
                        pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;

                        /*
                         * Determine if this is tid-sdma or just sdma.
                         */
                        tiddma = (((le32_to_cpu(pbc[7])>>
                                QLOGIC_IB_I_TID_SHIFT)&
                                QLOGIC_IB_I_TID_MASK) !=
                                QLOGIC_IB_I_TID_MASK);

                        if (tiddma)
                                tidsmsize = iov[idx].iov_len;
                        else
                                tidsmsize = 0;

                        pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
                        if (!pkt) {
                                ret = -ENOMEM;
                                goto free_pbc;
                        }
                        pkt->largepkt = 1;
                        pkt->frag_size = frag_size;
                        pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);

                        if (tiddma) {
                                char *tidsm = (char *)pkt + pktsize;

                                cfur = copy_from_user(tidsm,
                                        iov[idx].iov_base, tidsmsize);
                                if (cfur) {
                                        ret = -EFAULT;
                                        goto free_pkt;
                                }
                                pkt->tidsm =
                                        (struct qib_tid_session_member *)tidsm;
                                pkt->tidsmcount = tidsmsize/
                                        sizeof(struct qib_tid_session_member);
                                pkt->tidsmidx = 0;
                                idx++;
                        }

                        /*
                         * pbc 'fill1' field is borrowed to pass frag size,
                         * we need to clear it after picking frag size, the
                         * hardware requires this field to be zero.
                         */
                        *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
                } else {
                        pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
                        if (!pkt) {
                                ret = -ENOMEM;
                                goto free_pbc;
                        }
                        pkt->largepkt = 0;
                        pkt->frag_size = bytes_togo;
                        pkt->addrlimit = ARRAY_SIZE(pkt->addr);
                }
                pkt->bytes_togo = bytes_togo;
                pkt->payload_size = 0;
                pkt->counter = counter;
                pkt->tiddma = tiddma;

                /* setup the first header */
                qib_user_sdma_init_frag(pkt, 0, /* index */
                        0, len,         /* offset, len */
                        1, 0,           /* first last desc */
                        0, 0,           /* put page, dma mapped */
                        NULL, pbc,      /* struct page, virt addr */
                        dma_addr, len); /* dma addr, dma length */
                pkt->index = 0;
                pkt->naddr = 1;

                if (nfrags) {
                        ret = qib_user_sdma_init_payload(dd, pq, pkt,
                                                         iov + idx_save + 1,
                                                         nfrags, npages);
                        if (ret < 0)
                                goto free_pkt;
                } else {
                        /* since there is no payload, mark the
                         * header as the last desc. */
                        pkt->addr[0].last_desc = 1;

                        if (dma_addr == 0) {
                                /*
                                 * the header is not dma mapped yet.
                                 * it should be from kmalloc.
                                 */
                                dma_addr = dma_map_single(&dd->pcidev->dev,
                                        pbc, len, DMA_TO_DEVICE);
                                if (dma_mapping_error(&dd->pcidev->dev,
                                                      dma_addr)) {
                                        ret = -ENOMEM;
                                        goto free_pkt;
                                }
                                pkt->addr[0].addr = dma_addr;
                                pkt->addr[0].dma_mapped = 1;
                        }
                }

                counter++;
                npkts++;
                pkt->pq = pq;
                pkt->index = 0; /* reset index for push on hw */
                *ndesc += pkt->naddr;

                list_add_tail(&pkt->list, list);
        }

        *maxpkts = npkts;
        ret = idx;
        goto done;

free_pkt:
        if (pkt->largepkt)
                kfree(pkt);
        else
                kmem_cache_free(pq->pkt_slab, pkt);
free_pbc:
        if (dma_addr)
                dma_pool_free(pq->header_cache, pbc, dma_addr);
        else
                kfree(pbc);
free_list:
        qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
        return ret;
}
static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
                                               u32 c)
{
        pq->sent_counter = c;
}
/* try to clean out the queue -- needs pq->lock */
static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
                                     struct qib_user_sdma_queue *pq)
{
        struct qib_devdata *dd = ppd->dd;
        struct list_head free_list;
        struct qib_user_sdma_pkt *pkt;
        struct qib_user_sdma_pkt *pkt_prev;
        unsigned long flags;
        int ret = 0;

        if (!pq->num_sending)
                return 0;

        INIT_LIST_HEAD(&free_list);

        /*
         * We need the spin lock here because the interrupt handler
         * might modify this list in qib_user_sdma_send_desc(), and we
         * must not be interrupted while holding it, otherwise we
         * deadlock.
         */
        spin_lock_irqsave(&pq->sent_lock, flags);
        list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
                s64 descd = ppd->sdma_descq_removed - pkt->added;

                if (descd < 0)
                        break;

                list_move_tail(&pkt->list, &free_list);

                /* one more packet cleaned */
                ret++;
                pq->num_sending--;
        }
        spin_unlock_irqrestore(&pq->sent_lock, flags);

        if (!list_empty(&free_list)) {
                u32 counter;

                pkt = list_entry(free_list.prev,
                                 struct qib_user_sdma_pkt, list);
                counter = pkt->counter;

                qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
                qib_user_sdma_set_complete_counter(pq, counter);
        }

        return ret;
}
void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
{
        if (!pq)
                return;

        kmem_cache_destroy(pq->pkt_slab);
        dma_pool_destroy(pq->header_cache);
        kfree(pq);
}
/* clean descriptor queue, returns > 0 if some elements cleaned */
static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
{
        int ret;
        unsigned long flags;

        spin_lock_irqsave(&ppd->sdma_lock, flags);
        ret = qib_sdma_make_progress(ppd);
        spin_unlock_irqrestore(&ppd->sdma_lock, flags);

        return ret;
}
/* we're in close, drain packets so that we can cleanup successfully... */
void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
                               struct qib_user_sdma_queue *pq)
{
        struct qib_devdata *dd = ppd->dd;
        unsigned long flags;
        int i;

        if (!pq)
                return;

        for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
                mutex_lock(&pq->lock);
                if (!pq->num_pending && !pq->num_sending) {
                        mutex_unlock(&pq->lock);
                        break;
                }
                qib_user_sdma_hwqueue_clean(ppd);
                qib_user_sdma_queue_clean(ppd, pq);
                mutex_unlock(&pq->lock);
                msleep(10);
        }

        if (pq->num_pending || pq->num_sending) {
                struct qib_user_sdma_pkt *pkt;
                struct qib_user_sdma_pkt *pkt_prev;
                struct list_head free_list;

                mutex_lock(&pq->lock);
                spin_lock_irqsave(&ppd->sdma_lock, flags);
                /*
                 * Since we hold sdma_lock, it is safe without sent_lock.
                 */
                if (pq->num_pending) {
                        list_for_each_entry_safe(pkt, pkt_prev,
                                        &ppd->sdma_userpending, list) {
                                if (pkt->pq == pq) {
                                        list_move_tail(&pkt->list, &pq->sent);
                                        pq->num_pending--;
                                        pq->num_sending++;
                                }
                        }
                }
                spin_unlock_irqrestore(&ppd->sdma_lock, flags);

                qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
                INIT_LIST_HEAD(&free_list);
                list_splice_init(&pq->sent, &free_list);
                pq->num_sending = 0;
                qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
                mutex_unlock(&pq->lock);
        }
}
static inline __le64 qib_sdma_make_desc0(u8 gen,
                                         u64 addr, u64 dwlen, u64 dwoffset)
{
        return cpu_to_le64(/* SDmaPhyAddr[31:0] */
                           ((addr & 0xfffffffcULL) << 32) |
                           /* SDmaGeneration[1:0] */
                           ((gen & 3ULL) << 30) |
                           /* SDmaDwordCount[10:0] */
                           ((dwlen & 0x7ffULL) << 16) |
                           /* SDmaBufOffset[12:2] */
                           (dwoffset & 0x7ffULL));
}

static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
{
        return descq | cpu_to_le64(1ULL << 12);
}

static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
{
        /* last */ /* dma head */
        return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
}

static inline __le64 qib_sdma_make_desc1(u64 addr)
{
        /* SDmaPhyAddr[47:32] */
        return cpu_to_le64(addr >> 32);
}
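
/*
 * Each hardware descriptor is two little-endian quadwords: desc0 packs
 * the low 32 bits of the buffer address plus generation, dword count,
 * buffer offset and the first/last/head-update flag bits above, and
 * desc1 carries the upper address bits.  qib_user_sdma_send_frag()
 * below additionally sets bit 15 (interrupt request) on a last
 * descriptor, and qib_user_sdma_send_desc() sets bit 14 on descriptors
 * belonging to large-buffer packets.
 */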
static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
                                    struct qib_user_sdma_pkt *pkt, int idx,
                                    unsigned ofs, u16 tail, u8 gen)
{
        const u64 addr = (u64) pkt->addr[idx].addr +
                (u64) pkt->addr[idx].offset;
        const u64 dwlen = (u64) pkt->addr[idx].length / 4;
        __le64 *descqp;
        __le64 descq0;

        descqp = &ppd->sdma_descq[tail].qw[0];

        descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
        if (pkt->addr[idx].first_desc)
                descq0 = qib_sdma_make_first_desc0(descq0);
        if (pkt->addr[idx].last_desc) {
                descq0 = qib_sdma_make_last_desc0(descq0);
                if (ppd->sdma_intrequest) {
                        descq0 |= cpu_to_le64(1ULL << 15);
                        ppd->sdma_intrequest = 0;
                }
        }

        descqp[0] = descq0;
        descqp[1] = qib_sdma_make_desc1(addr);
}
void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
                             struct list_head *pktlist)
{
        struct qib_devdata *dd = ppd->dd;
        u16 nfree, nsent;
        u16 tail, tail_c;
        u8 gen, gen_c;

        nfree = qib_sdma_descq_freecnt(ppd);
        if (!nfree)
                return;

retry:
        nsent = 0;

        tail_c = tail = ppd->sdma_descq_tail;
        gen_c = gen = ppd->sdma_generation;
        while (!list_empty(pktlist)) {
                struct qib_user_sdma_pkt *pkt =
                        list_entry(pktlist->next, struct qib_user_sdma_pkt,
                                   list);
                int i, j, c = 0;
                unsigned ofs = 0;
                u16 dtail = tail;

                for (i = pkt->index; i < pkt->naddr && nfree; i++) {
                        qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
                        ofs += pkt->addr[i].length >> 2;

                        if (++tail == ppd->sdma_descq_cnt) {
                                tail = 0;
                                ++gen;
                                ppd->sdma_intrequest = 1;
                        } else if (tail == (ppd->sdma_descq_cnt>>1)) {
                                ppd->sdma_intrequest = 1;
                        }
                        nfree--;
                        if (pkt->addr[i].last_desc == 0)
                                continue;

                        /*
                         * If the packet is >= 2KB mtu equivalent, we
                         * have to use the large buffers, and have to
                         * mark each descriptor as part of a large
                         * buffer packet.
                         */
                        if (ofs > dd->piosize2kmax_dwords) {
                                for (j = pkt->index; j <= i; j++) {
                                        ppd->sdma_descq[dtail].qw[0] |=
                                                cpu_to_le64(1ULL << 14);
                                        if (++dtail == ppd->sdma_descq_cnt)
                                                dtail = 0;
                                }
                        }
                        c += i + 1 - pkt->index;
                        pkt->index = i + 1; /* index for next first */
                        tail_c = dtail = tail;
                        gen_c = gen;
                        ofs = 0;  /* reset for next packet */
                }

                ppd->sdma_descq_added += c;
                nsent += c;
                if (pkt->index == pkt->naddr) {
                        pkt->added = ppd->sdma_descq_added;
                        pkt->pq->added = pkt->added;
                        pkt->pq->num_pending--;
                        spin_lock(&pkt->pq->sent_lock);
                        pkt->pq->num_sending++;
                        list_move_tail(&pkt->list, &pkt->pq->sent);
                        spin_unlock(&pkt->pq->sent_lock);
                }
                if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
                        break;
        }

        /* advance the tail on the chip if necessary */
        if (ppd->sdma_descq_tail != tail_c) {
                ppd->sdma_generation = gen_c;
                dd->f_sdma_update_tail(ppd, tail_c);
        }

        if (nfree && !list_empty(pktlist))
                goto retry;
}
/* pq->lock must be held, get packets on the wire... */
static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
                                   struct qib_user_sdma_queue *pq,
                                   struct list_head *pktlist, int count)
{
        int ret = 0;
        unsigned long flags;

        if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
                return -ECOMM;

        spin_lock_irqsave(&ppd->sdma_lock, flags);

        if (unlikely(!__qib_sdma_running(ppd))) {
                ret = -ECOMM;
                goto unlock;
        }

        pq->num_pending += count;
        list_splice_tail_init(pktlist, &ppd->sdma_userpending);
        qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);

unlock:
        spin_unlock_irqrestore(&ppd->sdma_lock, flags);

        return ret;
}
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
                         struct qib_user_sdma_queue *pq,
                         const struct iovec *iov,
                         unsigned long dim)
{
        struct qib_devdata *dd = rcd->dd;
        struct qib_pportdata *ppd = rcd->ppd;
        int ret = 0;
        struct list_head list;
        int npkts = 0;

        INIT_LIST_HEAD(&list);

        mutex_lock(&pq->lock);

        /* why not -ECOMM like qib_user_sdma_push_pkts() below? */
        if (!qib_sdma_running(ppd))
                goto done_unlock;

        /* if I have packets not complete yet */
        if (pq->added > ppd->sdma_descq_removed)
                qib_user_sdma_hwqueue_clean(ppd);
        /* if I have complete packets to be freed */
        if (pq->num_sending)
                qib_user_sdma_queue_clean(ppd, pq);

        while (dim) {
                int mxp = 8;
                int ndesc = 0;

                ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
                                iov, dim, &list, &mxp, &ndesc);
                if (ret < 0)
                        goto done_unlock;
                else {
                        dim -= ret;
                        iov += ret;
                }

                /* force packets onto the sdma hw queue... */
                if (!list_empty(&list)) {
                        /*
                         * Lazily clean hw queue.
                         */
                        if (qib_sdma_descq_freecnt(ppd) < ndesc) {
                                qib_user_sdma_hwqueue_clean(ppd);
                                if (pq->num_sending)
                                        qib_user_sdma_queue_clean(ppd, pq);
                        }

                        ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
                        if (ret < 0)
                                goto done_unlock;
                        else {
                                npkts += mxp;
                                pq->counter += mxp;
                        }
                }
        }

done_unlock:
        if (!list_empty(&list))
                qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
        mutex_unlock(&pq->lock);

        return (ret < 0) ? ret : npkts;
}
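
/*
 * qib_user_sdma_writev() returns the number of user packets queued (or
 * a negative errno), not a byte count.  Each packet consumes one header
 * iovec plus its payload iovecs (plus a tid-session iovec for tid-sdma),
 * which is why the loop above advances iov/dim by the iovec count
 * returned from qib_user_sdma_queue_pkts().
 */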
int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
                                struct qib_user_sdma_queue *pq)
{
        int ret = 0;

        mutex_lock(&pq->lock);
        qib_user_sdma_hwqueue_clean(ppd);
        ret = qib_user_sdma_queue_clean(ppd, pq);
        mutex_unlock(&pq->lock);

        return ret;
}
u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
{
        return pq ? pq->sent_counter : 0;
}

u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
{
        return pq ? pq->counter : 0;
}