/*
 * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>

#include "qib.h"
#include "qib_user_sdma.h"
/* minimum size of header */
#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
/* expected size of headers (for dma_pool) */
#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
/* attempt to drain the queue for 5secs */
#define QIB_USER_SDMA_DRAIN_TIMEOUT 250

/*
 * track how many times a process opens this driver.
 */
static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
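
/*
 * Rough life cycle of a user SDMA queue, pieced together from the
 * functions below (the actual callers live elsewhere in the driver):
 *
 *   pq = qib_user_sdma_queue_create(dev, unit, ctxt, sctxt);
 *   ...
 *   npkts = qib_user_sdma_writev(rcd, pq, iov, dim);
 *   qib_user_sdma_make_progress(ppd, pq);	/* reap completions */
 *   ...
 *   qib_user_sdma_queue_drain(ppd, pq);	/* on context close */
 *   qib_user_sdma_queue_destroy(pq);
 */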
struct qib_user_sdma_rb_node {
	pid_t pid;
	struct rb_node node;
	int refcount;
};
struct qib_user_sdma_pkt {
	struct list_head list;	/* list element */

	u8  tiddma;		/* if this is NEW tid-sdma */
	u8  largepkt;		/* this is large pkt from kmalloc */
	u16 frag_size;		/* frag size used by PSM */
	u16 index;		/* last header index or push index */
	u16 naddr;		/* dimension of addr (1..3) ... */
	u16 addrlimit;		/* addr array size */
	u16 tidsmidx;		/* current tidsm index */
	u16 tidsmcount;		/* tidsm array item count */
	u16 payload_size;	/* payload size so far for header */
	u32 bytes_togo;		/* bytes for processing */
	u32 counter;		/* sdma pkts queued counter for this entry */
	struct qib_tid_session_member *tidsm;	/* tid session member array */
	struct qib_user_sdma_queue *pq;	/* which pq this pkt belongs to */
	u64 added;		/* global descq number of entries */

	struct {
		u16 offset;	/* offset for kvaddr, addr */
		u16 length;	/* length in page */
		u16 first_desc;	/* first desc */
		u16 last_desc;	/* last desc */
		u16 put_page;	/* should we put_page? */
		u16 dma_mapped;	/* is page dma_mapped? */
		u16 dma_length;	/* for dma_unmap_page() */

		struct page *page;	/* may be NULL (coherent mem) */
		void *kvaddr;	/* FIXME: only for pio hack */
		dma_addr_t addr;	/* dma address of this fragment */
	} addr[4];	/* max pages, any more and we coalesce */
};
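
/*
 * Note on the addr[] array (inferred from its use below): entry 0 always
 * describes the PBC+header fragment built in qib_user_sdma_queue_pkts();
 * later entries describe payload pages, and pkt->index tracks which header
 * fragment the payload currently being built belongs to.
 */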
struct qib_user_sdma_queue {
	/*
	 * pkts sent to dma engine are queued on this
	 * list head.  the type of the elements of this
	 * list is struct qib_user_sdma_pkt...
	 */
	struct list_head sent;

	/*
	 * Because the above list will be accessed by both the process and
	 * the interrupt handler, we need a spinlock for it.
	 */
	spinlock_t sent_lock ____cacheline_aligned_in_smp;

	/* headers with expected length are allocated from here... */
	char header_cache_name[64];
	struct dma_pool *header_cache;

	/* packets are allocated from the slab cache... */
	char pkt_slab_name[64];
	struct kmem_cache *pkt_slab;

	/* as packets go on the queued queue, they are counted... */
	u32 counter;
	u32 sent_counter;
	/* pending packets, not sending yet */
	u32 num_pending;
	/* sending packets, not complete yet */
	u32 num_sending;
	/* global descq number of entry of last sending packet */
	u64 added;

	struct rb_root dma_pages_root;

	struct qib_user_sdma_rb_node *sdma_rb_node;

	/* protect everything above... */
	struct mutex lock;
};
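
/*
 * Locking overview (as implied by the comments in this file): pq->lock
 * serializes qib_user_sdma_writev(), qib_user_sdma_make_progress() and
 * qib_user_sdma_queue_drain(); pq->sent_lock protects pq->sent, which is
 * also touched from qib_user_sdma_send_desc() in interrupt context; and
 * ppd->sdma_lock protects the hardware descriptor ring.
 */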
static struct qib_user_sdma_rb_node *
qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
{
	struct qib_user_sdma_rb_node *sdma_rb_node;
	struct rb_node *node = root->rb_node;

	while (node) {
		sdma_rb_node = rb_entry(node, struct qib_user_sdma_rb_node,
					node);
		if (pid < sdma_rb_node->pid)
			node = node->rb_left;
		else if (pid > sdma_rb_node->pid)
			node = node->rb_right;
		else
			return sdma_rb_node;
	}
	return NULL;
}
static int
qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
{
	struct rb_node **node = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct qib_user_sdma_rb_node *got;

	while (*node) {
		got = rb_entry(*node, struct qib_user_sdma_rb_node, node);
		parent = *node;
		if (new->pid < got->pid)
			node = &((*node)->rb_left);
		else if (new->pid > got->pid)
			node = &((*node)->rb_right);
		else
			return 0;
	}

	rb_link_node(&new->node, parent, node);
	rb_insert_color(&new->node, root);
	return 1;
}
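
/*
 * Each process (keyed by pid) gets one qib_user_sdma_rb_node shared by
 * all of its queues: qib_user_sdma_queue_create() bumps the refcount or
 * inserts a new node, qib_user_sdma_queue_destroy() drops it, and
 * qib_user_sdma_push_pkts() uses refcount > 1 to select the non-blocking
 * submission path.
 */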
struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
	struct qib_user_sdma_queue *pq =
		kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
	struct qib_user_sdma_rb_node *sdma_rb_node;

	if (!pq)
		goto done;

	pq->counter = 0;
	pq->sent_counter = 0;
	pq->num_pending = 0;
	pq->num_sending = 0;
	pq->added = 0;
	pq->sdma_rb_node = NULL;

	INIT_LIST_HEAD(&pq->sent);
	spin_lock_init(&pq->sent_lock);
	mutex_init(&pq->lock);

	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
		 "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
					 sizeof(struct qib_user_sdma_pkt),
					 0, 0, NULL);
	if (!pq->pkt_slab)
		goto err_kfree;

	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
		 "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
	pq->header_cache = dma_pool_create(pq->header_cache_name,
					   dev,
					   QIB_USER_SDMA_EXP_HEADER_LENGTH,
					   4, 0);
	if (!pq->header_cache)
		goto err_slab;

	pq->dma_pages_root = RB_ROOT;

	sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
					       current->pid);
	if (sdma_rb_node) {
		sdma_rb_node->refcount++;
	} else {
		sdma_rb_node = kmalloc(sizeof(
			struct qib_user_sdma_rb_node), GFP_KERNEL);
		if (!sdma_rb_node)
			goto err_rb;

		sdma_rb_node->refcount = 1;
		sdma_rb_node->pid = current->pid;

		qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node);
	}
	pq->sdma_rb_node = sdma_rb_node;

	goto done;

err_rb:
	dma_pool_destroy(pq->header_cache);
err_slab:
	kmem_cache_destroy(pq->pkt_slab);
err_kfree:
	kfree(pq);
	pq = NULL;

done:
	return pq;
}
static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
				    int i, u16 offset, u16 len,
				    u16 first_desc, u16 last_desc,
				    u16 put_page, u16 dma_mapped,
				    struct page *page, void *kvaddr,
				    dma_addr_t dma_addr, u16 dma_length)
{
	pkt->addr[i].offset = offset;
	pkt->addr[i].length = len;
	pkt->addr[i].first_desc = first_desc;
	pkt->addr[i].last_desc = last_desc;
	pkt->addr[i].put_page = put_page;
	pkt->addr[i].dma_mapped = dma_mapped;
	pkt->addr[i].page = page;
	pkt->addr[i].kvaddr = kvaddr;
	pkt->addr[i].addr = dma_addr;
	pkt->addr[i].dma_length = dma_length;
}
static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
					size_t len, dma_addr_t *dma_addr)
{
	void *hdr;

	if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
		hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
				     dma_addr);
	else
		hdr = NULL;

	if (!hdr) {
		hdr = kmalloc(len, GFP_KERNEL);
		if (!hdr)
			return NULL;

		*dma_addr = 0;
	}

	return hdr;
}
static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
				       struct qib_user_sdma_queue *pq,
				       struct qib_user_sdma_pkt *pkt,
				       struct page *page, u16 put,
				       u16 offset, u16 len, void *kvaddr)
{
	__le16 *pbc16;
	void *pbcvaddr;
	struct qib_message_header *hdr;
	u16 newlen, pbclen, lastdesc, dma_mapped;
	u32 vcto;
	union qib_seqnum seqnum;
	dma_addr_t pbcdaddr;
	dma_addr_t dma_addr =
		dma_map_page(&dd->pcidev->dev,
			     page, offset, len, DMA_TO_DEVICE);
	int ret = 0;

	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
		/*
		 * dma mapping error, pkt has not managed
		 * this page yet, return the page here so
		 * the caller can ignore this page.
		 */
		if (put) {
			unpin_user_page(page);
		} else {
			/* coalesce case */
			kunmap(page);
			__free_page(page);
		}
		ret = -ENOMEM;
		goto done;
	}
	offset = 0;
	dma_mapped = 1;

next_fragment:

	/*
	 * In tid-sdma, the transfer length is restricted by
	 * the receiver side current tid page length.
	 */
	if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
		newlen = pkt->tidsm[pkt->tidsmidx].length;
	else
		newlen = len;

	/*
	 * Then the transfer length is restricted by MTU.
	 * the last descriptor flag is determined by:
	 * 1. the current packet is at frag size length.
	 * 2. the current tid page is done if tid-sdma.
	 * 3. there is no more byte togo if sdma.
	 */
	lastdesc = 0;
	if ((pkt->payload_size + newlen) >= pkt->frag_size) {
		newlen = pkt->frag_size - pkt->payload_size;
		lastdesc = 1;
	} else if (pkt->tiddma) {
		if (newlen == pkt->tidsm[pkt->tidsmidx].length)
			lastdesc = 1;
	} else {
		if (newlen == pkt->bytes_togo)
			lastdesc = 1;
	}

	/* fill the next fragment in this page */
	qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
		offset, newlen,		/* offset, len */
		0, lastdesc,		/* first last desc */
		put, dma_mapped,	/* put page, dma mapped */
		page, kvaddr,		/* struct page, virt addr */
		dma_addr, len);		/* dma addr, dma length */
	pkt->bytes_togo -= newlen;
	pkt->payload_size += newlen;
	pkt->naddr++;
	if (pkt->naddr == pkt->addrlimit) {
		ret = -EFAULT;
		goto done;
	}

	/* If there is no more byte togo. (lastdesc==1) */
	if (pkt->bytes_togo == 0) {
		/* The packet is done, header is not dma mapped yet.
		 * it should be from kmalloc */
		if (!pkt->addr[pkt->index].addr) {
			pkt->addr[pkt->index].addr =
				dma_map_single(&dd->pcidev->dev,
					pkt->addr[pkt->index].kvaddr,
					pkt->addr[pkt->index].dma_length,
					DMA_TO_DEVICE);
			if (dma_mapping_error(&dd->pcidev->dev,
					pkt->addr[pkt->index].addr)) {
				ret = -ENOMEM;
				goto done;
			}
			pkt->addr[pkt->index].dma_mapped = 1;
		}

		goto done;
	}

	/* If tid-sdma, advance tid info. */
	if (pkt->tiddma) {
		pkt->tidsm[pkt->tidsmidx].length -= newlen;
		if (pkt->tidsm[pkt->tidsmidx].length) {
			pkt->tidsm[pkt->tidsmidx].offset += newlen;
		} else {
			pkt->tidsmidx++;
			if (pkt->tidsmidx == pkt->tidsmcount) {
				ret = -EFAULT;
				goto done;
			}
		}
	}

	/*
	 * If this is NOT the last descriptor. (newlen==len)
	 * the current packet is not done yet, but the current
	 * send side page is done.
	 */
	if (lastdesc == 0)
		goto done;

	/*
	 * If running this driver under PSM with message size
	 * fitting into one transfer unit, it is not possible
	 * to pass this line. otherwise, it is a bug.
	 */

	/*
	 * Since the current packet is done, and there are more
	 * bytes togo, we need to create a new sdma header, copying
	 * from previous sdma header and modify both.
	 */
	pbclen = pkt->addr[pkt->index].length;
	pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
	if (!pbcvaddr) {
		ret = -ENOMEM;
		goto done;
	}
	/* Copy the previous sdma header to new sdma header */
	pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
	memcpy(pbcvaddr, pbc16, pbclen);

	/* Modify the previous sdma header */
	hdr = (struct qib_message_header *)&pbc16[4];

	/* New pbc length */
	pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));

	/* New packet length */
	hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

	if (pkt->tiddma) {
		/* turn on the header suppression */
		hdr->iph.pkt_flags =
			cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
		/* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
		hdr->flags &= ~(0x04|0x20);
	} else {
		/* turn off extra bytes: 20-21 bits */
		hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
		/* turn off ACK_REQ: 0x04 */
		hdr->flags &= ~(0x04);
	}

	/* New kdeth checksum */
	vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
	hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
		be16_to_cpu(hdr->lrh[2]) -
		((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
		le16_to_cpu(hdr->iph.pkt_flags));

	/* The packet is done, header is not dma mapped yet.
	 * it should be from kmalloc */
	if (!pkt->addr[pkt->index].addr) {
		pkt->addr[pkt->index].addr =
			dma_map_single(&dd->pcidev->dev,
				pkt->addr[pkt->index].kvaddr,
				pkt->addr[pkt->index].dma_length,
				DMA_TO_DEVICE);
		if (dma_mapping_error(&dd->pcidev->dev,
				pkt->addr[pkt->index].addr)) {
			ret = -ENOMEM;
			goto done;
		}
		pkt->addr[pkt->index].dma_mapped = 1;
	}

	/* Modify the new sdma header */
	pbc16 = (__le16 *)pbcvaddr;
	hdr = (struct qib_message_header *)&pbc16[4];

	/* New pbc length */
	pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));

	/* New packet length */
	hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

	if (pkt->tiddma) {
		/* Set new tid and offset for new sdma header */
		hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
			(le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
			(pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
			(pkt->tidsm[pkt->tidsmidx].offset>>2));
	} else {
		/* Middle protocol new packet offset */
		hdr->uwords[2] += pkt->payload_size;
	}

	/* New kdeth checksum */
	vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
	hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
		be16_to_cpu(hdr->lrh[2]) -
		((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
		le16_to_cpu(hdr->iph.pkt_flags));

	/* Next sequence number in new sdma header */
	seqnum.val = be32_to_cpu(hdr->bth[2]);
	if (pkt->tiddma)
		seqnum.seq++;
	else
		seqnum.pkt++;
	hdr->bth[2] = cpu_to_be32(seqnum.val);

	/* Init new sdma header. */
	qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
		0, pbclen,		/* offset, len */
		1, 0,			/* first last desc */
		0, 0,			/* put page, dma mapped */
		NULL, pbcvaddr,		/* struct page, virt addr */
		pbcdaddr, pbclen);	/* dma addr, dma length */
	pkt->index = pkt->naddr;
	pkt->payload_size = 0;
	pkt->naddr++;
	if (pkt->naddr == pkt->addrlimit) {
		ret = -EFAULT;
		goto done;
	}

	/* Prepare for next fragment in this page */
	if (newlen != len) {
		if (dma_mapped) {
			put = 0;
			dma_mapped = 0;
			page = NULL;
			kvaddr = NULL;
		}
		len -= newlen;
		offset += newlen;
		goto next_fragment;
	}

done:
	return ret;
}
/* we have too many pages in the iovec, coalesce to a single page */
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
				  struct qib_user_sdma_queue *pq,
				  struct qib_user_sdma_pkt *pkt,
				  const struct iovec *iov,
				  unsigned long niov)
{
	int ret = 0;
	struct page *page = alloc_page(GFP_KERNEL);
	void *mpage_save;
	char *mpage;
	int i;
	int len = 0;

	if (!page) {
		ret = -ENOMEM;
		goto done;
	}

	mpage = kmap(page);
	mpage_save = mpage;
	for (i = 0; i < niov; i++) {
		int cfur;

		cfur = copy_from_user(mpage,
				      iov[i].iov_base, iov[i].iov_len);
		if (cfur) {
			ret = -EFAULT;
			goto free_unmap;
		}

		mpage += iov[i].iov_len;
		len += iov[i].iov_len;
	}

	ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
			page, 0, 0, len, mpage_save);
	goto done;

free_unmap:
	kunmap(page);
	__free_page(page);
done:
	return ret;
}
/*
 * How many pages in this iovec element?
 */
static int qib_user_sdma_num_pages(const struct iovec *iov)
{
	const unsigned long addr = (unsigned long) iov->iov_base;
	const unsigned long len = iov->iov_len;
	const unsigned long spage = addr & PAGE_MASK;
	const unsigned long epage = (addr + len - 1) & PAGE_MASK;

	return 1 + ((epage - spage) >> PAGE_SHIFT);
}
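
/*
 * Example: a 16-byte iovec starting 8 bytes before a page boundary ends
 * on the next page, so epage - spage == PAGE_SIZE and the function
 * returns 2; an iovec contained entirely in one page returns 1.
 */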
static void qib_user_sdma_free_pkt_frag(struct device *dev,
					struct qib_user_sdma_queue *pq,
					struct qib_user_sdma_pkt *pkt,
					int frag)
{
	const int i = frag;

	if (pkt->addr[i].page) {
		/* only user data has page */
		if (pkt->addr[i].dma_mapped)
			dma_unmap_page(dev,
				       pkt->addr[i].addr,
				       pkt->addr[i].dma_length,
				       DMA_TO_DEVICE);

		if (pkt->addr[i].kvaddr)
			kunmap(pkt->addr[i].page);

		if (pkt->addr[i].put_page)
			unpin_user_page(pkt->addr[i].page);
		else
			__free_page(pkt->addr[i].page);
	} else if (pkt->addr[i].kvaddr) {
		/* for headers */
		if (pkt->addr[i].dma_mapped) {
			/* from kmalloc & dma mapped */
			dma_unmap_single(dev,
					 pkt->addr[i].addr,
					 pkt->addr[i].dma_length,
					 DMA_TO_DEVICE);
			kfree(pkt->addr[i].kvaddr);
		} else if (pkt->addr[i].addr) {
			/* free coherent mem from cache... */
			dma_pool_free(pq->header_cache,
				      pkt->addr[i].kvaddr, pkt->addr[i].addr);
		} else {
			/* from kmalloc but not dma mapped */
			kfree(pkt->addr[i].kvaddr);
		}
	}
}
/* return number of pages pinned... */
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
				   struct qib_user_sdma_queue *pq,
				   struct qib_user_sdma_pkt *pkt,
				   unsigned long addr, int tlen, int npages)
{
	struct page *pages[8];
	int i, j;
	int ret = 0;

	while (npages) {
		if (npages > 8)
			j = 8;
		else
			j = npages;

		ret = pin_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
		if (ret != j) {
			i = 0;
			j = ret;
			ret = -ENOMEM;
			goto free_pages;
		}

		for (i = 0; i < j; i++) {
			/* map the pages... */
			unsigned long fofs = addr & ~PAGE_MASK;
			int flen = ((fofs + tlen) > PAGE_SIZE) ?
				(PAGE_SIZE - fofs) : tlen;

			ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
				pages[i], 1, fofs, flen, NULL);
			if (ret < 0) {
				/* current page has been taken
				 * care of inside above call.
				 */
				i++;
				goto free_pages;
			}

			addr += flen;
			tlen -= flen;
		}

		npages -= j;
	}

	goto done;

	/* if error, return all pages not managed by pkt */
free_pages:
	while (i < j)
		unpin_user_page(pages[i++]);

done:
	return ret;
}
static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
				 struct qib_user_sdma_queue *pq,
				 struct qib_user_sdma_pkt *pkt,
				 const struct iovec *iov,
				 unsigned long niov)
{
	int ret = 0;
	unsigned long idx;

	for (idx = 0; idx < niov; idx++) {
		const int npages = qib_user_sdma_num_pages(iov + idx);
		const unsigned long addr = (unsigned long) iov[idx].iov_base;

		ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
					      iov[idx].iov_len, npages);
		if (ret < 0)
			goto free_pkt;
	}

	goto done;

free_pkt:
	/* we need to ignore the first entry here */
	for (idx = 1; idx < pkt->naddr; idx++)
		qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);

	/* need to dma unmap the first entry, this is to restore to
	 * the original state so that caller can free the memory in
	 * error condition. Caller does not know if dma mapped or not. */
	if (pkt->addr[0].dma_mapped) {
		dma_unmap_single(&dd->pcidev->dev,
				 pkt->addr[0].addr,
				 pkt->addr[0].dma_length,
				 DMA_TO_DEVICE);
		pkt->addr[0].addr = 0;
		pkt->addr[0].dma_mapped = 0;
	}

done:
	return ret;
}
static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
				      struct qib_user_sdma_queue *pq,
				      struct qib_user_sdma_pkt *pkt,
				      const struct iovec *iov,
				      unsigned long niov, int npages)
{
	int ret = 0;

	if (pkt->frag_size == pkt->bytes_togo &&
	    npages >= ARRAY_SIZE(pkt->addr))
		ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
	else
		ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);

	return ret;
}
/* free a packet list -- return counter value of last packet */
static void qib_user_sdma_free_pkt_list(struct device *dev,
					struct qib_user_sdma_queue *pq,
					struct list_head *list)
{
	struct qib_user_sdma_pkt *pkt, *pkt_next;

	list_for_each_entry_safe(pkt, pkt_next, list, list) {
		int i;

		for (i = 0; i < pkt->naddr; i++)
			qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);

		if (pkt->largepkt)
			kfree(pkt);
		else
			kmem_cache_free(pq->pkt_slab, pkt);
	}
	INIT_LIST_HEAD(list);
}
/*
 * copy headers, coalesce etc -- pq->lock must be held
 *
 * we queue all the packets to list, returning the
 * number of bytes total.  list must be empty initially,
 * as, if there is an error we clean it...
 */
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
				    struct qib_pportdata *ppd,
				    struct qib_user_sdma_queue *pq,
				    const struct iovec *iov,
				    unsigned long niov,
				    struct list_head *list,
				    int *maxpkts, int *ndesc)
{
	unsigned long idx = 0;
	int ret = 0;
	int npkts = 0;
	__le32 *pbc;
	dma_addr_t dma_addr;
	struct qib_user_sdma_pkt *pkt = NULL;
	size_t len;
	size_t nw;
	u32 counter = pq->counter;
	u16 frag_size;

	while (idx < niov && npkts < *maxpkts) {
		const unsigned long addr = (unsigned long) iov[idx].iov_base;
		const unsigned long idx_save = idx;
		unsigned pktnw;
		unsigned pktnwc;
		int nfrags = 0;
		size_t npages = 0;
		size_t bytes_togo = 0;
		int tiddma = 0;
		int cfur;

		len = iov[idx].iov_len;
		nw = len >> 2;

		if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
		    len > PAGE_SIZE || len & 3 || addr & 3) {
			ret = -EINVAL;
			goto free_list;
		}

		pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
		if (!pbc) {
			ret = -ENOMEM;
			goto free_list;
		}

		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
		if (cfur) {
			ret = -EFAULT;
			goto free_pbc;
		}

		/*
		 * This assignment is a bit strange.  it's because
		 * the pbc counts the number of 32 bit words in the full
		 * packet _except_ the first word of the pbc itself...
		 */
		pktnwc = nw - 1;

		/*
		 * pktnw computation yields the number of 32 bit words
		 * that the caller has indicated in the PBC.  note that
		 * this is one less than the total number of words that
		 * goes to the send DMA engine as the first 32 bit word
		 * of the PBC itself is not counted.  Armed with this count,
		 * we can verify that the packet is consistent with the
		 * iovec lengths.
		 */
		pktnw = le32_to_cpu(*pbc) & 0xFFFF;
		if (pktnw < pktnwc) {
			ret = -EINVAL;
			goto free_pbc;
		}

		idx++;
		while (pktnwc < pktnw && idx < niov) {
			const size_t slen = iov[idx].iov_len;
			const unsigned long faddr =
				(unsigned long) iov[idx].iov_base;

			if (slen & 3 || faddr & 3 || !slen) {
				ret = -EINVAL;
				goto free_pbc;
			}

			npages += qib_user_sdma_num_pages(&iov[idx]);

			bytes_togo += slen;
			pktnwc += slen >> 2;
			idx++;
			nfrags++;
		}

		if (pktnwc != pktnw) {
			ret = -EINVAL;
			goto free_pbc;
		}

		frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
		if (((frag_size ? frag_size : bytes_togo) + len) >
						ppd->ibmaxlen) {
			ret = -EINVAL;
			goto free_pbc;
		}

		if (frag_size) {
			int tidsmsize, n;
			size_t pktsize;

			n = npages*((2*PAGE_SIZE/frag_size)+1);
			pktsize = struct_size(pkt, addr, n);

			/*
			 * Determine if this is tid-sdma or just sdma.
			 */
			tiddma = (((le32_to_cpu(pbc[7])>>
				QLOGIC_IB_I_TID_SHIFT)&
				QLOGIC_IB_I_TID_MASK) !=
				QLOGIC_IB_I_TID_MASK);

			if (tiddma)
				tidsmsize = iov[idx].iov_len;
			else
				tidsmsize = 0;

			pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
			if (!pkt) {
				ret = -ENOMEM;
				goto free_pbc;
			}
			pkt->largepkt = 1;
			pkt->frag_size = frag_size;
			pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);

			if (tiddma) {
				char *tidsm = (char *)pkt + pktsize;

				cfur = copy_from_user(tidsm,
					iov[idx].iov_base, tidsmsize);
				if (cfur) {
					ret = -EFAULT;
					goto free_pkt;
				}
				pkt->tidsm =
					(struct qib_tid_session_member *)tidsm;
				pkt->tidsmcount = tidsmsize/
					sizeof(struct qib_tid_session_member);
				pkt->tidsmidx = 0;
				idx++;
			}

			/*
			 * pbc 'fill1' field is borrowed to pass frag size,
			 * we need to clear it after picking frag size, the
			 * hardware requires this field to be zero.
			 */
			*pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
		} else {
			pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
			if (!pkt) {
				ret = -ENOMEM;
				goto free_pbc;
			}
			pkt->largepkt = 0;
			pkt->frag_size = bytes_togo;
			pkt->addrlimit = ARRAY_SIZE(pkt->addr);
		}
		pkt->bytes_togo = bytes_togo;
		pkt->payload_size = 0;
		pkt->counter = counter;
		pkt->tiddma = tiddma;

		/* setup the first header */
		qib_user_sdma_init_frag(pkt, 0, /* index */
			0, len,		/* offset, len */
			1, 0,		/* first last desc */
			0, 0,		/* put page, dma mapped */
			NULL, pbc,	/* struct page, virt addr */
			dma_addr, len);	/* dma addr, dma length */
		pkt->index = 0;
		pkt->naddr = 1;

		if (nfrags) {
			ret = qib_user_sdma_init_payload(dd, pq, pkt,
							 iov + idx_save + 1,
							 nfrags, npages);
			if (ret < 0)
				goto free_pkt;
		} else {
			/* since there is no payload, mark the
			 * header as the last desc. */
			pkt->addr[0].last_desc = 1;

			if (dma_addr == 0) {
				/*
				 * the header is not dma mapped yet.
				 * it should be from kmalloc.
				 */
				dma_addr = dma_map_single(&dd->pcidev->dev,
					pbc, len, DMA_TO_DEVICE);
				if (dma_mapping_error(&dd->pcidev->dev,
						      dma_addr)) {
					ret = -ENOMEM;
					goto free_pkt;
				}
				pkt->addr[0].addr = dma_addr;
				pkt->addr[0].dma_mapped = 1;
			}
		}

		counter++;
		npkts++;
		pkt->pq = pq;
		pkt->index = 0; /* reset index for push on hw */
		*ndesc += pkt->naddr;

		list_add_tail(&pkt->list, list);
	}

	*maxpkts = npkts;
	ret = idx;
	goto done;

free_pkt:
	if (pkt->largepkt)
		kfree(pkt);
	else
		kmem_cache_free(pq->pkt_slab, pkt);
free_pbc:
	if (dma_addr)
		dma_pool_free(pq->header_cache, pbc, dma_addr);
	else
		kfree(pbc);
free_list:
	qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
	return ret;
}
static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
					       u32 c)
{
	pq->sent_counter = c;
}
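
/*
 * Counter bookkeeping (a summary of the fields used below): pq->counter
 * advances as packets are queued in qib_user_sdma_writev(), while
 * pq->sent_counter is advanced as completed packets are freed in
 * qib_user_sdma_queue_clean().  Both are exposed to callers through
 * qib_user_sdma_inflight_counter() and qib_user_sdma_complete_counter()
 * at the end of this file.
 */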
/* try to clean out queue -- needs pq->lock */
static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
				     struct qib_user_sdma_queue *pq)
{
	struct qib_devdata *dd = ppd->dd;
	struct list_head free_list;
	struct qib_user_sdma_pkt *pkt;
	struct qib_user_sdma_pkt *pkt_prev;
	unsigned long flags;
	int ret = 0;

	if (!pq->num_sending)
		return 0;

	INIT_LIST_HEAD(&free_list);

	/*
	 * We need this spin lock here because interrupt handler
	 * might modify this list in qib_user_sdma_send_desc(), also
	 * we can not get interrupted, otherwise it is a deadlock.
	 */
	spin_lock_irqsave(&pq->sent_lock, flags);
	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
		s64 descd = ppd->sdma_descq_removed - pkt->added;

		if (descd < 0)
			break;

		list_move_tail(&pkt->list, &free_list);

		/* one more packet cleaned */
		ret++;
		pq->num_sending--;
	}
	spin_unlock_irqrestore(&pq->sent_lock, flags);

	if (!list_empty(&free_list)) {
		u32 counter;

		pkt = list_entry(free_list.prev,
				 struct qib_user_sdma_pkt, list);
		counter = pkt->counter;

		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		qib_user_sdma_set_complete_counter(pq, counter);
	}

	return ret;
}
void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
{
	if (!pq)
		return;

	pq->sdma_rb_node->refcount--;
	if (pq->sdma_rb_node->refcount == 0) {
		rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
		kfree(pq->sdma_rb_node);
	}
	dma_pool_destroy(pq->header_cache);
	kmem_cache_destroy(pq->pkt_slab);
	kfree(pq);
}
/* clean descriptor queue, returns > 0 if some elements cleaned */
static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&ppd->sdma_lock, flags);
	ret = qib_sdma_make_progress(ppd);
	spin_unlock_irqrestore(&ppd->sdma_lock, flags);

	return ret;
}
/* we're in close, drain packets so that we can cleanup successfully... */
void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
			       struct qib_user_sdma_queue *pq)
{
	struct qib_devdata *dd = ppd->dd;
	unsigned long flags;
	int i;

	if (!pq)
		return;

	for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
		mutex_lock(&pq->lock);
		if (!pq->num_pending && !pq->num_sending) {
			mutex_unlock(&pq->lock);
			break;
		}
		qib_user_sdma_hwqueue_clean(ppd);
		qib_user_sdma_queue_clean(ppd, pq);
		mutex_unlock(&pq->lock);
		msleep(20);
	}

	if (pq->num_pending || pq->num_sending) {
		struct qib_user_sdma_pkt *pkt;
		struct qib_user_sdma_pkt *pkt_prev;
		struct list_head free_list;

		mutex_lock(&pq->lock);
		spin_lock_irqsave(&ppd->sdma_lock, flags);
		/*
		 * Since we hold sdma_lock, it is safe without sent_lock.
		 */
		if (pq->num_pending) {
			list_for_each_entry_safe(pkt, pkt_prev,
					&ppd->sdma_userpending, list) {
				if (pkt->pq == pq) {
					list_move_tail(&pkt->list, &pq->sent);
					pq->num_pending--;
					pq->num_sending++;
				}
			}
		}
		spin_unlock_irqrestore(&ppd->sdma_lock, flags);

		qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
		INIT_LIST_HEAD(&free_list);
		list_splice_init(&pq->sent, &free_list);
		pq->num_sending = 0;
		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		mutex_unlock(&pq->lock);
	}
}
static inline __le64 qib_sdma_make_desc0(u8 gen,
					 u64 addr, u64 dwlen, u64 dwoffset)
{
	return cpu_to_le64(/* SDmaPhyAddr[31:0] */
			   ((addr & 0xfffffffcULL) << 32) |
			   /* SDmaGeneration[1:0] */
			   ((gen & 3ULL) << 30) |
			   /* SDmaDwordCount[10:0] */
			   ((dwlen & 0x7ffULL) << 16) |
			   /* SDmaBufOffset[12:2] */
			   (dwoffset & 0x7ffULL));
}

static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
{
	return descq | cpu_to_le64(1ULL << 12);
}

static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
{
	/* last */ /* dma head */
	return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
}

static inline __le64 qib_sdma_make_desc1(u64 addr)
{
	/* SDmaPhyAddr[47:32] */
	return cpu_to_le64(addr >> 32);
}
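
/*
 * Summary of descriptor qword 0, as used by the helpers above and by
 * qib_user_sdma_send_frag()/qib_user_sdma_send_desc() below (collected
 * here for reference; the hardware documentation is authoritative):
 *
 *   [63:32] SDmaPhyAddr[31:0]
 *   [31:30] SDmaGeneration[1:0]
 *   [26:16] SDmaDwordCount[10:0]
 *   [15]    interrupt request
 *   [14]    large (>2KB) send buffer
 *   [13]    dma head
 *   [12]    first descriptor
 *   [11]    last descriptor
 *   [10:0]  SDmaBufOffset[12:2]
 *
 * Qword 1 carries SDmaPhyAddr[47:32].
 */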
static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
				    struct qib_user_sdma_pkt *pkt, int idx,
				    unsigned ofs, u16 tail, u8 gen)
{
	const u64 addr = (u64) pkt->addr[idx].addr +
		(u64) pkt->addr[idx].offset;
	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
	__le64 *descqp;
	__le64 descq0;

	descqp = &ppd->sdma_descq[tail].qw[0];

	descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
	if (pkt->addr[idx].first_desc)
		descq0 = qib_sdma_make_first_desc0(descq0);
	if (pkt->addr[idx].last_desc) {
		descq0 = qib_sdma_make_last_desc0(descq0);
		if (ppd->sdma_intrequest) {
			descq0 |= cpu_to_le64(1ULL << 15);
			ppd->sdma_intrequest = 0;
		}
	}

	descqp[0] = descq0;
	descqp[1] = qib_sdma_make_desc1(addr);
}
void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
			     struct list_head *pktlist)
{
	struct qib_devdata *dd = ppd->dd;
	u16 nfree, nsent;
	u16 tail, tail_c;
	u8 gen, gen_c;

	nfree = qib_sdma_descq_freecnt(ppd);
	if (!nfree)
		return;

retry:
	nsent = 0;
	tail_c = tail = ppd->sdma_descq_tail;
	gen_c = gen = ppd->sdma_generation;
	while (!list_empty(pktlist)) {
		struct qib_user_sdma_pkt *pkt =
			list_entry(pktlist->next, struct qib_user_sdma_pkt,
				   list);
		int i, j, c = 0;
		unsigned ofs = 0;
		u16 dtail = tail;

		for (i = pkt->index; i < pkt->naddr && nfree; i++) {
			qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
			ofs += pkt->addr[i].length >> 2;

			if (++tail == ppd->sdma_descq_cnt) {
				tail = 0;
				++gen;
				ppd->sdma_intrequest = 1;
			} else if (tail == (ppd->sdma_descq_cnt>>1)) {
				ppd->sdma_intrequest = 1;
			}
			nfree--;
			if (pkt->addr[i].last_desc == 0)
				continue;

			/*
			 * If the packet is >= 2KB mtu equivalent, we
			 * have to use the large buffers, and have to
			 * mark each descriptor as part of a large
			 * buffer packet.
			 */
			if (ofs > dd->piosize2kmax_dwords) {
				for (j = pkt->index; j <= i; j++) {
					ppd->sdma_descq[dtail].qw[0] |=
						cpu_to_le64(1ULL << 14);
					if (++dtail == ppd->sdma_descq_cnt)
						dtail = 0;
				}
			}
			c += i + 1 - pkt->index;
			pkt->index = i + 1; /* index for next first */
			tail_c = dtail = tail;
			gen_c = gen;
			ofs = 0;  /* reset for next packet */
		}

		ppd->sdma_descq_added += c;
		nsent += c;
		if (pkt->index == pkt->naddr) {
			pkt->added = ppd->sdma_descq_added;
			pkt->pq->added = pkt->added;
			pkt->pq->num_pending--;
			spin_lock(&pkt->pq->sent_lock);
			pkt->pq->num_sending++;
			list_move_tail(&pkt->list, &pkt->pq->sent);
			spin_unlock(&pkt->pq->sent_lock);
		}
		if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
			break;
	}

	/* advance the tail on the chip if necessary */
	if (ppd->sdma_descq_tail != tail_c) {
		ppd->sdma_generation = gen_c;
		dd->f_sdma_update_tail(ppd, tail_c);
	}

	if (nfree && !list_empty(pktlist))
		goto retry;
}
/* pq->lock must be held, get packets on the wire... */
static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
				   struct qib_user_sdma_queue *pq,
				   struct list_head *pktlist, int count)
{
	unsigned long flags;

	if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
		return -ECOMM;

	/* non-blocking mode */
	if (pq->sdma_rb_node->refcount > 1) {
		spin_lock_irqsave(&ppd->sdma_lock, flags);
		if (unlikely(!__qib_sdma_running(ppd))) {
			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
			return -ECOMM;
		}
		pq->num_pending += count;
		list_splice_tail_init(pktlist, &ppd->sdma_userpending);
		qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
		spin_unlock_irqrestore(&ppd->sdma_lock, flags);
		return 0;
	}

	/* In this case, descriptors from this process are not
	 * linked to ppd pending queue, interrupt handler
	 * won't update this process, it is OK to directly
	 * modify without sdma lock.
	 */
	pq->num_pending += count;
	/*
	 * Blocking mode for single rail process, we must
	 * release/regain sdma_lock to give other process
	 * chance to make progress. This is important for
	 * performance.
	 */
	do {
		spin_lock_irqsave(&ppd->sdma_lock, flags);
		if (unlikely(!__qib_sdma_running(ppd))) {
			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
			return -ECOMM;
		}
		qib_user_sdma_send_desc(ppd, pktlist);
		if (!list_empty(pktlist))
			qib_sdma_make_progress(ppd);
		spin_unlock_irqrestore(&ppd->sdma_lock, flags);
	} while (!list_empty(pktlist));

	return 0;
}
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
			 struct qib_user_sdma_queue *pq,
			 const struct iovec *iov,
			 unsigned long dim)
{
	struct qib_devdata *dd = rcd->dd;
	struct qib_pportdata *ppd = rcd->ppd;
	int ret = 0;
	struct list_head list;
	int npkts = 0;

	INIT_LIST_HEAD(&list);

	mutex_lock(&pq->lock);

	/* why not -ECOMM like qib_user_sdma_push_pkts() below? */
	if (!qib_sdma_running(ppd))
		goto done_unlock;

	/* if I have packets not complete yet */
	if (pq->added > ppd->sdma_descq_removed)
		qib_user_sdma_hwqueue_clean(ppd);
	/* if I have complete packets to be freed */
	if (pq->num_sending)
		qib_user_sdma_queue_clean(ppd, pq);

	while (dim) {
		int mxp = 1;
		int ndesc = 0;

		ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
				iov, dim, &list, &mxp, &ndesc);
		if (ret < 0)
			goto done_unlock;
		else {
			dim -= ret;
			iov += ret;
		}

		/* force packets onto the sdma hw queue... */
		if (!list_empty(&list)) {
			/*
			 * Lazily clean hw queue.
			 */
			if (qib_sdma_descq_freecnt(ppd) < ndesc) {
				qib_user_sdma_hwqueue_clean(ppd);
				if (pq->num_sending)
					qib_user_sdma_queue_clean(ppd, pq);
			}

			ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
			if (ret < 0)
				goto done_unlock;
			else {
				npkts += mxp;
				pq->counter += mxp;
			}
		}
	}

done_unlock:
	if (!list_empty(&list))
		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
	mutex_unlock(&pq->lock);

	return (ret < 0) ? ret : npkts;
}
int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
				struct qib_user_sdma_queue *pq)
{
	int ret = 0;

	mutex_lock(&pq->lock);
	qib_user_sdma_hwqueue_clean(ppd);
	ret = qib_user_sdma_queue_clean(ppd, pq);
	mutex_unlock(&pq->lock);

	return ret;
}
u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
{
	return pq ? pq->sent_counter : 0;
}

u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
{
	return pq ? pq->counter : 0;
}