2 * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/types.h>
34 #include <linux/device.h>
35 #include <linux/dmapool.h>
36 #include <linux/slab.h>
37 #include <linux/list.h>
38 #include <linux/highmem.h>
40 #include <linux/uio.h>
41 #include <linux/rbtree.h>
42 #include <linux/spinlock.h>
43 #include <linux/delay.h>
46 #include "qib_user_sdma.h"
48 /* minimum size of header */
49 #define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
50 /* expected size of headers (for dma_pool) */
51 #define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
52 /* attempt to drain the queue for 5secs */
53 #define QIB_USER_SDMA_DRAIN_TIMEOUT 250
56 * Track how many times a process has opened this driver.
58 static struct rb_root qib_user_sdma_rb_root
= RB_ROOT
;
60 struct qib_user_sdma_rb_node
{
66 struct qib_user_sdma_pkt
{
67 struct list_head list
; /* list element */
69 u8 tiddma
; /* if this is NEW tid-sdma */
70 u8 largepkt
; /* this is large pkt from kmalloc */
71 u16 frag_size
; /* frag size used by PSM */
72 u16 index
; /* last header index or push index */
73 u16 naddr
; /* dimension of addr (1..3) ... */
74 u16 addrlimit
; /* addr array size */
75 u16 tidsmidx
; /* current tidsm index */
76 u16 tidsmcount
; /* tidsm array item count */
77 u16 payload_size
; /* payload size so far for header */
78 u32 bytes_togo
; /* bytes for processing */
79 u32 counter
; /* sdma pkts queued counter for this entry */
80 struct qib_tid_session_member
*tidsm
; /* tid session member array */
81 struct qib_user_sdma_queue
*pq
; /* which pq this pkt belongs to */
82 u64 added
; /* global descq number of entries */
85 u16 offset
; /* offset for kvaddr, addr */
86 u16 length
; /* length in page */
87 u16 first_desc
; /* first desc */
88 u16 last_desc
; /* last desc */
89 u16 put_page
; /* should we put_page? */
90 u16 dma_mapped
; /* is page dma_mapped? */
91 u16 dma_length
; /* for dma_unmap_page() */
93 struct page
*page
; /* may be NULL (coherent mem) */
94 void *kvaddr
; /* FIXME: only for pio hack */
96 } addr
[4]; /* max pages, any more and we coalesce */
99 struct qib_user_sdma_queue
{
101 * pkts sent to dma engine are queued on this
102 * list head. the type of the elements of this
103 * list are struct qib_user_sdma_pkt...
105 struct list_head sent
;
108 * Because above list will be accessed by both process and
109 * signal handler, we need a spinlock for it.
111 spinlock_t sent_lock ____cacheline_aligned_in_smp
;
113 /* headers with expected length are allocated from here... */
114 char header_cache_name
[64];
115 struct dma_pool
*header_cache
;
117 /* packets are allocated from the slab cache... */
118 char pkt_slab_name
[64];
119 struct kmem_cache
*pkt_slab
;
121 /* as packets go on the queued queue, they are counted... */
124 /* pending packets, not sending yet */
126 /* sending packets, not complete yet */
128 /* global descq number of entry of last sending packet */
132 struct rb_root dma_pages_root
;
134 struct qib_user_sdma_rb_node
*sdma_rb_node
;
136 /* protect everything above... */
140 static struct qib_user_sdma_rb_node
*
141 qib_user_sdma_rb_search(struct rb_root
*root
, pid_t pid
)
143 struct qib_user_sdma_rb_node
*sdma_rb_node
;
144 struct rb_node
*node
= root
->rb_node
;
147 sdma_rb_node
= container_of(node
,
148 struct qib_user_sdma_rb_node
, node
);
149 if (pid
< sdma_rb_node
->pid
)
150 node
= node
->rb_left
;
151 else if (pid
> sdma_rb_node
->pid
)
152 node
= node
->rb_right
;
160 qib_user_sdma_rb_insert(struct rb_root
*root
, struct qib_user_sdma_rb_node
*new)
162 struct rb_node
**node
= &(root
->rb_node
);
163 struct rb_node
*parent
= NULL
;
164 struct qib_user_sdma_rb_node
*got
;
167 got
= container_of(*node
, struct qib_user_sdma_rb_node
, node
);
169 if (new->pid
< got
->pid
)
170 node
= &((*node
)->rb_left
);
171 else if (new->pid
> got
->pid
)
172 node
= &((*node
)->rb_right
);
177 rb_link_node(&new->node
, parent
, node
);
178 rb_insert_color(&new->node
, root
);
182 struct qib_user_sdma_queue
*
183 qib_user_sdma_queue_create(struct device
*dev
, int unit
, int ctxt
, int sctxt
)
185 struct qib_user_sdma_queue
*pq
=
186 kmalloc(sizeof(struct qib_user_sdma_queue
), GFP_KERNEL
);
187 struct qib_user_sdma_rb_node
*sdma_rb_node
;
193 pq
->sent_counter
= 0;
197 pq
->sdma_rb_node
= NULL
;
199 INIT_LIST_HEAD(&pq
->sent
);
200 spin_lock_init(&pq
->sent_lock
);
201 mutex_init(&pq
->lock
);
203 snprintf(pq
->pkt_slab_name
, sizeof(pq
->pkt_slab_name
),
204 "qib-user-sdma-pkts-%u-%02u.%02u", unit
, ctxt
, sctxt
);
205 pq
->pkt_slab
= kmem_cache_create(pq
->pkt_slab_name
,
206 sizeof(struct qib_user_sdma_pkt
),
212 snprintf(pq
->header_cache_name
, sizeof(pq
->header_cache_name
),
213 "qib-user-sdma-headers-%u-%02u.%02u", unit
, ctxt
, sctxt
);
214 pq
->header_cache
= dma_pool_create(pq
->header_cache_name
,
216 QIB_USER_SDMA_EXP_HEADER_LENGTH
,
218 if (!pq
->header_cache
)
221 pq
->dma_pages_root
= RB_ROOT
;
223 sdma_rb_node
= qib_user_sdma_rb_search(&qib_user_sdma_rb_root
,
226 sdma_rb_node
->refcount
++;
230 sdma_rb_node
= kmalloc(sizeof(
231 struct qib_user_sdma_rb_node
), GFP_KERNEL
);
235 sdma_rb_node
->refcount
= 1;
236 sdma_rb_node
->pid
= current
->pid
;
238 ret
= qib_user_sdma_rb_insert(&qib_user_sdma_rb_root
,
242 pq
->sdma_rb_node
= sdma_rb_node
;
247 dma_pool_destroy(pq
->header_cache
);
249 kmem_cache_destroy(pq
->pkt_slab
);
258 static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt
*pkt
,
259 int i
, u16 offset
, u16 len
,
260 u16 first_desc
, u16 last_desc
,
261 u16 put_page
, u16 dma_mapped
,
262 struct page
*page
, void *kvaddr
,
263 dma_addr_t dma_addr
, u16 dma_length
)
265 pkt
->addr
[i
].offset
= offset
;
266 pkt
->addr
[i
].length
= len
;
267 pkt
->addr
[i
].first_desc
= first_desc
;
268 pkt
->addr
[i
].last_desc
= last_desc
;
269 pkt
->addr
[i
].put_page
= put_page
;
270 pkt
->addr
[i
].dma_mapped
= dma_mapped
;
271 pkt
->addr
[i
].page
= page
;
272 pkt
->addr
[i
].kvaddr
= kvaddr
;
273 pkt
->addr
[i
].addr
= dma_addr
;
274 pkt
->addr
[i
].dma_length
= dma_length
;
277 static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue
*pq
,
278 size_t len
, dma_addr_t
*dma_addr
)
282 if (len
== QIB_USER_SDMA_EXP_HEADER_LENGTH
)
283 hdr
= dma_pool_alloc(pq
->header_cache
, GFP_KERNEL
,
289 hdr
= kmalloc(len
, GFP_KERNEL
);
299 static int qib_user_sdma_page_to_frags(const struct qib_devdata
*dd
,
300 struct qib_user_sdma_queue
*pq
,
301 struct qib_user_sdma_pkt
*pkt
,
302 struct page
*page
, u16 put
,
303 u16 offset
, u16 len
, void *kvaddr
)
307 struct qib_message_header
*hdr
;
308 u16 newlen
, pbclen
, lastdesc
, dma_mapped
;
310 union qib_seqnum seqnum
;
312 dma_addr_t dma_addr
=
313 dma_map_page(&dd
->pcidev
->dev
,
314 page
, offset
, len
, DMA_TO_DEVICE
);
317 if (dma_mapping_error(&dd
->pcidev
->dev
, dma_addr
)) {
319 * dma mapping error, pkt has not managed
320 * this page yet, return the page here so
321 * the caller can ignore this page.
340 * In tid-sdma, the transfer length is restricted by
341 * receiver side current tid page length.
343 if (pkt
->tiddma
&& len
> pkt
->tidsm
[pkt
->tidsmidx
].length
)
344 newlen
= pkt
->tidsm
[pkt
->tidsmidx
].length
;
349 * Then the transfer length is restricted by MTU.
350 * the last descriptor flag is determined by:
351 * 1. the current packet is at frag size length.
352 * 2. the current tid page is done if tid-sdma.
353 * 3. there is no more byte togo if sdma.
356 if ((pkt
->payload_size
+ newlen
) >= pkt
->frag_size
) {
357 newlen
= pkt
->frag_size
- pkt
->payload_size
;
359 } else if (pkt
->tiddma
) {
360 if (newlen
== pkt
->tidsm
[pkt
->tidsmidx
].length
)
363 if (newlen
== pkt
->bytes_togo
)
367 /* fill the next fragment in this page */
368 qib_user_sdma_init_frag(pkt
, pkt
->naddr
, /* index */
369 offset
, newlen
, /* offset, len */
370 0, lastdesc
, /* first last desc */
371 put
, dma_mapped
, /* put page, dma mapped */
372 page
, kvaddr
, /* struct page, virt addr */
373 dma_addr
, len
); /* dma addr, dma length */
374 pkt
->bytes_togo
-= newlen
;
375 pkt
->payload_size
+= newlen
;
377 if (pkt
->naddr
== pkt
->addrlimit
) {
382 /* If there is no more byte togo. (lastdesc==1) */
383 if (pkt
->bytes_togo
== 0) {
384 /* The packet is done, header is not dma mapped yet.
385 * it should be from kmalloc */
386 if (!pkt
->addr
[pkt
->index
].addr
) {
387 pkt
->addr
[pkt
->index
].addr
=
388 dma_map_single(&dd
->pcidev
->dev
,
389 pkt
->addr
[pkt
->index
].kvaddr
,
390 pkt
->addr
[pkt
->index
].dma_length
,
392 if (dma_mapping_error(&dd
->pcidev
->dev
,
393 pkt
->addr
[pkt
->index
].addr
)) {
397 pkt
->addr
[pkt
->index
].dma_mapped
= 1;
403 /* If tid-sdma, advance tid info. */
405 pkt
->tidsm
[pkt
->tidsmidx
].length
-= newlen
;
406 if (pkt
->tidsm
[pkt
->tidsmidx
].length
) {
407 pkt
->tidsm
[pkt
->tidsmidx
].offset
+= newlen
;
410 if (pkt
->tidsmidx
== pkt
->tidsmcount
) {
418 * If this is NOT the last descriptor. (newlen==len)
419 * the current packet is not done yet, but the current
420 * send side page is done.
426 * If running this driver under PSM with message size
427 * fitting into one transfer unit, it is not possible
428 * to pass this line; otherwise, it is a bug.
432 * Since the current packet is done, and there are more
433 * bytes togo, we need to create a new sdma header, copying
434 * from previous sdma header and modify both.
436 pbclen
= pkt
->addr
[pkt
->index
].length
;
437 pbcvaddr
= qib_user_sdma_alloc_header(pq
, pbclen
, &pbcdaddr
);
442 /* Copy the previous sdma header to new sdma header */
443 pbc16
= (__le16
*)pkt
->addr
[pkt
->index
].kvaddr
;
444 memcpy(pbcvaddr
, pbc16
, pbclen
);
446 /* Modify the previous sdma header */
447 hdr
= (struct qib_message_header
*)&pbc16
[4];
450 pbc16
[0] = cpu_to_le16(le16_to_cpu(pbc16
[0])-(pkt
->bytes_togo
>>2));
452 /* New packet length */
453 hdr
->lrh
[2] = cpu_to_be16(le16_to_cpu(pbc16
[0]));
456 /* turn on the header suppression */
458 cpu_to_le16(le16_to_cpu(hdr
->iph
.pkt_flags
)|0x2);
459 /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
460 hdr
->flags
&= ~(0x04|0x20);
462 /* turn off extra bytes: 20-21 bits */
463 hdr
->bth
[0] = cpu_to_be32(be32_to_cpu(hdr
->bth
[0])&0xFFCFFFFF);
464 /* turn off ACK_REQ: 0x04 */
465 hdr
->flags
&= ~(0x04);
468 /* New kdeth checksum */
469 vcto
= le32_to_cpu(hdr
->iph
.ver_ctxt_tid_offset
);
470 hdr
->iph
.chksum
= cpu_to_le16(QIB_LRH_BTH
+
471 be16_to_cpu(hdr
->lrh
[2]) -
472 ((vcto
>>16)&0xFFFF) - (vcto
&0xFFFF) -
473 le16_to_cpu(hdr
->iph
.pkt_flags
));
475 /* The packet is done, header is not dma mapped yet.
476 * it should be from kmalloc */
477 if (!pkt
->addr
[pkt
->index
].addr
) {
478 pkt
->addr
[pkt
->index
].addr
=
479 dma_map_single(&dd
->pcidev
->dev
,
480 pkt
->addr
[pkt
->index
].kvaddr
,
481 pkt
->addr
[pkt
->index
].dma_length
,
483 if (dma_mapping_error(&dd
->pcidev
->dev
,
484 pkt
->addr
[pkt
->index
].addr
)) {
488 pkt
->addr
[pkt
->index
].dma_mapped
= 1;
491 /* Modify the new sdma header */
492 pbc16
= (__le16
*)pbcvaddr
;
493 hdr
= (struct qib_message_header
*)&pbc16
[4];
496 pbc16
[0] = cpu_to_le16(le16_to_cpu(pbc16
[0])-(pkt
->payload_size
>>2));
498 /* New packet length */
499 hdr
->lrh
[2] = cpu_to_be16(le16_to_cpu(pbc16
[0]));
502 /* Set new tid and offset for new sdma header */
503 hdr
->iph
.ver_ctxt_tid_offset
= cpu_to_le32(
504 (le32_to_cpu(hdr
->iph
.ver_ctxt_tid_offset
)&0xFF000000) +
505 (pkt
->tidsm
[pkt
->tidsmidx
].tid
<<QLOGIC_IB_I_TID_SHIFT
) +
506 (pkt
->tidsm
[pkt
->tidsmidx
].offset
>>2));
508 /* Middle protocol new packet offset */
509 hdr
->uwords
[2] += pkt
->payload_size
;
512 /* New kdeth checksum */
513 vcto
= le32_to_cpu(hdr
->iph
.ver_ctxt_tid_offset
);
514 hdr
->iph
.chksum
= cpu_to_le16(QIB_LRH_BTH
+
515 be16_to_cpu(hdr
->lrh
[2]) -
516 ((vcto
>>16)&0xFFFF) - (vcto
&0xFFFF) -
517 le16_to_cpu(hdr
->iph
.pkt_flags
));
519 /* Next sequence number in new sdma header */
520 seqnum
.val
= be32_to_cpu(hdr
->bth
[2]);
525 hdr
->bth
[2] = cpu_to_be32(seqnum
.val
);
527 /* Init new sdma header. */
528 qib_user_sdma_init_frag(pkt
, pkt
->naddr
, /* index */
529 0, pbclen
, /* offset, len */
530 1, 0, /* first last desc */
531 0, 0, /* put page, dma mapped */
532 NULL
, pbcvaddr
, /* struct page, virt addr */
533 pbcdaddr
, pbclen
); /* dma addr, dma length */
534 pkt
->index
= pkt
->naddr
;
535 pkt
->payload_size
= 0;
537 if (pkt
->naddr
== pkt
->addrlimit
) {
542 /* Prepare for next fragment in this page */
560 /* we've too many pages in the iovec, coalesce to a single page */
561 static int qib_user_sdma_coalesce(const struct qib_devdata
*dd
,
562 struct qib_user_sdma_queue
*pq
,
563 struct qib_user_sdma_pkt
*pkt
,
564 const struct iovec
*iov
,
568 struct page
*page
= alloc_page(GFP_KERNEL
);
581 for (i
= 0; i
< niov
; i
++) {
584 cfur
= copy_from_user(mpage
,
585 iov
[i
].iov_base
, iov
[i
].iov_len
);
591 mpage
+= iov
[i
].iov_len
;
592 len
+= iov
[i
].iov_len
;
595 ret
= qib_user_sdma_page_to_frags(dd
, pq
, pkt
,
596 page
, 0, 0, len
, mpage_save
);
607 * How many pages in this iovec element?
609 static int qib_user_sdma_num_pages(const struct iovec
*iov
)
611 const unsigned long addr
= (unsigned long) iov
->iov_base
;
612 const unsigned long len
= iov
->iov_len
;
613 const unsigned long spage
= addr
& PAGE_MASK
;
614 const unsigned long epage
= (addr
+ len
- 1) & PAGE_MASK
;
616 return 1 + ((epage
- spage
) >> PAGE_SHIFT
);
619 static void qib_user_sdma_free_pkt_frag(struct device
*dev
,
620 struct qib_user_sdma_queue
*pq
,
621 struct qib_user_sdma_pkt
*pkt
,
626 if (pkt
->addr
[i
].page
) {
627 /* only user data has page */
628 if (pkt
->addr
[i
].dma_mapped
)
631 pkt
->addr
[i
].dma_length
,
634 if (pkt
->addr
[i
].kvaddr
)
635 kunmap(pkt
->addr
[i
].page
);
637 if (pkt
->addr
[i
].put_page
)
638 put_page(pkt
->addr
[i
].page
);
640 __free_page(pkt
->addr
[i
].page
);
641 } else if (pkt
->addr
[i
].kvaddr
) {
643 if (pkt
->addr
[i
].dma_mapped
) {
644 /* from kmalloc & dma mapped */
645 dma_unmap_single(dev
,
647 pkt
->addr
[i
].dma_length
,
649 kfree(pkt
->addr
[i
].kvaddr
);
650 } else if (pkt
->addr
[i
].addr
) {
651 /* free coherent mem from cache... */
652 dma_pool_free(pq
->header_cache
,
653 pkt
->addr
[i
].kvaddr
, pkt
->addr
[i
].addr
);
655 /* from kmalloc but not dma mapped */
656 kfree(pkt
->addr
[i
].kvaddr
);
661 /* return number of pages pinned... */
662 static int qib_user_sdma_pin_pages(const struct qib_devdata
*dd
,
663 struct qib_user_sdma_queue
*pq
,
664 struct qib_user_sdma_pkt
*pkt
,
665 unsigned long addr
, int tlen
, int npages
)
667 struct page
*pages
[8];
677 ret
= get_user_pages_fast(addr
, j
, 0, pages
);
685 for (i
= 0; i
< j
; i
++) {
686 /* map the pages... */
687 unsigned long fofs
= addr
& ~PAGE_MASK
;
688 int flen
= ((fofs
+ tlen
) > PAGE_SIZE
) ?
689 (PAGE_SIZE
- fofs
) : tlen
;
691 ret
= qib_user_sdma_page_to_frags(dd
, pq
, pkt
,
692 pages
[i
], 1, fofs
, flen
, NULL
);
694 /* current page has been taken
695 * care of inside above call.
710 /* if error, return all pages not managed by pkt */
713 put_page(pages
[i
++]);
719 static int qib_user_sdma_pin_pkt(const struct qib_devdata
*dd
,
720 struct qib_user_sdma_queue
*pq
,
721 struct qib_user_sdma_pkt
*pkt
,
722 const struct iovec
*iov
,
728 for (idx
= 0; idx
< niov
; idx
++) {
729 const int npages
= qib_user_sdma_num_pages(iov
+ idx
);
730 const unsigned long addr
= (unsigned long) iov
[idx
].iov_base
;
732 ret
= qib_user_sdma_pin_pages(dd
, pq
, pkt
, addr
,
733 iov
[idx
].iov_len
, npages
);
741 /* we need to ignore the first entry here */
742 for (idx
= 1; idx
< pkt
->naddr
; idx
++)
743 qib_user_sdma_free_pkt_frag(&dd
->pcidev
->dev
, pq
, pkt
, idx
);
745 /* need to dma unmap the first entry, this is to restore to
746 * the original state so that caller can free the memory in
747 * error condition. Caller does not know if dma mapped or not. */
748 if (pkt
->addr
[0].dma_mapped
) {
749 dma_unmap_single(&dd
->pcidev
->dev
,
751 pkt
->addr
[0].dma_length
,
753 pkt
->addr
[0].addr
= 0;
754 pkt
->addr
[0].dma_mapped
= 0;
761 static int qib_user_sdma_init_payload(const struct qib_devdata
*dd
,
762 struct qib_user_sdma_queue
*pq
,
763 struct qib_user_sdma_pkt
*pkt
,
764 const struct iovec
*iov
,
765 unsigned long niov
, int npages
)
769 if (pkt
->frag_size
== pkt
->bytes_togo
&&
770 npages
>= ARRAY_SIZE(pkt
->addr
))
771 ret
= qib_user_sdma_coalesce(dd
, pq
, pkt
, iov
, niov
);
773 ret
= qib_user_sdma_pin_pkt(dd
, pq
, pkt
, iov
, niov
);
778 /* free every packet on the list and leave the list re-initialized (empty); returns nothing */
779 static void qib_user_sdma_free_pkt_list(struct device
*dev
,
780 struct qib_user_sdma_queue
*pq
,
781 struct list_head
*list
)
783 struct qib_user_sdma_pkt
*pkt
, *pkt_next
;
785 list_for_each_entry_safe(pkt
, pkt_next
, list
, list
) {
788 for (i
= 0; i
< pkt
->naddr
; i
++)
789 qib_user_sdma_free_pkt_frag(dev
, pq
, pkt
, i
);
794 kmem_cache_free(pq
->pkt_slab
, pkt
);
796 INIT_LIST_HEAD(list
);
800 * copy headers, coalesce etc -- pq->lock must be held
802 * we queue all the packets to list, returning the
803 * number of bytes total. list must be empty initially,
804 * as, if there is an error we clean it...
806 static int qib_user_sdma_queue_pkts(const struct qib_devdata
*dd
,
807 struct qib_pportdata
*ppd
,
808 struct qib_user_sdma_queue
*pq
,
809 const struct iovec
*iov
,
811 struct list_head
*list
,
812 int *maxpkts
, int *ndesc
)
814 unsigned long idx
= 0;
819 struct qib_user_sdma_pkt
*pkt
= NULL
;
822 u32 counter
= pq
->counter
;
825 while (idx
< niov
&& npkts
< *maxpkts
) {
826 const unsigned long addr
= (unsigned long) iov
[idx
].iov_base
;
827 const unsigned long idx_save
= idx
;
836 len
= iov
[idx
].iov_len
;
839 if (len
< QIB_USER_SDMA_MIN_HEADER_LENGTH
||
840 len
> PAGE_SIZE
|| len
& 3 || addr
& 3) {
845 pbc
= qib_user_sdma_alloc_header(pq
, len
, &dma_addr
);
851 cfur
= copy_from_user(pbc
, iov
[idx
].iov_base
, len
);
858 * This assignment is a bit strange. It's because the
859 * pbc counts the number of 32 bit words in the full
860 * packet _except_ the first word of the pbc itself...
865 * pktnw computation yields the number of 32 bit words
866 * that the caller has indicated in the PBC. note that
867 * this is one less than the total number of words that
868 * goes to the send DMA engine as the first 32 bit word
869 * of the PBC itself is not counted. Armed with this count,
870 * we can verify that the packet is consistent with the
873 pktnw
= le32_to_cpu(*pbc
) & 0xFFFF;
874 if (pktnw
< pktnwc
) {
880 while (pktnwc
< pktnw
&& idx
< niov
) {
881 const size_t slen
= iov
[idx
].iov_len
;
882 const unsigned long faddr
=
883 (unsigned long) iov
[idx
].iov_base
;
885 if (slen
& 3 || faddr
& 3 || !slen
) {
890 npages
+= qib_user_sdma_num_pages(&iov
[idx
]);
898 if (pktnwc
!= pktnw
) {
903 frag_size
= ((le32_to_cpu(*pbc
))>>16) & 0xFFFF;
904 if (((frag_size
? frag_size
: bytes_togo
) + len
) >
911 int pktsize
, tidsmsize
, n
;
913 n
= npages
*((2*PAGE_SIZE
/frag_size
)+1);
914 pktsize
= sizeof(*pkt
) + sizeof(pkt
->addr
[0])*n
;
917 * Determine if this is tid-sdma or just sdma.
919 tiddma
= (((le32_to_cpu(pbc
[7])>>
920 QLOGIC_IB_I_TID_SHIFT
)&
921 QLOGIC_IB_I_TID_MASK
) !=
922 QLOGIC_IB_I_TID_MASK
);
925 tidsmsize
= iov
[idx
].iov_len
;
929 pkt
= kmalloc(pktsize
+tidsmsize
, GFP_KERNEL
);
935 pkt
->frag_size
= frag_size
;
936 pkt
->addrlimit
= n
+ ARRAY_SIZE(pkt
->addr
);
939 char *tidsm
= (char *)pkt
+ pktsize
;
941 cfur
= copy_from_user(tidsm
,
942 iov
[idx
].iov_base
, tidsmsize
);
948 (struct qib_tid_session_member
*)tidsm
;
949 pkt
->tidsmcount
= tidsmsize
/
950 sizeof(struct qib_tid_session_member
);
956 * pbc 'fill1' field is borrowed to pass frag size,
957 * we need to clear it after picking frag size, the
958 * hardware requires this field to be zero.
960 *pbc
= cpu_to_le32(le32_to_cpu(*pbc
) & 0x0000FFFF);
962 pkt
= kmem_cache_alloc(pq
->pkt_slab
, GFP_KERNEL
);
968 pkt
->frag_size
= bytes_togo
;
969 pkt
->addrlimit
= ARRAY_SIZE(pkt
->addr
);
971 pkt
->bytes_togo
= bytes_togo
;
972 pkt
->payload_size
= 0;
973 pkt
->counter
= counter
;
974 pkt
->tiddma
= tiddma
;
976 /* setup the first header */
977 qib_user_sdma_init_frag(pkt
, 0, /* index */
978 0, len
, /* offset, len */
979 1, 0, /* first last desc */
980 0, 0, /* put page, dma mapped */
981 NULL
, pbc
, /* struct page, virt addr */
982 dma_addr
, len
); /* dma addr, dma length */
987 ret
= qib_user_sdma_init_payload(dd
, pq
, pkt
,
993 /* since there is no payload, mark the
994 * header as the last desc. */
995 pkt
->addr
[0].last_desc
= 1;
999 * the header is not dma mapped yet.
1000 * it should be from kmalloc.
1002 dma_addr
= dma_map_single(&dd
->pcidev
->dev
,
1003 pbc
, len
, DMA_TO_DEVICE
);
1004 if (dma_mapping_error(&dd
->pcidev
->dev
,
1009 pkt
->addr
[0].addr
= dma_addr
;
1010 pkt
->addr
[0].dma_mapped
= 1;
1017 pkt
->index
= 0; /* reset index for push on hw */
1018 *ndesc
+= pkt
->naddr
;
1020 list_add_tail(&pkt
->list
, list
);
1031 kmem_cache_free(pq
->pkt_slab
, pkt
);
1034 dma_pool_free(pq
->header_cache
, pbc
, dma_addr
);
1038 qib_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, list
);
1043 static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue
*pq
,
1046 pq
->sent_counter
= c
;
1049 /* try to clean out queue -- needs pq->lock */
1050 static int qib_user_sdma_queue_clean(struct qib_pportdata
*ppd
,
1051 struct qib_user_sdma_queue
*pq
)
1053 struct qib_devdata
*dd
= ppd
->dd
;
1054 struct list_head free_list
;
1055 struct qib_user_sdma_pkt
*pkt
;
1056 struct qib_user_sdma_pkt
*pkt_prev
;
1057 unsigned long flags
;
1060 if (!pq
->num_sending
)
1063 INIT_LIST_HEAD(&free_list
);
1066 * We need this spin lock here because interrupt handler
1067 * might modify this list in qib_user_sdma_send_desc(), also
1068 * we can not get interrupted, otherwise it is a deadlock.
1070 spin_lock_irqsave(&pq
->sent_lock
, flags
);
1071 list_for_each_entry_safe(pkt
, pkt_prev
, &pq
->sent
, list
) {
1072 s64 descd
= ppd
->sdma_descq_removed
- pkt
->added
;
1077 list_move_tail(&pkt
->list
, &free_list
);
1079 /* one more packet cleaned */
1083 spin_unlock_irqrestore(&pq
->sent_lock
, flags
);
1085 if (!list_empty(&free_list
)) {
1088 pkt
= list_entry(free_list
.prev
,
1089 struct qib_user_sdma_pkt
, list
);
1090 counter
= pkt
->counter
;
1092 qib_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &free_list
);
1093 qib_user_sdma_set_complete_counter(pq
, counter
);
1099 void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue
*pq
)
1104 pq
->sdma_rb_node
->refcount
--;
1105 if (pq
->sdma_rb_node
->refcount
== 0) {
1106 rb_erase(&pq
->sdma_rb_node
->node
, &qib_user_sdma_rb_root
);
1107 kfree(pq
->sdma_rb_node
);
1109 dma_pool_destroy(pq
->header_cache
);
1110 kmem_cache_destroy(pq
->pkt_slab
);
1114 /* clean descriptor queue, returns > 0 if some elements cleaned */
1115 static int qib_user_sdma_hwqueue_clean(struct qib_pportdata
*ppd
)
1118 unsigned long flags
;
1120 spin_lock_irqsave(&ppd
->sdma_lock
, flags
);
1121 ret
= qib_sdma_make_progress(ppd
);
1122 spin_unlock_irqrestore(&ppd
->sdma_lock
, flags
);
1127 /* we're in close, drain packets so that we can cleanup successfully... */
1128 void qib_user_sdma_queue_drain(struct qib_pportdata
*ppd
,
1129 struct qib_user_sdma_queue
*pq
)
1131 struct qib_devdata
*dd
= ppd
->dd
;
1132 unsigned long flags
;
1138 for (i
= 0; i
< QIB_USER_SDMA_DRAIN_TIMEOUT
; i
++) {
1139 mutex_lock(&pq
->lock
);
1140 if (!pq
->num_pending
&& !pq
->num_sending
) {
1141 mutex_unlock(&pq
->lock
);
1144 qib_user_sdma_hwqueue_clean(ppd
);
1145 qib_user_sdma_queue_clean(ppd
, pq
);
1146 mutex_unlock(&pq
->lock
);
1150 if (pq
->num_pending
|| pq
->num_sending
) {
1151 struct qib_user_sdma_pkt
*pkt
;
1152 struct qib_user_sdma_pkt
*pkt_prev
;
1153 struct list_head free_list
;
1155 mutex_lock(&pq
->lock
);
1156 spin_lock_irqsave(&ppd
->sdma_lock
, flags
);
1158 * Since we hold sdma_lock, it is safe without sent_lock.
1160 if (pq
->num_pending
) {
1161 list_for_each_entry_safe(pkt
, pkt_prev
,
1162 &ppd
->sdma_userpending
, list
) {
1163 if (pkt
->pq
== pq
) {
1164 list_move_tail(&pkt
->list
, &pq
->sent
);
1170 spin_unlock_irqrestore(&ppd
->sdma_lock
, flags
);
1172 qib_dev_err(dd
, "user sdma lists not empty: forcing!\n");
1173 INIT_LIST_HEAD(&free_list
);
1174 list_splice_init(&pq
->sent
, &free_list
);
1175 pq
->num_sending
= 0;
1176 qib_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &free_list
);
1177 mutex_unlock(&pq
->lock
);
1181 static inline __le64
qib_sdma_make_desc0(u8 gen
,
1182 u64 addr
, u64 dwlen
, u64 dwoffset
)
1184 return cpu_to_le64(/* SDmaPhyAddr[31:0] */
1185 ((addr
& 0xfffffffcULL
) << 32) |
1186 /* SDmaGeneration[1:0] */
1187 ((gen
& 3ULL) << 30) |
1188 /* SDmaDwordCount[10:0] */
1189 ((dwlen
& 0x7ffULL
) << 16) |
1190 /* SDmaBufOffset[12:2] */
1191 (dwoffset
& 0x7ffULL
));
1194 static inline __le64
qib_sdma_make_first_desc0(__le64 descq
)
1196 return descq
| cpu_to_le64(1ULL << 12);
1199 static inline __le64
qib_sdma_make_last_desc0(__le64 descq
)
1201 /* last */ /* dma head */
1202 return descq
| cpu_to_le64(1ULL << 11 | 1ULL << 13);
1205 static inline __le64
qib_sdma_make_desc1(u64 addr
)
1207 /* SDmaPhyAddr[47:32] */
1208 return cpu_to_le64(addr
>> 32);
1211 static void qib_user_sdma_send_frag(struct qib_pportdata
*ppd
,
1212 struct qib_user_sdma_pkt
*pkt
, int idx
,
1213 unsigned ofs
, u16 tail
, u8 gen
)
1215 const u64 addr
= (u64
) pkt
->addr
[idx
].addr
+
1216 (u64
) pkt
->addr
[idx
].offset
;
1217 const u64 dwlen
= (u64
) pkt
->addr
[idx
].length
/ 4;
1221 descqp
= &ppd
->sdma_descq
[tail
].qw
[0];
1223 descq0
= qib_sdma_make_desc0(gen
, addr
, dwlen
, ofs
);
1224 if (pkt
->addr
[idx
].first_desc
)
1225 descq0
= qib_sdma_make_first_desc0(descq0
);
1226 if (pkt
->addr
[idx
].last_desc
) {
1227 descq0
= qib_sdma_make_last_desc0(descq0
);
1228 if (ppd
->sdma_intrequest
) {
1229 descq0
|= cpu_to_le64(1ULL << 15);
1230 ppd
->sdma_intrequest
= 0;
1235 descqp
[1] = qib_sdma_make_desc1(addr
);
1238 void qib_user_sdma_send_desc(struct qib_pportdata
*ppd
,
1239 struct list_head
*pktlist
)
1241 struct qib_devdata
*dd
= ppd
->dd
;
1246 nfree
= qib_sdma_descq_freecnt(ppd
);
1252 tail_c
= tail
= ppd
->sdma_descq_tail
;
1253 gen_c
= gen
= ppd
->sdma_generation
;
1254 while (!list_empty(pktlist
)) {
1255 struct qib_user_sdma_pkt
*pkt
=
1256 list_entry(pktlist
->next
, struct qib_user_sdma_pkt
,
1262 for (i
= pkt
->index
; i
< pkt
->naddr
&& nfree
; i
++) {
1263 qib_user_sdma_send_frag(ppd
, pkt
, i
, ofs
, tail
, gen
);
1264 ofs
+= pkt
->addr
[i
].length
>> 2;
1266 if (++tail
== ppd
->sdma_descq_cnt
) {
1269 ppd
->sdma_intrequest
= 1;
1270 } else if (tail
== (ppd
->sdma_descq_cnt
>>1)) {
1271 ppd
->sdma_intrequest
= 1;
1274 if (pkt
->addr
[i
].last_desc
== 0)
1278 * If the packet is >= 2KB mtu equivalent, we
1279 * have to use the large buffers, and have to
1280 * mark each descriptor as part of a large
1283 if (ofs
> dd
->piosize2kmax_dwords
) {
1284 for (j
= pkt
->index
; j
<= i
; j
++) {
1285 ppd
->sdma_descq
[dtail
].qw
[0] |=
1286 cpu_to_le64(1ULL << 14);
1287 if (++dtail
== ppd
->sdma_descq_cnt
)
1291 c
+= i
+ 1 - pkt
->index
;
1292 pkt
->index
= i
+ 1; /* index for next first */
1293 tail_c
= dtail
= tail
;
1295 ofs
= 0; /* reset for next packet */
1298 ppd
->sdma_descq_added
+= c
;
1300 if (pkt
->index
== pkt
->naddr
) {
1301 pkt
->added
= ppd
->sdma_descq_added
;
1302 pkt
->pq
->added
= pkt
->added
;
1303 pkt
->pq
->num_pending
--;
1304 spin_lock(&pkt
->pq
->sent_lock
);
1305 pkt
->pq
->num_sending
++;
1306 list_move_tail(&pkt
->list
, &pkt
->pq
->sent
);
1307 spin_unlock(&pkt
->pq
->sent_lock
);
1309 if (!nfree
|| (nsent
<<2) > ppd
->sdma_descq_cnt
)
1313 /* advance the tail on the chip if necessary */
1314 if (ppd
->sdma_descq_tail
!= tail_c
) {
1315 ppd
->sdma_generation
= gen_c
;
1316 dd
->f_sdma_update_tail(ppd
, tail_c
);
1319 if (nfree
&& !list_empty(pktlist
))
1323 /* pq->lock must be held, get packets on the wire... */
1324 static int qib_user_sdma_push_pkts(struct qib_pportdata
*ppd
,
1325 struct qib_user_sdma_queue
*pq
,
1326 struct list_head
*pktlist
, int count
)
1328 unsigned long flags
;
1330 if (unlikely(!(ppd
->lflags
& QIBL_LINKACTIVE
)))
1333 /* non-blocking mode */
1334 if (pq
->sdma_rb_node
->refcount
> 1) {
1335 spin_lock_irqsave(&ppd
->sdma_lock
, flags
);
1336 if (unlikely(!__qib_sdma_running(ppd
))) {
1337 spin_unlock_irqrestore(&ppd
->sdma_lock
, flags
);
1340 pq
->num_pending
+= count
;
1341 list_splice_tail_init(pktlist
, &ppd
->sdma_userpending
);
1342 qib_user_sdma_send_desc(ppd
, &ppd
->sdma_userpending
);
1343 spin_unlock_irqrestore(&ppd
->sdma_lock
, flags
);
1347 /* In this case, descriptors from this process are not
1348 * linked to ppd pending queue, interrupt handler
1349 * won't update this process, it is OK to directly
1350 * modify without sdma lock.
1354 pq
->num_pending
+= count
;
1356 * Blocking mode for single rail process, we must
1357 * release/regain sdma_lock to give other process
1358 * chance to make progress. This is important for
1362 spin_lock_irqsave(&ppd
->sdma_lock
, flags
);
1363 if (unlikely(!__qib_sdma_running(ppd
))) {
1364 spin_unlock_irqrestore(&ppd
->sdma_lock
, flags
);
1367 qib_user_sdma_send_desc(ppd
, pktlist
);
1368 if (!list_empty(pktlist
))
1369 qib_sdma_make_progress(ppd
);
1370 spin_unlock_irqrestore(&ppd
->sdma_lock
, flags
);
1371 } while (!list_empty(pktlist
));
1376 int qib_user_sdma_writev(struct qib_ctxtdata
*rcd
,
1377 struct qib_user_sdma_queue
*pq
,
1378 const struct iovec
*iov
,
1381 struct qib_devdata
*dd
= rcd
->dd
;
1382 struct qib_pportdata
*ppd
= rcd
->ppd
;
1384 struct list_head list
;
1387 INIT_LIST_HEAD(&list
);
1389 mutex_lock(&pq
->lock
);
1391 /* why not -ECOMM like qib_user_sdma_push_pkts() below? */
1392 if (!qib_sdma_running(ppd
))
1395 /* if I have packets not complete yet */
1396 if (pq
->added
> ppd
->sdma_descq_removed
)
1397 qib_user_sdma_hwqueue_clean(ppd
);
1398 /* if I have complete packets to be freed */
1399 if (pq
->num_sending
)
1400 qib_user_sdma_queue_clean(ppd
, pq
);
1406 ret
= qib_user_sdma_queue_pkts(dd
, ppd
, pq
,
1407 iov
, dim
, &list
, &mxp
, &ndesc
);
1415 /* force packets onto the sdma hw queue... */
1416 if (!list_empty(&list
)) {
1418 * Lazily clean hw queue.
1420 if (qib_sdma_descq_freecnt(ppd
) < ndesc
) {
1421 qib_user_sdma_hwqueue_clean(ppd
);
1422 if (pq
->num_sending
)
1423 qib_user_sdma_queue_clean(ppd
, pq
);
1426 ret
= qib_user_sdma_push_pkts(ppd
, pq
, &list
, mxp
);
1437 if (!list_empty(&list
))
1438 qib_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &list
);
1439 mutex_unlock(&pq
->lock
);
1441 return (ret
< 0) ? ret
: npkts
;
1444 int qib_user_sdma_make_progress(struct qib_pportdata
*ppd
,
1445 struct qib_user_sdma_queue
*pq
)
1449 mutex_lock(&pq
->lock
);
1450 qib_user_sdma_hwqueue_clean(ppd
);
1451 ret
= qib_user_sdma_queue_clean(ppd
, pq
);
1452 mutex_unlock(&pq
->lock
);
1457 u32
qib_user_sdma_complete_counter(const struct qib_user_sdma_queue
*pq
)
1459 return pq
? pq
->sent_counter
: 0;
1462 u32
qib_user_sdma_inflight_counter(struct qib_user_sdma_queue
*pq
)
1464 return pq
? pq
->counter
: 0;