/*
 * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
33 #include <linux/types.h>
34 #include <linux/device.h>
35 #include <linux/dmapool.h>
36 #include <linux/sched.h>
37 #include <linux/slab.h>
38 #include <linux/list.h>
39 #include <linux/highmem.h>
41 #include <linux/uio.h>
42 #include <linux/rbtree.h>
43 #include <linux/spinlock.h>
44 #include <linux/delay.h>
46 #include "ipath_kernel.h"
47 #include "ipath_user_sdma.h"
/* minimum size of header */
#define IPATH_USER_SDMA_MIN_HEADER_LENGTH	64
/* expected size of headers (for dma_pool) */
#define IPATH_USER_SDMA_EXP_HEADER_LENGTH	64
/* length mask in PBC (lower 11 bits) */
#define IPATH_PBC_LENGTH_MASK			((1 << 11) - 1)
56 struct ipath_user_sdma_pkt
{
57 u8 naddr
; /* dimension of addr (1..3) ... */
58 u32 counter
; /* sdma pkts queued counter for this entry */
59 u64 added
; /* global descq number of entries */
62 u32 offset
; /* offset for kvaddr, addr */
63 u32 length
; /* length in page */
64 u8 put_page
; /* should we put_page? */
65 u8 dma_mapped
; /* is page dma_mapped? */
66 struct page
*page
; /* may be NULL (coherent mem) */
67 void *kvaddr
; /* FIXME: only for pio hack */
69 } addr
[4]; /* max pages, any more and we coalesce */
70 struct list_head list
; /* list element */
73 struct ipath_user_sdma_queue
{
75 * pkts sent to dma engine are queued on this
76 * list head. the type of the elements of this
77 * list are struct ipath_user_sdma_pkt...
79 struct list_head sent
;
81 /* headers with expected length are allocated from here... */
82 char header_cache_name
[64];
83 struct dma_pool
*header_cache
;
85 /* packets are allocated from the slab cache... */
86 char pkt_slab_name
[64];
87 struct kmem_cache
*pkt_slab
;
89 /* as packets go on the queued queue, they are counted... */
94 struct rb_root dma_pages_root
;
96 /* protect everything above... */
/*
 * Create a user SDMA queue for (unit, port, sport) on device dev.
 *
 * Steps visible in this listing: kmalloc() the queue with GFP_KERNEL,
 * clear sent_counter, initialise the sent list and the lock mutex, create
 * a slab cache named "ipath-user-sdma-pkts-<unit>-<port>.<sport>" sized
 * for struct ipath_user_sdma_pkt, create a dma_pool named
 * "ipath-user-sdma-headers-<unit>-<port>.<sport>" with element size
 * IPATH_USER_SDMA_EXP_HEADER_LENGTH, and set dma_pages_root to RB_ROOT.
 *
 * NOTE(review): this listing has lost lines (braces, the remaining
 * arguments of kmem_cache_create() and dma_pool_create(), the return
 * statements), and no NULL check of the kmalloc() result is visible.
 * The trailing kmem_cache_destroy() presumably belongs to the unwind
 * path taken when dma_pool_create() fails -- confirm against the
 * upstream file before relying on this text.
 */
100 struct ipath_user_sdma_queue
*
101 ipath_user_sdma_queue_create(struct device
*dev
, int unit
, int port
, int sport
)
103 struct ipath_user_sdma_queue
*pq
=
104 kmalloc(sizeof(struct ipath_user_sdma_queue
), GFP_KERNEL
);
110 pq
->sent_counter
= 0;
111 INIT_LIST_HEAD(&pq
->sent
);
113 mutex_init(&pq
->lock
);
115 snprintf(pq
->pkt_slab_name
, sizeof(pq
->pkt_slab_name
),
116 "ipath-user-sdma-pkts-%u-%02u.%02u", unit
, port
, sport
);
117 pq
->pkt_slab
= kmem_cache_create(pq
->pkt_slab_name
,
118 sizeof(struct ipath_user_sdma_pkt
),
124 snprintf(pq
->header_cache_name
, sizeof(pq
->header_cache_name
),
125 "ipath-user-sdma-headers-%u-%02u.%02u", unit
, port
, sport
);
126 pq
->header_cache
= dma_pool_create(pq
->header_cache_name
,
128 IPATH_USER_SDMA_EXP_HEADER_LENGTH
,
130 if (!pq
->header_cache
)
133 pq
->dma_pages_root
= RB_ROOT
;
138 kmem_cache_destroy(pq
->pkt_slab
);
147 static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt
*pkt
,
148 int i
, size_t offset
, size_t len
,
149 int put_page
, int dma_mapped
,
151 void *kvaddr
, dma_addr_t dma_addr
)
153 pkt
->addr
[i
].offset
= offset
;
154 pkt
->addr
[i
].length
= len
;
155 pkt
->addr
[i
].put_page
= put_page
;
156 pkt
->addr
[i
].dma_mapped
= dma_mapped
;
157 pkt
->addr
[i
].page
= page
;
158 pkt
->addr
[i
].kvaddr
= kvaddr
;
159 pkt
->addr
[i
].addr
= dma_addr
;
/*
 * Initialise the header part of a packet: store counter in pkt->counter
 * and record the header buffer as fragment 0 through
 * ipath_user_sdma_init_frag(), passing put_page = 0.
 *
 * NOTE(review): lines are missing from this listing -- the parameter
 * that supplies `page`, the function braces and the tail of the
 * init_frag() call are not visible.  Restore them from the upstream file
 * before building.
 */
162 static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt
*pkt
,
163 u32 counter
, size_t offset
,
164 size_t len
, int dma_mapped
,
166 void *kvaddr
, dma_addr_t dma_addr
)
169 pkt
->counter
= counter
;
170 ipath_user_sdma_init_frag(pkt
, 0, offset
, len
, 0, dma_mapped
, page
,
/*
 * Copy every element of iov[0..niov) from user space into one freshly
 * allocated page, DMA-map that page from offset 0 for the accumulated
 * length, and record it as fragment 1 of pkt (put_page = 0,
 * dma_mapped = 1, kvaddr = mpage_save).
 *
 * NOTE(review): many lines are missing from this listing (local
 * declarations, the mapping of the page to mpage, error handling, the
 * return statements).  No bound on the accumulated length against the
 * single page is visible in this excerpt; presumably the caller
 * guarantees it -- confirm against the upstream file.
 */
174 /* we've too many pages in the iovec, coalesce to a single page */
175 static int ipath_user_sdma_coalesce(const struct ipath_devdata
*dd
,
176 struct ipath_user_sdma_pkt
*pkt
,
177 const struct iovec
*iov
,
178 unsigned long niov
) {
180 struct page
*page
= alloc_page(GFP_KERNEL
);
194 for (i
= 0; i
< niov
; i
++) {
197 cfur
= copy_from_user(mpage
,
198 iov
[i
].iov_base
, iov
[i
].iov_len
);
204 mpage
+= iov
[i
].iov_len
;
205 len
+= iov
[i
].iov_len
;
208 dma_addr
= dma_map_page(&dd
->pcidev
->dev
, page
, 0, len
,
210 if (dma_mapping_error(&dd
->pcidev
->dev
, dma_addr
)) {
215 ipath_user_sdma_init_frag(pkt
, 1, 0, len
, 0, 1, page
, mpage_save
,
/*
 * how many pages in this iovec element?
 *
 * Counts the pages from the one holding the first byte (iov_base) to the
 * one holding the last byte (iov_base + iov_len - 1), inclusive.
 *
 * The result is only meaningful for iov_len > 0: with a zero length the
 * "last byte" address is one byte before iov_base, which lands on the
 * previous page when iov_base is page aligned.
 */
static int ipath_user_sdma_num_pages(const struct iovec *iov)
{
	const unsigned long addr  = (unsigned long) iov->iov_base;
	const unsigned long len   = iov->iov_len;
	const unsigned long spage = addr & PAGE_MASK;
	const unsigned long epage = (addr + len - 1) & PAGE_MASK;

	return 1 + ((epage - spage) >> PAGE_SHIFT);
}
/*
 * truncate length to page boundary
 *
 * Returns len unchanged when [addr, addr + len) stays within the page
 * that contains addr, otherwise the number of bytes from addr to the end
 * of that page.
 */
static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
{
	const unsigned long offset = addr & ~PAGE_MASK;

	return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
}
/*
 * Release the resources held by one fragment (index i) of pkt.
 *
 * Visible behaviour: when the fragment has a page, the dma_mapped flag is
 * tested (the statement it guards is missing from this listing), the
 * page is kunmap()ed if kvaddr is set, and put_page() is called if the
 * put_page flag is set; a __free_page() call on the same page is also
 * visible.  When there is no page but kvaddr is set, the buffer is
 * returned to pq->header_cache with dma_pool_free().
 *
 * NOTE(review): the listing shows put_page() immediately followed by
 * __free_page(); an `else` between them appears to have been lost along
 * with other lines (the last parameter, the definition of i, the unmap
 * call).  Executing both would release the page twice -- confirm against
 * the upstream file.
 */
247 static void ipath_user_sdma_free_pkt_frag(struct device
*dev
,
248 struct ipath_user_sdma_queue
*pq
,
249 struct ipath_user_sdma_pkt
*pkt
,
254 if (pkt
->addr
[i
].page
) {
255 if (pkt
->addr
[i
].dma_mapped
)
261 if (pkt
->addr
[i
].kvaddr
)
262 kunmap(pkt
->addr
[i
].page
);
264 if (pkt
->addr
[i
].put_page
)
265 put_page(pkt
->addr
[i
].page
);
267 __free_page(pkt
->addr
[i
].page
);
268 } else if (pkt
->addr
[i
].kvaddr
)
269 /* free coherent mem from cache... */
270 dma_pool_free(pq
->header_cache
,
271 pkt
->addr
[i
].kvaddr
, pkt
->addr
[i
].addr
);
/*
 * Pin npages user pages starting at addr and append one fragment per
 * page to pkt.
 *
 * Visible behaviour: get_user_pages() is called for the current task and
 * mm with the pages stored in a local two-entry array; a loop over the
 * returned count follows (its body is missing from this listing).  Then,
 * for each of the npages pages, the fragment length is taken from
 * ipath_user_sdma_page_length(addr, tlen), the page is mapped with
 * dma_map_page(page, 0, flen, DMA_TO_DEVICE), the in-page offset is
 * addr & ~PAGE_MASK, and the fragment is recorded at slot pkt->naddr via
 * ipath_user_sdma_init_frag() with put_page = 1, dma_mapped = 1 and
 * kvaddr = kmap(page).
 *
 * NOTE(review): pages[] holds only two entries, so npages must never
 * exceed 2; the check that enforces this is not visible here -- confirm.
 * Lines are missing from this listing (local declarations, error paths,
 * the updates of addr, tlen and pkt->naddr, the return statement).
 */
274 /* return number of pages pinned... */
275 static int ipath_user_sdma_pin_pages(const struct ipath_devdata
*dd
,
276 struct ipath_user_sdma_pkt
*pkt
,
277 unsigned long addr
, int tlen
, int npages
)
279 struct page
*pages
[2];
283 ret
= get_user_pages(current
, current
->mm
, addr
,
284 npages
, 0, 1, pages
, NULL
);
289 for (i
= 0; i
< ret
; i
++)
296 for (j
= 0; j
< npages
; j
++) {
297 /* map the pages... */
299 ipath_user_sdma_page_length(addr
, tlen
);
300 dma_addr_t dma_addr
=
301 dma_map_page(&dd
->pcidev
->dev
,
302 pages
[j
], 0, flen
, DMA_TO_DEVICE
);
303 unsigned long fofs
= addr
& ~PAGE_MASK
;
305 if (dma_mapping_error(&dd
->pcidev
->dev
, dma_addr
)) {
310 ipath_user_sdma_init_frag(pkt
, pkt
->naddr
, fofs
, flen
, 1, 1,
311 pages
[j
], kmap(pages
[j
]),
/*
 * Pin the user pages behind every element of iov[0..niov) and attach
 * them to pkt as fragments.
 *
 * Visible behaviour: for each element the page count comes from
 * ipath_user_sdma_num_pages() and the pinning is delegated to
 * ipath_user_sdma_pin_pages() with the element address and length.  A
 * second loop frees every fragment in [0, pkt->naddr) through
 * ipath_user_sdma_free_pkt_frag(); presumably this is the failure path
 * -- confirm.
 *
 * NOTE(review): lines are missing from this listing (the niov parameter,
 * local declarations, the last argument of the pin_pages() call, the
 * error test, labels and the return statement).
 */
323 static int ipath_user_sdma_pin_pkt(const struct ipath_devdata
*dd
,
324 struct ipath_user_sdma_queue
*pq
,
325 struct ipath_user_sdma_pkt
*pkt
,
326 const struct iovec
*iov
,
332 for (idx
= 0; idx
< niov
; idx
++) {
333 const int npages
= ipath_user_sdma_num_pages(iov
+ idx
);
334 const unsigned long addr
= (unsigned long) iov
[idx
].iov_base
;
336 ret
= ipath_user_sdma_pin_pages(dd
, pkt
,
337 addr
, iov
[idx
].iov_len
,
346 for (idx
= 0; idx
< pkt
->naddr
; idx
++)
347 ipath_user_sdma_free_pkt_frag(&dd
->pcidev
->dev
, pq
, pkt
, idx
);
353 static int ipath_user_sdma_init_payload(const struct ipath_devdata
*dd
,
354 struct ipath_user_sdma_queue
*pq
,
355 struct ipath_user_sdma_pkt
*pkt
,
356 const struct iovec
*iov
,
357 unsigned long niov
, int npages
)
361 if (npages
>= ARRAY_SIZE(pkt
->addr
))
362 ret
= ipath_user_sdma_coalesce(dd
, pkt
, iov
, niov
);
364 ret
= ipath_user_sdma_pin_pkt(dd
, pq
, pkt
, iov
, niov
);
369 /* free a packet list -- return counter value of last packet */
370 static void ipath_user_sdma_free_pkt_list(struct device
*dev
,
371 struct ipath_user_sdma_queue
*pq
,
372 struct list_head
*list
)
374 struct ipath_user_sdma_pkt
*pkt
, *pkt_next
;
376 list_for_each_entry_safe(pkt
, pkt_next
, list
, list
) {
379 for (i
= 0; i
< pkt
->naddr
; i
++)
380 ipath_user_sdma_free_pkt_frag(dev
, pq
, pkt
, i
);
382 kmem_cache_free(pq
->pkt_slab
, pkt
);
/*
 * Build packets from the user iovec and append them to list.
 *
 * Visible flow, per packet, while idx < niov and npkts < maxpkts:
 *  - allocate a struct ipath_user_sdma_pkt from pq->pkt_slab;
 *  - validate the header element: length between
 *    IPATH_USER_SDMA_MIN_HEADER_LENGTH and PAGE_SIZE, and both length and
 *    address multiples of 4;
 *  - take header storage from pq->header_cache when the length equals
 *    IPATH_USER_SDMA_EXP_HEADER_LENGTH; an alloc_page() is also visible,
 *    presumably the fallback for other lengths -- confirm;
 *  - copy_from_user() the header, read the dword count from the first
 *    PBC word (masked with IPATH_PBC_LENGTH_MASK) and range-check it
 *    against pktnwc;
 *  - walk the following iovec elements until pktnwc reaches pktnw,
 *    checking alignment, non-zero length and whether an element crosses
 *    a page boundary;
 *  - dma_map_page() the header page, then ipath_user_sdma_init_header(),
 *    ipath_user_sdma_init_payload() and list_add_tail().
 * The tail of the function unwinds with dma_unmap_page(),
 * dma_pool_free(), kmem_cache_free() and ipath_user_sdma_free_pkt_list().
 *
 * NOTE(review): a large share of this function is missing from this
 * listing (the niov and maxpkts parameters, most local declarations, the
 * bodies of the error branches, labels, counters updates and the return
 * statement).  The conditions under which each unwind step runs cannot
 * be determined from what is shown here.
 */
387 * copy headers, coalesce etc -- pq->lock must be held
389 * we queue all the packets to list, returning the
390 * number of bytes total. list must be empty initially,
391 * as, if there is an error we clean it...
393 static int ipath_user_sdma_queue_pkts(const struct ipath_devdata
*dd
,
394 struct ipath_user_sdma_queue
*pq
,
395 struct list_head
*list
,
396 const struct iovec
*iov
,
400 unsigned long idx
= 0;
403 struct page
*page
= NULL
;
406 struct ipath_user_sdma_pkt
*pkt
= NULL
;
409 u32 counter
= pq
->counter
;
412 while (idx
< niov
&& npkts
< maxpkts
) {
413 const unsigned long addr
= (unsigned long) iov
[idx
].iov_base
;
414 const unsigned long idx_save
= idx
;
422 len
= iov
[idx
].iov_len
;
426 pkt
= kmem_cache_alloc(pq
->pkt_slab
, GFP_KERNEL
);
432 if (len
< IPATH_USER_SDMA_MIN_HEADER_LENGTH
||
433 len
> PAGE_SIZE
|| len
& 3 || addr
& 3) {
438 if (len
== IPATH_USER_SDMA_EXP_HEADER_LENGTH
)
439 pbc
= dma_pool_alloc(pq
->header_cache
, GFP_KERNEL
,
445 page
= alloc_page(GFP_KERNEL
);
453 cfur
= copy_from_user(pbc
, iov
[idx
].iov_base
, len
);
460 * this assignment is a bit strange. it's because the
461 * the pbc counts the number of 32 bit words in the full
462 * packet _except_ the first word of the pbc itself...
467 * pktnw computation yields the number of 32 bit words
468 * that the caller has indicated in the PBC. note that
469 * this is one less than the total number of words that
470 * goes to the send DMA engine as the first 32 bit word
471 * of the PBC itself is not counted. Armed with this count,
472 * we can verify that the packet is consistent with the
475 pktnw
= le32_to_cpu(*pbc
) & IPATH_PBC_LENGTH_MASK
;
476 if (pktnw
< pktnwc
|| pktnw
> pktnwc
+ (PAGE_SIZE
>> 2)) {
483 while (pktnwc
< pktnw
&& idx
< niov
) {
484 const size_t slen
= iov
[idx
].iov_len
;
485 const unsigned long faddr
=
486 (unsigned long) iov
[idx
].iov_base
;
488 if (slen
& 3 || faddr
& 3 || !slen
||
495 if ((faddr
& PAGE_MASK
) !=
496 ((faddr
+ slen
- 1) & PAGE_MASK
))
504 if (pktnwc
!= pktnw
) {
510 dma_addr
= dma_map_page(&dd
->pcidev
->dev
,
511 page
, 0, len
, DMA_TO_DEVICE
);
512 if (dma_mapping_error(&dd
->pcidev
->dev
, dma_addr
)) {
520 ipath_user_sdma_init_header(pkt
, counter
, 0, len
, dma_mapped
,
521 page
, pbc
, dma_addr
);
524 ret
= ipath_user_sdma_init_payload(dd
, pq
, pkt
,
534 list_add_tail(&pkt
->list
, list
);
542 dma_unmap_page(&dd
->pcidev
->dev
, dma_addr
, len
, DMA_TO_DEVICE
);
548 dma_pool_free(pq
->header_cache
, pbc
, dma_addr
);
550 kmem_cache_free(pq
->pkt_slab
, pkt
);
552 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, list
);
557 static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue
*pq
,
560 pq
->sent_counter
= c
;
/*
 * Move completed packets from pq->sent to a local free list, free them
 * with ipath_user_sdma_free_pkt_list(), and publish the counter of the
 * last packet on the free list through
 * ipath_user_sdma_set_complete_counter().
 *
 * For each packet on pq->sent the signed distance
 * dd->ipath_sdma_descq_removed - pkt->added is computed; the test that
 * is applied to it is missing from this listing.  Despite its name,
 * pkt_prev is the look-ahead cursor used by list_for_each_entry_safe().
 *
 * NOTE(review): lines are missing from this listing (the declaration of
 * counter, the completion test, the cleaned-packet count and the return
 * statement) -- confirm against the upstream file.
 */
563 /* try to clean out queue -- needs pq->lock */
564 static int ipath_user_sdma_queue_clean(const struct ipath_devdata
*dd
,
565 struct ipath_user_sdma_queue
*pq
)
567 struct list_head free_list
;
568 struct ipath_user_sdma_pkt
*pkt
;
569 struct ipath_user_sdma_pkt
*pkt_prev
;
572 INIT_LIST_HEAD(&free_list
);
574 list_for_each_entry_safe(pkt
, pkt_prev
, &pq
->sent
, list
) {
575 s64 descd
= dd
->ipath_sdma_descq_removed
- pkt
->added
;
580 list_move_tail(&pkt
->list
, &free_list
);
582 /* one more packet cleaned */
586 if (!list_empty(&free_list
)) {
589 pkt
= list_entry(free_list
.prev
,
590 struct ipath_user_sdma_pkt
, list
);
591 counter
= pkt
->counter
;
593 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &free_list
);
594 ipath_user_sdma_set_complete_counter(pq
, counter
);
/*
 * Tear down a queue: destroy the packet slab cache and the header
 * dma_pool.
 *
 * NOTE(review): lines are missing from this listing.  No NULL check on
 * pq and no kfree() of the queue itself (it is kmalloc()ed in
 * ipath_user_sdma_queue_create()) are visible here -- confirm against
 * the upstream file.
 */
600 void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue
*pq
)
605 kmem_cache_destroy(pq
->pkt_slab
);
606 dma_pool_destroy(pq
->header_cache
);
610 /* clean descriptor queue, returns > 0 if some elements cleaned */
611 static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata
*dd
)
616 spin_lock_irqsave(&dd
->ipath_sdma_lock
, flags
);
617 ret
= ipath_sdma_make_progress(dd
);
618 spin_unlock_irqrestore(&dd
->ipath_sdma_lock
, flags
);
/*
 * Wait for the packets on pq->sent to complete, then force-free whatever
 * is left.
 *
 * Visible behaviour: up to 100 rounds, each taking pq->lock, checking
 * whether pq->sent is empty (and unlocking if so), otherwise cleaning
 * the hardware queue and the software queue before unlocking.  After the
 * loop, if pq->sent is still not empty, a KERN_INFO message is printed
 * and the remaining packets are spliced onto a local list and freed
 * under pq->lock.
 *
 * NOTE(review): the final list_empty() test is made before pq->lock is
 * taken.  Lines are missing from this listing (the declaration of i,
 * what happens after the early unlock, and any delay between rounds) --
 * confirm against the upstream file.
 */
623 /* we're in close, drain packets so that we can cleanup successfully... */
624 void ipath_user_sdma_queue_drain(struct ipath_devdata
*dd
,
625 struct ipath_user_sdma_queue
*pq
)
632 for (i
= 0; i
< 100; i
++) {
633 mutex_lock(&pq
->lock
);
634 if (list_empty(&pq
->sent
)) {
635 mutex_unlock(&pq
->lock
);
638 ipath_user_sdma_hwqueue_clean(dd
);
639 ipath_user_sdma_queue_clean(dd
, pq
);
640 mutex_unlock(&pq
->lock
);
644 if (!list_empty(&pq
->sent
)) {
645 struct list_head free_list
;
647 printk(KERN_INFO
"drain: lists not empty: forcing!\n");
648 INIT_LIST_HEAD(&free_list
);
649 mutex_lock(&pq
->lock
);
650 list_splice_init(&pq
->sent
, &free_list
);
651 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &free_list
);
652 mutex_unlock(&pq
->lock
);
656 static inline __le64
ipath_sdma_make_desc0(struct ipath_devdata
*dd
,
657 u64 addr
, u64 dwlen
, u64 dwoffset
)
659 return cpu_to_le64(/* SDmaPhyAddr[31:0] */
660 ((addr
& 0xfffffffcULL
) << 32) |
661 /* SDmaGeneration[1:0] */
662 ((dd
->ipath_sdma_generation
& 3ULL) << 30) |
663 /* SDmaDwordCount[10:0] */
664 ((dwlen
& 0x7ffULL
) << 16) |
665 /* SDmaBufOffset[12:2] */
666 (dwoffset
& 0x7ffULL
));
669 static inline __le64
ipath_sdma_make_first_desc0(__le64 descq
)
671 return descq
| cpu_to_le64(1ULL << 12);
674 static inline __le64
ipath_sdma_make_last_desc0(__le64 descq
)
676 /* last */ /* dma head */
677 return descq
| cpu_to_le64(1ULL << 11 | 1ULL << 13);
680 static inline __le64
ipath_sdma_make_desc1(u64 addr
)
682 /* SDmaPhyAddr[47:32] */
683 return cpu_to_le64(addr
>> 32);
/*
 * Write the two-quadword descriptor for fragment idx of pkt into slot
 * tail of the hardware descriptor queue.
 *
 * Visible behaviour: the dma address is the fragment address plus its
 * offset, the length is converted from bytes to 32-bit words, the first
 * quadword comes from ipath_sdma_make_desc0() with ofs as the packet
 * offset, the "first" flag is added (the condition guarding it is
 * missing from this listing), the "last" flag is added when idx is the
 * final fragment (pkt->naddr - 1), and the second quadword is written
 * from ipath_sdma_make_desc1().
 *
 * NOTE(review): lines are missing from this listing (local declarations,
 * the condition for the first-descriptor flag, the store of descq0 into
 * descqp[0]) -- confirm against the upstream file.
 */
686 static void ipath_user_sdma_send_frag(struct ipath_devdata
*dd
,
687 struct ipath_user_sdma_pkt
*pkt
, int idx
,
688 unsigned ofs
, u16 tail
)
690 const u64 addr
= (u64
) pkt
->addr
[idx
].addr
+
691 (u64
) pkt
->addr
[idx
].offset
;
692 const u64 dwlen
= (u64
) pkt
->addr
[idx
].length
/ 4;
696 descqp
= &dd
->ipath_sdma_descq
[tail
].qw
[0];
698 descq0
= ipath_sdma_make_desc0(dd
, addr
, dwlen
, ofs
);
700 descq0
= ipath_sdma_make_first_desc0(descq0
);
701 if (idx
== pkt
->naddr
- 1)
702 descq0
= ipath_sdma_make_last_desc0(descq0
);
705 descqp
[1] = ipath_sdma_make_desc1(addr
);
/*
 * Write descriptors for the packets on pktlist into the hardware
 * descriptor queue and move each queued packet to pq->sent.
 *
 * Visible flow: early checks for an empty list and for a link without
 * IPATH_LINKACTIVE; dd->ipath_sdma_lock is taken with interrupts saved;
 * ipath_sdma_status is tested against IPATH_SDMA_ABORT_MASK.  Then, for
 * the packet at the head of pktlist:
 *  - jump to unlock_check_tail if it needs more descriptors than
 *    ipath_sdma_descq_freecnt() reports;
 *  - emit one descriptor per fragment with ipath_user_sdma_send_frag(),
 *    accumulating the dword offset in ofs and wrapping tail at
 *    ipath_sdma_descq_cnt (the generation counter is incremented on
 *    wrap);
 *  - compare the packet size in bytes (ofs << 2) with ipath_ibmaxlen and
 *    log a debug message when it is larger;
 *  - set bit 14 in every descriptor of the packet when
 *    ofs >= IPATH_SMALLBUF_DWORDS;
 *  - add naddr to ipath_sdma_descq_added, stamp pkt->added with the new
 *    total and move the packet to the tail of pq->sent.
 * Finally the chip register kr_senddmatail is written if tail moved, and
 * the spinlock is released.
 *
 * NOTE(review): lines are missing from this listing (local declarations,
 * the bodies of the early checks and of the abort/oversize branches, the
 * initialisation of ofs and dtail, labels and the return statement) --
 * confirm against the upstream file.
 */
708 /* pq->lock must be held, get packets on the wire... */
709 static int ipath_user_sdma_push_pkts(struct ipath_devdata
*dd
,
710 struct ipath_user_sdma_queue
*pq
,
711 struct list_head
*pktlist
)
717 if (list_empty(pktlist
))
720 if (unlikely(!(dd
->ipath_flags
& IPATH_LINKACTIVE
)))
723 spin_lock_irqsave(&dd
->ipath_sdma_lock
, flags
);
725 if (unlikely(dd
->ipath_sdma_status
& IPATH_SDMA_ABORT_MASK
)) {
730 tail
= dd
->ipath_sdma_descq_tail
;
731 while (!list_empty(pktlist
)) {
732 struct ipath_user_sdma_pkt
*pkt
=
733 list_entry(pktlist
->next
, struct ipath_user_sdma_pkt
,
739 if (pkt
->naddr
> ipath_sdma_descq_freecnt(dd
))
740 goto unlock_check_tail
;
742 for (i
= 0; i
< pkt
->naddr
; i
++) {
743 ipath_user_sdma_send_frag(dd
, pkt
, i
, ofs
, tail
);
744 ofs
+= pkt
->addr
[i
].length
>> 2;
746 if (++tail
== dd
->ipath_sdma_descq_cnt
) {
748 ++dd
->ipath_sdma_generation
;
752 if ((ofs
<<2) > dd
->ipath_ibmaxlen
) {
753 ipath_dbg("packet size %X > ibmax %X, fail\n",
754 ofs
<<2, dd
->ipath_ibmaxlen
);
760 * if the packet is >= 2KB mtu equivalent, we have to use
761 * the large buffers, and have to mark each descriptor as
762 * part of a large buffer packet.
764 if (ofs
>= IPATH_SMALLBUF_DWORDS
) {
765 for (i
= 0; i
< pkt
->naddr
; i
++) {
766 dd
->ipath_sdma_descq
[dtail
].qw
[0] |=
767 cpu_to_le64(1ULL << 14);
768 if (++dtail
== dd
->ipath_sdma_descq_cnt
)
773 dd
->ipath_sdma_descq_added
+= pkt
->naddr
;
774 pkt
->added
= dd
->ipath_sdma_descq_added
;
775 list_move_tail(&pkt
->list
, &pq
->sent
);
780 /* advance the tail on the chip if necessary */
781 if (dd
->ipath_sdma_descq_tail
!= tail
) {
783 ipath_write_kreg(dd
, dd
->ipath_kregs
->kr_senddmatail
, tail
);
784 dd
->ipath_sdma_descq_tail
= tail
;
788 spin_unlock_irqrestore(&dd
->ipath_sdma_lock
, flags
);
/*
 * Entry point for a user writev on the SDMA queue: turn the iovec into
 * packets and push them to the hardware.
 *
 * Visible flow, all under pq->lock: if descriptors are outstanding
 * (ipath_sdma_descq_added != ipath_sdma_descq_removed) the hardware and
 * software queues are cleaned first; packets are then built by
 * ipath_user_sdma_queue_pkts() while the mmap_sem of the current mm is
 * held for writing; if any were built, the queues are cleaned again when
 * ipath_sdma_descq_freecnt() is below four times the value returned by
 * queue_pkts(), and the packets are handed to
 * ipath_user_sdma_push_pkts().  Packets still on the local list at the
 * end are freed.  The return value is ret when it is negative, otherwise
 * npkts.
 *
 * NOTE(review): the two occurrences of the text that reads as a currency
 * sign followed by "t" look like an HTML-entity corruption of
 * "&current" -- restore before building.  Lines are also missing from
 * this listing (the dim parameter, local declarations such as ret, mxp
 * and npkts, the loop structure around queue_pkts()/push_pkts(), error
 * branches and labels) -- confirm against the upstream file.
 */
793 int ipath_user_sdma_writev(struct ipath_devdata
*dd
,
794 struct ipath_user_sdma_queue
*pq
,
795 const struct iovec
*iov
,
799 struct list_head list
;
802 INIT_LIST_HEAD(&list
);
804 mutex_lock(&pq
->lock
);
806 if (dd
->ipath_sdma_descq_added
!= dd
->ipath_sdma_descq_removed
) {
807 ipath_user_sdma_hwqueue_clean(dd
);
808 ipath_user_sdma_queue_clean(dd
, pq
);
814 down_write(¤t
->mm
->mmap_sem
);
815 ret
= ipath_user_sdma_queue_pkts(dd
, pq
, &list
, iov
, dim
, mxp
);
816 up_write(¤t
->mm
->mmap_sem
);
825 /* force packets onto the sdma hw queue... */
826 if (!list_empty(&list
)) {
828 * lazily clean hw queue. the 4 is a guess of about
829 * how many sdma descriptors a packet will take (it
830 * doesn't have to be perfect).
832 if (ipath_sdma_descq_freecnt(dd
) < ret
* 4) {
833 ipath_user_sdma_hwqueue_clean(dd
);
834 ipath_user_sdma_queue_clean(dd
, pq
);
837 ret
= ipath_user_sdma_push_pkts(dd
, pq
, &list
);
844 if (!list_empty(&list
))
851 if (!list_empty(&list
))
852 ipath_user_sdma_free_pkt_list(&dd
->pcidev
->dev
, pq
, &list
);
853 mutex_unlock(&pq
->lock
);
855 return (ret
< 0) ? ret
: npkts
;
858 int ipath_user_sdma_make_progress(struct ipath_devdata
*dd
,
859 struct ipath_user_sdma_queue
*pq
)
863 mutex_lock(&pq
->lock
);
864 ipath_user_sdma_hwqueue_clean(dd
);
865 ret
= ipath_user_sdma_queue_clean(dd
, pq
);
866 mutex_unlock(&pq
->lock
);
871 u32
ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue
*pq
)
873 return pq
->sent_counter
;
876 u32
ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue
*pq
)