/*
 * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>

#include "ipath_kernel.h"
#include "ipath_user_sdma.h"
/* minimum size of header */
#define IPATH_USER_SDMA_MIN_HEADER_LENGTH	64
/* expected size of headers (for dma_pool) */
#define IPATH_USER_SDMA_EXP_HEADER_LENGTH	64
/* length mask in PBC (lower 11 bits) */
#define IPATH_PBC_LENGTH_MASK			((1 << 11) - 1)
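
/*
 * Note (added for clarity): the 11-bit PBC length field counts 32-bit
 * words, so the PBC can advertise at most 2047 dwords of packet; per
 * the comments in ipath_user_sdma_queue_pkts() below, the first PBC
 * word itself is not included in that count.
 */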

struct ipath_user_sdma_pkt {
	u8 naddr;		/* dimension of addr (1..3) ... */
	u32 counter;		/* sdma pkts queued counter for this entry */
	u64 added;		/* global descq number of entries */

	struct {
		u32 offset;		/* offset for kvaddr, addr */
		u32 length;		/* length in page */
		u8 put_page;		/* should we put_page? */
		u8 dma_mapped;		/* is page dma_mapped? */
		struct page *page;	/* may be NULL (coherent mem) */
		void *kvaddr;		/* FIXME: only for pio hack */
		dma_addr_t addr;
	} addr[4];	/* max pages, any more and we coalesce */

	struct list_head list;	/* list element */
};
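
/*
 * How addr[] is filled in practice (descriptive note added for
 * clarity): entry 0 always describes the header, either coherent
 * memory from the header dma_pool or a kmap()ed page; the remaining
 * entries describe pinned user pages of payload.  If a payload would
 * need more pages than fit in addr[], the payload is instead copied
 * ("coalesced") into a single freshly allocated page.
 */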

struct ipath_user_sdma_queue {
	/*
	 * pkts sent to dma engine are queued on this
	 * list head.  the type of the elements of this
	 * list are struct ipath_user_sdma_pkt...
	 */
	struct list_head sent;

	/* headers with expected length are allocated from here... */
	char header_cache_name[64];
	struct dma_pool *header_cache;

	/* packets are allocated from the slab cache... */
	char pkt_slab_name[64];
	struct kmem_cache *pkt_slab;

	/* as packets go on the queued queue, they are counted... */
	u32 counter;
	u32 sent_counter;

	struct rb_root dma_pages_root;

	/* protect everything above... */
	struct mutex lock;
};
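
/*
 * Locking summary (descriptive note added for clarity): pq->lock
 * serializes all access to the queue above -- queueing, cleaning and
 * draining -- while the device's ipath_sdma_lock spinlock is taken
 * only around manipulation of the hardware descriptor ring in the
 * push/progress paths below.
 */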

struct ipath_user_sdma_queue *
ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
{
	struct ipath_user_sdma_queue *pq =
		kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);

	if (!pq)
		goto done;

	pq->counter = 0;
	pq->sent_counter = 0;
	INIT_LIST_HEAD(&pq->sent);

	mutex_init(&pq->lock);

	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
		 "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
					 sizeof(struct ipath_user_sdma_pkt),
					 0, 0, NULL);
	if (!pq->pkt_slab)
		goto err_kfree;

	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
		 "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
	pq->header_cache = dma_pool_create(pq->header_cache_name,
					   dev,
					   IPATH_USER_SDMA_EXP_HEADER_LENGTH,
					   4, 0);
	if (!pq->header_cache)
		goto err_slab;

	pq->dma_pages_root = RB_ROOT;

	goto done;

err_slab:
	kmem_cache_destroy(pq->pkt_slab);
err_kfree:
	kfree(pq);
	pq = NULL;

done:
	return pq;
}

static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
				      int i, size_t offset, size_t len,
				      int put_page, int dma_mapped,
				      struct page *page,
				      void *kvaddr, dma_addr_t dma_addr)
{
	pkt->addr[i].offset = offset;
	pkt->addr[i].length = len;
	pkt->addr[i].put_page = put_page;
	pkt->addr[i].dma_mapped = dma_mapped;
	pkt->addr[i].page = page;
	pkt->addr[i].kvaddr = kvaddr;
	pkt->addr[i].addr = dma_addr;
}

static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
					u32 counter, size_t offset,
					size_t len, int dma_mapped,
					struct page *page,
					void *kvaddr, dma_addr_t dma_addr)
{
	pkt->naddr = 1;
	pkt->counter = counter;
	ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
				  kvaddr, dma_addr);
}

/* we've too many pages in the iovec, coalesce to a single page */
static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
				    struct ipath_user_sdma_pkt *pkt,
				    const struct iovec *iov,
				    unsigned long niov)
{
	int ret = 0;
	struct page *page = alloc_page(GFP_KERNEL);
	void *mpage_save;
	char *mpage;
	int i;
	int len = 0;
	dma_addr_t dma_addr;

	if (!page) {
		ret = -ENOMEM;
		goto done;
	}

	mpage = kmap(page);
	mpage_save = mpage;
	for (i = 0; i < niov; i++) {
		int cfur;

		cfur = copy_from_user(mpage,
				      iov[i].iov_base, iov[i].iov_len);
		if (cfur) {
			ret = -EFAULT;
			goto free_unmap;
		}

		mpage += iov[i].iov_len;
		len += iov[i].iov_len;
	}

	dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
				DMA_TO_DEVICE);
	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
		ret = -ENOMEM;
		goto free_unmap;
	}

	ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
				  dma_addr);
	pkt->naddr = 2;

	goto done;

free_unmap:
	kunmap(page);
	__free_page(page);
done:
	return ret;
}

/* how many pages in this iovec element? */
static int ipath_user_sdma_num_pages(const struct iovec *iov)
{
	const unsigned long addr  = (unsigned long) iov->iov_base;
	const unsigned long  len  = iov->iov_len;
	const unsigned long spage = addr & PAGE_MASK;
	const unsigned long epage = (addr + len - 1) & PAGE_MASK;

	return 1 + ((epage - spage) >> PAGE_SHIFT);
}
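
/*
 * Example (added for illustration): with 4 KB pages, an iovec element
 * with iov_base = 0x1ff0 and iov_len = 0x20 starts on the page at
 * 0x1000 and ends on the page at 0x2000, so the function above reports
 * 2 pages even though the element is only 32 bytes long.
 */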

/* truncate length to page boundary */
static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
{
	const unsigned long offset = addr & ~PAGE_MASK;

	return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
}

static void ipath_user_sdma_free_pkt_frag(struct device *dev,
					  struct ipath_user_sdma_queue *pq,
					  struct ipath_user_sdma_pkt *pkt,
					  int frag)
{
	const int i = frag;

	if (pkt->addr[i].page) {
		if (pkt->addr[i].dma_mapped)
			dma_unmap_page(dev,
				       pkt->addr[i].addr,
				       pkt->addr[i].length,
				       DMA_TO_DEVICE);

		if (pkt->addr[i].kvaddr)
			kunmap(pkt->addr[i].page);

		if (pkt->addr[i].put_page)
			put_page(pkt->addr[i].page);
		else
			__free_page(pkt->addr[i].page);
	} else if (pkt->addr[i].kvaddr)
		/* free coherent mem from cache... */
		dma_pool_free(pq->header_cache,
			      pkt->addr[i].kvaddr, pkt->addr[i].addr);
}

/* return number of pages pinned... */
static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
				     struct ipath_user_sdma_pkt *pkt,
				     unsigned long addr, int tlen, int npages)
{
	struct page *pages[2];
	int j;
	int ret;

	ret = get_user_pages_fast(addr, npages, 0, pages);
	if (ret != npages) {
		int i;

		for (i = 0; i < ret; i++)
			put_page(pages[i]);

		ret = -ENOMEM;
		goto done;
	}

	for (j = 0; j < npages; j++) {
		/* map the pages... */
		const int flen =
			ipath_user_sdma_page_length(addr, tlen);
		dma_addr_t dma_addr =
			dma_map_page(&dd->pcidev->dev,
				     pages[j], 0, flen, DMA_TO_DEVICE);
		unsigned long fofs = addr & ~PAGE_MASK;

		if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
			ret = -ENOMEM;
			goto done;
		}

		ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
					  pages[j], kmap(pages[j]),
					  dma_addr);

		pkt->naddr++;
		addr += flen;
		tlen -= flen;
	}

done:
	return ret;
}

static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
				   struct ipath_user_sdma_queue *pq,
				   struct ipath_user_sdma_pkt *pkt,
				   const struct iovec *iov,
				   unsigned long niov)
{
	int ret = 0;
	unsigned long idx;

	for (idx = 0; idx < niov; idx++) {
		const int npages = ipath_user_sdma_num_pages(iov + idx);
		const unsigned long addr = (unsigned long) iov[idx].iov_base;

		ret = ipath_user_sdma_pin_pages(dd, pkt,
						addr, iov[idx].iov_len,
						npages);
		if (ret < 0)
			goto free_pkt;
	}

	goto done;

free_pkt:
	for (idx = 0; idx < pkt->naddr; idx++)
		ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);

done:
	return ret;
}

static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
					struct ipath_user_sdma_queue *pq,
					struct ipath_user_sdma_pkt *pkt,
					const struct iovec *iov,
					unsigned long niov, int npages)
{
	int ret = 0;

	if (npages >= ARRAY_SIZE(pkt->addr))
		ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
	else
		ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);

	return ret;
}
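
/*
 * Example of the choice above (added for illustration): addr[] has 4
 * slots and slot 0 is taken by the header, so a payload that needs 4
 * or more pages -- e.g. three iovec elements that each straddle a page
 * boundary -- is coalesced into one allocated page rather than pinned
 * page by page.
 */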

/* free a packet list -- return counter value of last packet */
static void ipath_user_sdma_free_pkt_list(struct device *dev,
					  struct ipath_user_sdma_queue *pq,
					  struct list_head *list)
{
	struct ipath_user_sdma_pkt *pkt, *pkt_next;

	list_for_each_entry_safe(pkt, pkt_next, list, list) {
		int i;

		for (i = 0; i < pkt->naddr; i++)
			ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);

		kmem_cache_free(pq->pkt_slab, pkt);
	}
}

/*
 * copy headers, coalesce etc -- pq->lock must be held
 *
 * we queue all the packets to list, returning the
 * number of bytes total.  list must be empty initially,
 * as, if there is an error we clean it...
 */
static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
				      struct ipath_user_sdma_queue *pq,
				      struct list_head *list,
				      const struct iovec *iov,
				      unsigned long niov,
				      int maxpkts)
{
	unsigned long idx = 0;
	int ret = 0;
	int npkts = 0;
	struct page *page = NULL;
	__le32 *pbc;
	dma_addr_t dma_addr;
	struct ipath_user_sdma_pkt *pkt = NULL;
	size_t len;
	size_t nw;
	u32 counter = pq->counter;
	int dma_mapped = 0;

	while (idx < niov && npkts < maxpkts) {
		const unsigned long addr = (unsigned long) iov[idx].iov_base;
		const unsigned long idx_save = idx;
		unsigned pktnw;
		unsigned pktnwc;
		int nfrags = 0;
		int npages = 0;
		int cfur;

		dma_mapped = 0;
		len = iov[idx].iov_len;
		nw = len >> 2;
		page = NULL;

		pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
		if (!pkt) {
			ret = -ENOMEM;
			goto free_list;
		}

		if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
		    len > PAGE_SIZE || len & 3 || addr & 3) {
			ret = -EINVAL;
			goto free_pkt;
		}

		if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
			pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
					     &dma_addr);
		else
			pbc = NULL;

		if (!pbc) {
			page = alloc_page(GFP_KERNEL);
			if (!page) {
				ret = -ENOMEM;
				goto free_pkt;
			}
			pbc = kmap(page);
		}

		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
		if (cfur) {
			ret = -EFAULT;
			goto free_pbc;
		}

		/*
		 * this assignment is a bit strange.  it's because the
		 * pbc counts the number of 32 bit words in the full
		 * packet _except_ the first word of the pbc itself...
		 */
		pktnwc = nw - 1;

		/*
		 * pktnw computation yields the number of 32 bit words
		 * that the caller has indicated in the PBC.  note that
		 * this is one less than the total number of words that
		 * goes to the send DMA engine as the first 32 bit word
		 * of the PBC itself is not counted.  Armed with this count,
		 * we can verify that the packet is consistent with the
		 * iovec lengths.
		 */
		pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
		if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
			ret = -EINVAL;
			goto free_pbc;
		}

		idx++;
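		/*
		 * Worked example (added for illustration): a bare 64 byte
		 * header iovec gives nw = 16 and pktnwc = 15.  If the PBC
		 * advertises pktnw = 527 dwords, the loop below must find
		 * payload iovec elements totalling (527 - 15) * 4 = 2048
		 * bytes before the packet is accepted.
		 */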
		while (pktnwc < pktnw && idx < niov) {
			const size_t slen = iov[idx].iov_len;
			const unsigned long faddr =
				(unsigned long) iov[idx].iov_base;

			if (slen & 3 || faddr & 3 || !slen ||
			    slen > PAGE_SIZE) {
				ret = -EINVAL;
				goto free_pbc;
			}

			npages++;
			if ((faddr & PAGE_MASK) !=
			    ((faddr + slen - 1) & PAGE_MASK))
				npages++;

			pktnwc += slen >> 2;
			idx++;
			nfrags++;
		}

		if (pktnwc != pktnw) {
			ret = -EINVAL;
			goto free_pbc;
		}

		if (page) {
			dma_addr = dma_map_page(&dd->pcidev->dev,
						page, 0, len, DMA_TO_DEVICE);
			if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
				ret = -ENOMEM;
				goto free_pbc;
			}

			dma_mapped = 1;
		}

		ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
					    page, pbc, dma_addr);

		if (nfrags) {
			ret = ipath_user_sdma_init_payload(dd, pq, pkt,
							   iov + idx_save + 1,
							   nfrags, npages);
			if (ret < 0)
				goto free_pbc_dma;
		}

		counter++;
		npkts++;

		list_add_tail(&pkt->list, list);
	}

	ret = idx;
	goto done;

free_pbc_dma:
	if (dma_mapped)
		dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
free_pbc:
	if (page) {
		kunmap(page);
		__free_page(page);
	} else
		dma_pool_free(pq->header_cache, pbc, dma_addr);
free_pkt:
	kmem_cache_free(pq->pkt_slab, pkt);
free_list:
	ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
	return ret;
}

static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
						 u32 c)
{
	pq->sent_counter = c;
}

/* try to clean out queue -- needs pq->lock */
static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
				       struct ipath_user_sdma_queue *pq)
{
	struct list_head free_list;
	struct ipath_user_sdma_pkt *pkt;
	struct ipath_user_sdma_pkt *pkt_prev;
	int ret = 0;

	INIT_LIST_HEAD(&free_list);

	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
		s64 descd = dd->ipath_sdma_descq_removed - pkt->added;

		if (descd < 0)
			break;

		list_move_tail(&pkt->list, &free_list);

		/* one more packet cleaned */
		ret++;
	}

	if (!list_empty(&free_list)) {
		u32 counter;

		pkt = list_entry(free_list.prev,
				 struct ipath_user_sdma_pkt, list);
		counter = pkt->counter;

		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		ipath_user_sdma_set_complete_counter(pq, counter);
	}

	return ret;
}

void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
{
	if (!pq)
		return;

	kmem_cache_destroy(pq->pkt_slab);
	dma_pool_destroy(pq->header_cache);
	kfree(pq);
}

/* clean descriptor queue, returns > 0 if some elements cleaned */
static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
	ret = ipath_sdma_make_progress(dd);
	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);

	return ret;
}

/* we're in close, drain packets so that we can cleanup successfully... */
void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
				 struct ipath_user_sdma_queue *pq)
{
	int i;

	if (!pq)
		return;

	for (i = 0; i < 100; i++) {
		mutex_lock(&pq->lock);
		if (list_empty(&pq->sent)) {
			mutex_unlock(&pq->lock);
			break;
		}
		ipath_user_sdma_hwqueue_clean(dd);
		ipath_user_sdma_queue_clean(dd, pq);
		mutex_unlock(&pq->lock);
		msleep(10);
	}

	if (!list_empty(&pq->sent)) {
		struct list_head free_list;

		printk(KERN_INFO "drain: lists not empty: forcing!\n");
		INIT_LIST_HEAD(&free_list);
		mutex_lock(&pq->lock);
		list_splice_init(&pq->sent, &free_list);
		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		mutex_unlock(&pq->lock);
	}
}

static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
					   u64 addr, u64 dwlen, u64 dwoffset)
{
	return cpu_to_le64(/* SDmaPhyAddr[31:0] */
			   ((addr & 0xfffffffcULL) << 32) |
			   /* SDmaGeneration[1:0] */
			   ((dd->ipath_sdma_generation & 3ULL) << 30) |
			   /* SDmaDwordCount[10:0] */
			   ((dwlen & 0x7ffULL) << 16) |
			   /* SDmaBufOffset[12:2] */
			   (dwoffset & 0x7ffULL));
}

static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
{
	return descq | cpu_to_le64(1ULL << 12);
}

static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
{
					      /* last */ /* dma head */
	return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
}

static inline __le64 ipath_sdma_make_desc1(u64 addr)
{
	/* SDmaPhyAddr[47:32] */
	return cpu_to_le64(addr >> 32);
}
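
/*
 * Descriptor layout as assembled by the helpers above (summary added
 * for reference, derived from the shifts and masks used here):
 *   qword 0: [63:32] SDmaPhyAddr[31:0], [31:30] SDmaGeneration[1:0],
 *            [26:16] SDmaDwordCount[10:0], [14] large-buffer flag
 *            (set in ipath_user_sdma_push_pkts), [13] dma head,
 *            [12] first descriptor, [11] last descriptor,
 *            [10:0] SDmaBufOffset[12:2]
 *   qword 1: SDmaPhyAddr[47:32] in the low bits
 */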

static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
				      struct ipath_user_sdma_pkt *pkt, int idx,
				      unsigned ofs, u16 tail)
{
	const u64 addr = (u64) pkt->addr[idx].addr +
		(u64) pkt->addr[idx].offset;
	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
	__le64 *descqp;
	__le64 descq0;

	descqp = &dd->ipath_sdma_descq[tail].qw[0];

	descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
	if (idx == 0)
		descq0 = ipath_sdma_make_first_desc0(descq0);
	if (idx == pkt->naddr - 1)
		descq0 = ipath_sdma_make_last_desc0(descq0);

	descqp[0] = descq0;
	descqp[1] = ipath_sdma_make_desc1(addr);
}

/* pq->lock must be held, get packets on the wire... */
static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
				     struct ipath_user_sdma_queue *pq,
				     struct list_head *pktlist)
{
	int ret = 0;
	unsigned long flags;
	u16 tail;

	if (list_empty(pktlist))
		return 0;

	if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
		return -ECOMM;

	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);

	if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
		ret = -ECOMM;
		goto unlock;
	}

	tail = dd->ipath_sdma_descq_tail;
	while (!list_empty(pktlist)) {
		struct ipath_user_sdma_pkt *pkt =
			list_entry(pktlist->next, struct ipath_user_sdma_pkt,
				   list);
		int i;
		unsigned ofs = 0;
		u16 dtail = tail;

		if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
			goto unlock_check_tail;

		for (i = 0; i < pkt->naddr; i++) {
			ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
			ofs += pkt->addr[i].length >> 2;

			if (++tail == dd->ipath_sdma_descq_cnt) {
				tail = 0;
				++dd->ipath_sdma_generation;
			}
		}

		if ((ofs << 2) > dd->ipath_ibmaxlen) {
			ipath_dbg("packet size %X > ibmax %X, fail\n",
				  ofs << 2, dd->ipath_ibmaxlen);
			ret = -EMSGSIZE;
			goto unlock;
		}

		/*
		 * if the packet is >= 2KB mtu equivalent, we have to use
		 * the large buffers, and have to mark each descriptor as
		 * part of a large buffer packet.
		 */
		if (ofs >= IPATH_SMALLBUF_DWORDS) {
			for (i = 0; i < pkt->naddr; i++) {
				dd->ipath_sdma_descq[dtail].qw[0] |=
					cpu_to_le64(1ULL << 14);
				if (++dtail == dd->ipath_sdma_descq_cnt)
					dtail = 0;
			}
		}

		dd->ipath_sdma_descq_added += pkt->naddr;
		pkt->added = dd->ipath_sdma_descq_added;
		list_move_tail(&pkt->list, &pq->sent);
		ret++;
	}

unlock_check_tail:
	/* advance the tail on the chip if necessary */
	if (dd->ipath_sdma_descq_tail != tail) {
		wmb();
		ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
		dd->ipath_sdma_descq_tail = tail;
	}

unlock:
	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);

	return ret;
}

int ipath_user_sdma_writev(struct ipath_devdata *dd,
			   struct ipath_user_sdma_queue *pq,
			   const struct iovec *iov,
			   unsigned long dim)
{
	int ret = 0;
	struct list_head list;
	int npkts = 0;

	INIT_LIST_HEAD(&list);

	mutex_lock(&pq->lock);

	if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
		ipath_user_sdma_hwqueue_clean(dd);
		ipath_user_sdma_queue_clean(dd, pq);
	}

	while (dim) {
		const int mxp = 8;

		ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
		if (ret <= 0)
			goto done_unlock;
		else {
			dim -= ret;
			iov += ret;
		}

		/* force packets onto the sdma hw queue... */
		if (!list_empty(&list)) {
			/*
			 * lazily clean hw queue.  the 4 is a guess of about
			 * how many sdma descriptors a packet will take (it
			 * doesn't have to be perfect).
			 */
			if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
				ipath_user_sdma_hwqueue_clean(dd);
				ipath_user_sdma_queue_clean(dd, pq);
			}

			ret = ipath_user_sdma_push_pkts(dd, pq, &list);
			if (ret < 0)
				goto done_unlock;
			else {
				npkts += ret;
				pq->counter += ret;

				if (!list_empty(&list))
					goto done_unlock;
			}
		}
	}

done_unlock:
	if (!list_empty(&list))
		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
	mutex_unlock(&pq->lock);

	return (ret < 0) ? ret : npkts;
}

int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
				  struct ipath_user_sdma_queue *pq)
{
	int ret = 0;

	mutex_lock(&pq->lock);
	ipath_user_sdma_hwqueue_clean(dd);
	ret = ipath_user_sdma_queue_clean(dd, pq);
	mutex_unlock(&pq->lock);

	return ret;
}

u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
{
	return pq->sent_counter;
}

u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
{
	return pq->counter;
}