/*
 * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <rdma/rdma_vt.h>

#include "qib_common.h"
static unsigned int ib_qib_qp_table_size = 256;
module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static unsigned int qib_lkey_table_size = 16;
module_param_named(lkey_table_size, qib_lkey_table_size, uint,
                   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
                 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int ib_qib_max_pds = 0xFFFF;
module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
                 "Maximum number of protection domains to support");

static unsigned int ib_qib_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_qib_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
                 "Maximum number of completion queue entries to support");

unsigned int ib_qib_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_qib_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_qib_max_qps = 16384;
module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_qib_max_sges = 0x60;
module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int ib_qib_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
                 "Maximum number of multicast groups to support");

unsigned int ib_qib_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
                   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
                 "Maximum number of attached QPs to support");

unsigned int ib_qib_max_srqs = 1024;
module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int ib_qib_max_srq_sges = 128;
module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

static unsigned int ib_qib_disable_sma;
module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_qib_wc_opcode[] = {
        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
        [IB_WR_SEND] = IB_WC_SEND,
        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};
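
/*
 * Note (reader annotation): the designated initializers above only cover
 * the work-request opcodes this driver expects to complete; entries for
 * any other opcode are left zero-initialized, so the table is presumably
 * indexed only with the opcodes listed.
 */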
__be64 ib_qib_sys_image_guid;
/**
 * qib_copy_sge - copy data to SGE memory
 * @data: the data to copy
 * @length: the length of the data
 */
void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
{
        struct rvt_sge *sge = &ss->sge;

                u32 len = sge->length;

                if (len > sge->sge_length)
                        len = sge->sge_length;
                memcpy(sge->vaddr, data, len);
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        *sge = *ss->sg_list++;
                } else if (sge->length == 0 && sge->mr->lkey) {
                        if (++sge->n >= RVT_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
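
/*
 * Note (reader annotation): qib_copy_sge(), qib_skip_sge(),
 * qib_count_sge(), qib_copy_from_sge() and update_sge() below all walk an
 * rvt_sge_state the same way: consume the current SGE, move to the next
 * sg_list entry when sge_length reaches zero, and step through the MR's
 * map[]/segs[] arrays when one SGE spans several registered segments.
 */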
/**
 * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
 * @length: the number of bytes to skip
 */
void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
{
        struct rvt_sge *sge = &ss->sge;

                u32 len = sge->length;

                if (len > sge->sge_length)
                        len = sge->sge_length;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        *sge = *ss->sg_list++;
                } else if (sge->length == 0 && sge->mr->lkey) {
                        if (++sge->n >= RVT_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
/*
 * Count the number of DMA descriptors needed to send length bytes of data.
 * Don't modify the qib_sge_state to get the count.
 * Return zero if any of the segments is not aligned.
 */
static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
{
        struct rvt_sge *sg_list = ss->sg_list;
        struct rvt_sge sge = ss->sge;
        u8 num_sge = ss->num_sge;
        u32 ndesc = 1;  /* count the header */

                u32 len = sge.length;

                if (len > sge.sge_length)
                        len = sge.sge_length;
                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
                    (len != length && (len & (sizeof(u32) - 1)))) {
                sge.sge_length -= len;
                if (sge.sge_length == 0) {
                } else if (sge.length == 0 && sge.mr->lkey) {
                        if (++sge.n >= RVT_SEGSZ) {
                                if (++sge.m >= sge.mr->mapsz)
                        sge.vaddr =
                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
                        sge.length =
                                sge.mr->map[sge.m]->segs[sge.n].length;
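
/*
 * Note (reader annotation): per the header comment, the alignment test
 * above (vaddr, or any non-final chunk length, not a multiple of
 * sizeof(u32)) makes qib_count_sge() report zero descriptors, and the
 * SDMA send path below treats that as "copy the payload into a bounce
 * buffer instead of mapping the SGEs".
 */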
/*
 * Copy from the SGEs to the data buffer.
 */
static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
{
        struct rvt_sge *sge = &ss->sge;

                u32 len = sge->length;

                if (len > sge->sge_length)
                        len = sge->sge_length;
                memcpy(data, sge->vaddr, len);
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        *sge = *ss->sg_list++;
                } else if (sge->length == 0 && sge->mr->lkey) {
                        if (++sge->n >= RVT_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
/**
 * qib_qp_rcv - processing an incoming packet on a QP
 * @rcd: the context pointer
 * @hdr: the packet header
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on
 *
 * This is called from qib_ib_rcv() to process an incoming packet.
 * Called at interrupt level.
 */
static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
                       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
        struct qib_ibport *ibp = &rcd->ppd->ibport_data;

        spin_lock(&qp->r_lock);

        /* Check for valid receive state. */
        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
                ibp->rvp.n_pkt_drops++;

        switch (qp->ibqp.qp_type) {
                if (ib_qib_disable_sma)
                qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
                qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
                qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);

        spin_unlock(&qp->r_lock);
/**
 * qib_ib_rcv - process an incoming packet
 * @rcd: the context pointer
 * @rhdr: the header of the packet
 * @data: the packet payload
 * @tlen: the packet length
 *
 * This is called from qib_kreceive() to process an incoming packet at
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 */
void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
{
        struct qib_pportdata *ppd = rcd->ppd;
        struct qib_ibport *ibp = &ppd->ibport_data;
        struct qib_ib_header *hdr = rhdr;
        struct qib_devdata *dd = ppd->dd;
        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
        struct qib_other_headers *ohdr;

        /* 24 == LRH+BTH+CRC */
        if (unlikely(tlen < 24))

        /* Check for a valid destination LID (see ch. 7.11.1). */
        lid = be16_to_cpu(hdr->lrh[1]);
        if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
                lid &= ~((1 << ppd->lmc) - 1);
                if (unlikely(lid != ppd->lid))

        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
        if (lnh == QIB_LRH_BTH)
        else if (lnh == QIB_LRH_GRH) {
                ohdr = &hdr->u.l.oth;
                if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
                vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)

        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
#ifdef CONFIG_DEBUG_FS
        rcd->opstats->stats[opcode].n_bytes += tlen;
        rcd->opstats->stats[opcode].n_packets++;

        /* Get the destination QP number. */
        qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
        if (qp_num == QIB_MULTICAST_QPN) {
                struct rvt_mcast *mcast;
                struct rvt_mcast_qp *p;

                if (lnh != QIB_LRH_GRH)
                mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
                this_cpu_inc(ibp->pmastats->n_multicast_rcv);
                list_for_each_entry_rcu(p, &mcast->qp_list, list)
                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
                /*
                 * Notify rvt_multicast_detach() if it is waiting for us.
                 */
                if (atomic_dec_return(&mcast->refcount) <= 1)
                        wake_up(&mcast->wait);
                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
                this_cpu_inc(ibp->pmastats->n_unicast_rcv);
                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
        }
        ibp->rvp.n_pkt_drops++;
/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 */
static void mem_timer(unsigned long data)
{
        struct qib_ibdev *dev = (struct qib_ibdev *) data;
        struct list_head *list = &dev->memwait;
        struct rvt_qp *qp = NULL;
        struct qib_qp_priv *priv = NULL;

        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
        if (!list_empty(list)) {
                priv = list_entry(list->next, struct qib_qp_priv, iowait);
                list_del_init(&priv->iowait);
                atomic_inc(&qp->refcount);
                if (!list_empty(list))
                        mod_timer(&dev->mem_timer, jiffies + 1);
        }
        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);

        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & RVT_S_WAIT_KMEM) {
                qp->s_flags &= ~RVT_S_WAIT_KMEM;
                qib_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
        if (atomic_dec_and_test(&qp->refcount))
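
/*
 * Note (reader annotation): only the first waiter on memwait is woken per
 * timer tick; if more QPs are still queued the timer re-arms itself one
 * jiffy out, so memory-starved QPs are released one at a time rather than
 * all at once.
 */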
static void update_sge(struct rvt_sge_state *ss, u32 length)
{
        struct rvt_sge *sge = &ss->sge;

        sge->vaddr += length;
        sge->length -= length;
        sge->sge_length -= length;
        if (sge->sge_length == 0) {
                *sge = *ss->sg_list++;
        } else if (sge->length == 0 && sge->mr->lkey) {
                if (++sge->n >= RVT_SEGSZ) {
                        if (++sge->m >= sge->mr->mapsz)
                sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
                sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
        return data >> shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
        return data << shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
        return data;
}
#else
static inline u32 get_upper_bits(u32 data, u32 shift)
{
        return data << shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
        return data >> shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
        return data;
}
#endif
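
/*
 * Note (reader annotation): the little- and big-endian variants above
 * mirror each other (shift directions reversed) so that copy_io() below
 * can place partially filled 32-bit words into the correct byte lanes of
 * the PIO buffer regardless of host byte order.
 */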
static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
                    u32 length, unsigned flush_wc)
{
                u32 len = ss->sge.length;

                if (len > ss->sge.sge_length)
                        len = ss->sge.sge_length;
                /* If the source address is not aligned, try to align it. */
                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
                                            ~(sizeof(u32) - 1));
                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);

                        y = sizeof(u32) - off;
                        if (len + extra >= sizeof(u32)) {
                                data |= set_upper_bits(v, extra *
                                                       BITS_PER_BYTE);
                                len = sizeof(u32) - extra;
                                __raw_writel(data, piobuf);
                                /* Clear unused upper bytes */
                                data |= clear_upper_bytes(v, len, extra);
                        /* Source address is aligned. */
                        u32 *addr = (u32 *) ss->sge.vaddr;
                        int shift = extra * BITS_PER_BYTE;
                        int ushift = 32 - shift;

                        while (l >= sizeof(u32)) {
                                data |= set_upper_bits(v, shift);
                                __raw_writel(data, piobuf);
                                data = get_upper_bits(v, ushift);
                        /*
                         * We still have 'extra' number of bytes leftover.
                         */
                        if (l + extra >= sizeof(u32)) {
                                data |= set_upper_bits(v, shift);
                                len -= l + extra - sizeof(u32);
                                __raw_writel(data, piobuf);
                                /* Clear unused upper bytes */
                                data |= clear_upper_bytes(v, l, extra);
                } else if (len == length) {
                } else if (len == length) {
                        /*
                         * Need to round up for the last dword in the
                         * packet.
                         */
                        qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
                        last = ((u32 *) ss->sge.vaddr)[w - 1];
                        qib_pio_copy(piobuf, ss->sge.vaddr, w);
                extra = len & (sizeof(u32) - 1);
                        u32 v = ((u32 *) ss->sge.vaddr)[w];

                        /* Clear unused upper bytes */
                        data = clear_upper_bytes(v, extra, 0);
        /* Update address before sending packet. */
        update_sge(ss, length);
                /* must flush early everything before trigger word */
                __raw_writel(last, piobuf);
                /* be sure trigger word is written */
                __raw_writel(last, piobuf);
static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
                                                    struct rvt_qp *qp)
{
        struct qib_qp_priv *priv = qp->priv;
        struct qib_verbs_txreq *tx;

        spin_lock_irqsave(&qp->s_lock, flags);
        spin_lock(&dev->rdi.pending_lock);

        if (!list_empty(&dev->txreq_free)) {
                struct list_head *l = dev->txreq_free.next;

                spin_unlock(&dev->rdi.pending_lock);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
                if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
                    list_empty(&priv->iowait)) {
                        qp->s_flags |= RVT_S_WAIT_TX;
                        list_add_tail(&priv->iowait, &dev->txwait);
                }
                qp->s_flags &= ~RVT_S_BUSY;
                spin_unlock(&dev->rdi.pending_lock);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                tx = ERR_PTR(-EBUSY);
static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
                                                struct rvt_qp *qp)
{
        struct qib_verbs_txreq *tx;

        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
        /* assume the list non empty */
        if (likely(!list_empty(&dev->txreq_free))) {
                struct list_head *l = dev->txreq_free.next;

                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
                /* call slow path to get the extra lock */
                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
                tx = __get_txreq(dev, qp);
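
/*
 * Note (reader annotation): get_txreq() is the fast path and takes only
 * dev->rdi.pending_lock; when the free list is empty it falls back to
 * __get_txreq(), which acquires qp->s_lock first so the QP can be queued
 * on dev->txwait (or handed an ERR_PTR(-EBUSY) tx) under the proper lock
 * ordering.
 */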
void qib_put_txreq(struct qib_verbs_txreq *tx)
{
        struct qib_ibdev *dev;
        struct qib_qp_priv *priv;

        dev = to_idev(qp->ibqp.device);

        if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
                tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
                dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
                                 tx->txreq.addr, tx->hdr_dwords << 2,
                                 DMA_TO_DEVICE);
                kfree(tx->align_buf);
        }

        spin_lock_irqsave(&dev->rdi.pending_lock, flags);

        /* Put struct back on free list */
        list_add(&tx->txreq.list, &dev->txreq_free);

        if (!list_empty(&dev->txwait)) {
                /* Wake up first QP wanting a free struct */
                priv = list_entry(dev->txwait.next, struct qib_qp_priv,
                                  iowait);
                list_del_init(&priv->iowait);
                atomic_inc(&qp->refcount);
                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);

                spin_lock_irqsave(&qp->s_lock, flags);
                if (qp->s_flags & RVT_S_WAIT_TX) {
                        qp->s_flags &= ~RVT_S_WAIT_TX;
                        qib_schedule_send(qp);
                }
                spin_unlock_irqrestore(&qp->s_lock, flags);

                if (atomic_dec_and_test(&qp->refcount))
        }
        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
/*
 * This is called when there are send DMA descriptors that might be
 * available.
 *
 * This is called with ppd->sdma_lock held.
 */
void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
{
        struct rvt_qp *qp, *nqp;
        struct qib_qp_priv *qpp, *nqpp;
        struct rvt_qp *qps[20];
        struct qib_ibdev *dev;

        dev = &ppd->dd->verbs_dev;
        spin_lock(&dev->rdi.pending_lock);

        /* Search wait list for first QP wanting DMA descriptors. */
        list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
                if (qp->port_num != ppd->port)
                if (n == ARRAY_SIZE(qps))
                if (qpp->s_tx->txreq.sg_count > avail)
                avail -= qpp->s_tx->txreq.sg_count;
                list_del_init(&qpp->iowait);
                atomic_inc(&qp->refcount);
        }

        spin_unlock(&dev->rdi.pending_lock);

        for (i = 0; i < n; i++) {
                spin_lock(&qp->s_lock);
                if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
                        qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
                        qib_schedule_send(qp);
                }
                spin_unlock(&qp->s_lock);
                if (atomic_dec_and_test(&qp->refcount))
        }
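
/*
 * Note (reader annotation): waiting QPs are first collected into the
 * local qps[] array while rdi.pending_lock is held and only rescheduled
 * afterwards under each qp->s_lock, so the wait-list lock is never held
 * while per-QP locks are taken.
 */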
/*
 * This is called with ppd->sdma_lock held.
 */
static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
{
        struct qib_verbs_txreq *tx =
                container_of(cookie, struct qib_verbs_txreq, txreq);
        struct rvt_qp *qp = tx->qp;
        struct qib_qp_priv *priv = qp->priv;

        spin_lock(&qp->s_lock);
                qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
        else if (qp->ibqp.qp_type == IB_QPT_RC) {
                struct qib_ib_header *hdr;

                if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
                        hdr = &tx->align_buf->hdr;
                        struct qib_ibdev *dev = to_idev(qp->ibqp.device);

                        hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
                qib_rc_send_complete(qp, hdr);
        }
        if (atomic_dec_and_test(&priv->s_dma_busy)) {
                if (qp->state == IB_QPS_RESET)
                        wake_up(&priv->wait_dma);
                else if (qp->s_flags & RVT_S_WAIT_DMA) {
                        qp->s_flags &= ~RVT_S_WAIT_DMA;
                        qib_schedule_send(qp);
                }
        }
        spin_unlock(&qp->s_lock);
static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
{
        struct qib_qp_priv *priv = qp->priv;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                spin_lock(&dev->rdi.pending_lock);
                if (list_empty(&priv->iowait)) {
                        if (list_empty(&dev->memwait))
                                mod_timer(&dev->mem_timer, jiffies + 1);
                        qp->s_flags |= RVT_S_WAIT_KMEM;
                        list_add_tail(&priv->iowait, &dev->memwait);
                }
                spin_unlock(&dev->rdi.pending_lock);
                qp->s_flags &= ~RVT_S_BUSY;
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
                              u32 plen, u32 dwords)
{
        struct qib_qp_priv *priv = qp->priv;
        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
        struct qib_devdata *dd = dd_from_dev(dev);
        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        struct qib_verbs_txreq *tx;
        struct qib_pio_header *phdr;

                /* resend previously constructed packet */
                ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);

        tx = get_txreq(dev, qp);

        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
                                       be16_to_cpu(hdr->lrh[0]) >> 12);
        tx->mr = qp->s_rdma_mr;
                qp->s_rdma_mr = NULL;
        tx->txreq.callback = sdma_complete;
        if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
                tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
                tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
        if (plen + 1 > dd->piosize2kmax_dwords)
                tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;

                /*
                 * Don't try to DMA if it takes more descriptors than
                 * are available.
                 */
                ndesc = qib_count_sge(ss, len);
                if (ndesc >= ppd->sdma_descq_cnt)
                phdr = &dev->pio_hdrs[tx->hdr_inx];
                phdr->pbc[0] = cpu_to_le32(plen);
                phdr->pbc[1] = cpu_to_le32(control);
                memcpy(&phdr->hdr, hdr, hdrwords << 2);
                tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
                tx->txreq.sg_count = ndesc;
                tx->txreq.addr = dev->pio_hdrs_phys +
                        tx->hdr_inx * sizeof(struct qib_pio_header);
                tx->hdr_dwords = hdrwords + 2; /* add PBC length */
                ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);

        /* Allocate a buffer and copy the header and payload to it. */
        tx->hdr_dwords = plen + 1;
        phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
        phdr->pbc[0] = cpu_to_le32(plen);
        phdr->pbc[1] = cpu_to_le32(control);
        memcpy(&phdr->hdr, hdr, hdrwords << 2);
        qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);

        tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
                                        tx->hdr_dwords << 2, DMA_TO_DEVICE);
        if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
        tx->align_buf = phdr;
        tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
        tx->txreq.sg_count = 1;
        ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);

        ret = wait_kmem(dev, qp);
        ibp->rvp.n_unaligned++;
/*
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
static int no_bufs_available(struct rvt_qp *qp)
{
        struct qib_qp_priv *priv = qp->priv;
        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
        struct qib_devdata *dd;
        unsigned long flags;

        /*
         * Note that as soon as want_buffer() is called and
         * possibly before it returns, qib_ib_piobufavail()
         * could be called. Therefore, put QP on the I/O wait list before
         * enabling the PIO avail interrupt.
         */
        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                spin_lock(&dev->rdi.pending_lock);
                if (list_empty(&priv->iowait)) {
                        qp->s_flags |= RVT_S_WAIT_PIO;
                        list_add_tail(&priv->iowait, &dev->piowait);
                        dd = dd_from_dev(dev);
                        dd->f_wantpiobuf_intr(dd, 1);
                }
                spin_unlock(&dev->rdi.pending_lock);
                qp->s_flags &= ~RVT_S_BUSY;
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
                              u32 plen, u32 dwords)
{
        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
        u32 *hdr = (u32 *) ibhdr;
        u32 __iomem *piobuf_orig;
        u32 __iomem *piobuf;
        unsigned long flags;

        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
                                       be16_to_cpu(ibhdr->lrh[0]) >> 12);
        pbc = ((u64) control << 32) | plen;
        piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
        if (unlikely(piobuf == NULL))
                return no_bufs_available(qp);

        /*
         * We have to flush after the PBC for correctness on some cpus
         * or WC buffer can be written out of order.
         */
        writeq(pbc, piobuf);
        piobuf_orig = piobuf;

        flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
                /*
                 * If there is just the header portion, must flush before
                 * writing last word of header for correctness, and after
                 * the last header word (trigger word).
                 */
                        qib_pio_copy(piobuf, hdr, hdrwords - 1);
                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
                        qib_pio_copy(piobuf, hdr, hdrwords);
                qib_pio_copy(piobuf, hdr, hdrwords);

        /* The common case is aligned and contained in one segment. */
        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
                u32 *addr = (u32 *) ss->sge.vaddr;

                /* Update address before sending packet. */
                update_sge(ss, len);
                        qib_pio_copy(piobuf, addr, dwords - 1);
                        /* must flush early everything before trigger word */
                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
                        /* be sure trigger word is written */
                        qib_pio_copy(piobuf, addr, dwords);
                copy_io(piobuf, ss, len, flush_wc);

        if (dd->flags & QIB_USE_SPCL_TRIG) {
                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;

                __raw_writel(0xaebecede, piobuf_orig + spcl_off);
        }
        qib_sendbuf_done(dd, pbufn);
        if (qp->s_rdma_mr) {
                rvt_put_mr(qp->s_rdma_mr);
                qp->s_rdma_mr = NULL;
        }
                spin_lock_irqsave(&qp->s_lock, flags);
                qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        } else if (qp->ibqp.qp_type == IB_QPT_RC) {
                spin_lock_irqsave(&qp->s_lock, flags);
                qib_rc_send_complete(qp, ibhdr);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        }
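
/*
 * Note (reader annotation): on chips with write-combining PIO buffers
 * (QIB_PIO_FLUSH_WC) the last word of the header and of the payload is
 * written separately with __raw_writel() after a flush, since that final
 * "trigger word" is what launches the packet; the 0xaebecede store above
 * appears to be the special-trigger variant used when QIB_USE_SPCL_TRIG
 * is set.
 */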
/**
 * qib_verbs_send - send a packet
 * @qp: the QP to send on
 * @hdr: the packet header
 * @hdrwords: the number of 32-bit words in the header
 * @ss: the SGE to send
 * @len: the length of the packet in bytes
 *
 * Return zero if packet is sent or queued OK.
 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
 */
int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
                   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
{
        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        u32 dwords = (len + 3) >> 2;

        /*
         * Calculate the send buffer trigger address.
         * The +1 counts for the pbc control dword following the pbc length.
         */
        plen = hdrwords + dwords + 1;

        /*
         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
         * can defer SDMA restart until link goes ACTIVE without
         * worrying about just how we got there.
         */
        if (qp->ibqp.qp_type == IB_QPT_SMI ||
            !(dd->flags & QIB_HAS_SEND_DMA))
                ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
                                         plen, dwords);
        else
                ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
                                         plen, dwords);
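
/*
 * Note (reader annotation): plen is measured in 32-bit words (header +
 * payload + one PBC control dword), and dwords = (len + 3) >> 2 rounds
 * the payload up to a whole dword before it is handed to the PIO or SDMA
 * path above.
 */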
int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
                          u64 *rwords, u64 *spkts, u64 *rpkts,
                          u64 *xmit_wait)
{
        struct qib_devdata *dd = ppd->dd;

        if (!(dd->flags & QIB_PRESENT)) {
                /* no hardware, freeze, etc. */
        }
        *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
        *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
        *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
        *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
        *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
/**
 * qib_get_counters - get various chip counters
 * @ppd: the physical port of the qlogic_ib device
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 */
int qib_get_counters(struct qib_pportdata *ppd,
                     struct qib_verbs_counters *cntrs)
{
        if (!(ppd->dd->flags & QIB_PRESENT)) {
                /* no hardware, freeze, etc. */
        }
        cntrs->symbol_error_counter =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
        cntrs->link_error_recovery_counter =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
        /*
         * The link downed counter counts when the other side downs the
         * connection. We add in the number of times we downed the link
         * due to local link integrity errors to compensate.
         */
        cntrs->link_downed_counter =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
        cntrs->port_rcv_errors =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
        cntrs->port_rcv_errors +=
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
        cntrs->port_rcv_errors +=
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
        cntrs->port_rcv_remphys_errors =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
        cntrs->port_xmit_discards =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
        cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
                                                    QIBPORTCNTR_WORDSEND);
        cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
                                                   QIBPORTCNTR_WORDRCV);
        cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
                                                       QIBPORTCNTR_PKTSEND);
        cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
                                                      QIBPORTCNTR_PKTRCV);
        cntrs->local_link_integrity_errors =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
        cntrs->excessive_buffer_overrun_errors =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
        cntrs->vl15_dropped =
                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
/**
 * qib_ib_piobufavail - callback when a PIO buffer is available
 * @dd: the device pointer
 *
 * This is called from qib_intr() at interrupt level when a PIO buffer is
 * available after qib_verbs_send() returned an error that no buffers were
 * available. Disable the interrupt if there are no more QPs waiting.
 */
void qib_ib_piobufavail(struct qib_devdata *dd)
{
        struct qib_ibdev *dev = &dd->verbs_dev;
        struct list_head *list;
        struct rvt_qp *qps[5];
        unsigned long flags;
        struct qib_qp_priv *priv;

        list = &dev->piowait;

        /*
         * Note: checking that the piowait list is empty and clearing
         * the buffer available interrupt needs to be atomic or we
         * could end up with QPs on the wait list with the interrupt
         * enabled.
         */
        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
        while (!list_empty(list)) {
                if (n == ARRAY_SIZE(qps))
                priv = list_entry(list->next, struct qib_qp_priv, iowait);
                list_del_init(&priv->iowait);
                atomic_inc(&qp->refcount);
        }
        dd->f_wantpiobuf_intr(dd, 0);
        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);

        for (i = 0; i < n; i++) {
                spin_lock_irqsave(&qp->s_lock, flags);
                if (qp->s_flags & RVT_S_WAIT_PIO) {
                        qp->s_flags &= ~RVT_S_WAIT_PIO;
                        qib_schedule_send(qp);
                }
                spin_unlock_irqrestore(&qp->s_lock, flags);

                /* Notify qib_destroy_qp() if it is waiting. */
                if (atomic_dec_and_test(&qp->refcount))
        }
static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
                          struct ib_port_attr *props)
{
        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
        struct qib_devdata *dd = dd_from_dev(ibdev);
        struct qib_pportdata *ppd = &dd->pport[port_num - 1];

        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
        props->lmc = ppd->lmc;
        props->state = dd->f_iblink_state(ppd->lastibcstat);
        props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
        props->gid_tbl_len = QIB_GUIDS_PER_PORT;
        props->active_width = ppd->link_width_active;
        /* See rate_show() */
        props->active_speed = ppd->link_speed_active;
        props->max_vl_num = qib_num_vls(ppd->vls_supported);

        props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
        switch (ppd->ibmtu) {
        }
        props->active_mtu = mtu;
static int qib_modify_device(struct ib_device *device,
                             int device_modify_mask,
                             struct ib_device_modify *device_modify)
{
        struct qib_devdata *dd = dd_from_ibdev(device);

        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
                                   IB_DEVICE_MODIFY_NODE_DESC)) {
        }

        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
                memcpy(device->node_desc, device_modify->node_desc, 64);
                for (i = 0; i < dd->num_pports; i++) {
                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;

                        qib_node_desc_chg(ibp);
                }
        }

        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
                ib_qib_sys_image_guid =
                        cpu_to_be64(device_modify->sys_image_guid);
                for (i = 0; i < dd->num_pports; i++) {
                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;

                        qib_sys_guid_chg(ibp);
                }
        }
static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
{
        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
        struct qib_devdata *dd = dd_from_dev(ibdev);
        struct qib_pportdata *ppd = &dd->pport[port_num - 1];

        qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
                           int guid_index, __be64 *guid)
{
        struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);

        if (guid_index == 0)
        else if (guid_index < QIB_GUIDS_PER_PORT)
                *guid = ibp->guids[guid_index - 1];
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
{
        if (ah_attr->sl > 15)
static void qib_notify_new_ah(struct ib_device *ibdev,
                              struct ib_ah_attr *ah_attr,
                              struct rvt_ah *ah)
{
        struct qib_ibport *ibp;
        struct qib_pportdata *ppd;

        /*
         * Do not trust reading anything from rvt_ah at this point as it is not
         * done being setup. We can however modify things which we need to set.
         */
        ibp = to_iport(ibdev, ah_attr->port_num);
        ppd = ppd_from_ibp(ibp);
        ah->vl = ibp->sl_to_vl[ah->attr.sl];
        ah->log_pmtu = ilog2(ppd->ibmtu);
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
{
        struct ib_ah_attr attr;
        struct ib_ah *ah = ERR_PTR(-EINVAL);

        memset(&attr, 0, sizeof(attr));
        attr.port_num = ppd_from_ibp(ibp)->port;
        qp0 = rcu_dereference(ibp->rvp.qp[0]);
                ah = ib_create_ah(qp0->ibqp.pd, &attr);
/**
 * qib_get_npkeys - return the size of the PKEY table for context 0
 * @dd: the qlogic_ib device
 */
unsigned qib_get_npkeys(struct qib_devdata *dd)
{
        return ARRAY_SIZE(dd->rcd[0]->pkeys);
}
/*
 * Return the indexed PKEY from the port PKEY table.
 * No need to validate rcd[ctxt]; the port is setup if we are here.
 */
unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
{
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        struct qib_devdata *dd = ppd->dd;
        unsigned ctxt = ppd->hw_pidx;

        /* dd->rcd null if mini_init or some init failures */
        if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
        ret = dd->rcd[ctxt]->pkeys[index];
static void init_ibport(struct qib_pportdata *ppd)
{
        struct qib_verbs_counters cntrs;
        struct qib_ibport *ibp = &ppd->ibport_data;

        spin_lock_init(&ibp->rvp.lock);
        /* Set the prefix to the default value (see ch. 4.1.1) */
        ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
        ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
        ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
                IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
                IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
                IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
                IB_PORT_OTHER_LOCAL_CHANGES_SUP;
        if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
                ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
        ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
        ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
        ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
        ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
        ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

        /* Snapshot current HW counters to "clear" them. */
        qib_get_counters(ppd, &cntrs);
        ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
        ibp->z_link_error_recovery_counter =
                cntrs.link_error_recovery_counter;
        ibp->z_link_downed_counter = cntrs.link_downed_counter;
        ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
        ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
        ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
        ibp->z_port_xmit_data = cntrs.port_xmit_data;
        ibp->z_port_rcv_data = cntrs.port_rcv_data;
        ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
        ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
        ibp->z_local_link_integrity_errors =
                cntrs.local_link_integrity_errors;
        ibp->z_excessive_buffer_overrun_errors =
                cntrs.excessive_buffer_overrun_errors;
        ibp->z_vl15_dropped = cntrs.vl15_dropped;
        RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
        RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
/**
 * qib_fill_device_attr - Fill in rvt dev info device attributes.
 * @dd: the device data structure
 */
static void qib_fill_device_attr(struct qib_devdata *dd)
{
        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;

        memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));

        rdi->dparms.props.max_pd = ib_qib_max_pds;
        rdi->dparms.props.max_ah = ib_qib_max_ahs;
        rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
        rdi->dparms.props.page_size_cap = PAGE_SIZE;
        rdi->dparms.props.vendor_id =
                QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
        rdi->dparms.props.vendor_part_id = dd->deviceid;
        rdi->dparms.props.hw_ver = dd->minrev;
        rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
        rdi->dparms.props.max_mr_size = ~0ULL;
        rdi->dparms.props.max_qp = ib_qib_max_qps;
        rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
        rdi->dparms.props.max_sge = ib_qib_max_sges;
        rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
        rdi->dparms.props.max_cq = ib_qib_max_cqs;
        rdi->dparms.props.max_cqe = ib_qib_max_cqes;
        rdi->dparms.props.max_ah = ib_qib_max_ahs;
        rdi->dparms.props.max_mr = rdi->lkey_table.max;
        rdi->dparms.props.max_fmr = rdi->lkey_table.max;
        rdi->dparms.props.max_map_per_fmr = 32767;
        rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
        rdi->dparms.props.max_qp_init_rd_atom = 255;
        rdi->dparms.props.max_srq = ib_qib_max_srqs;
        rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
        rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
        rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
        rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
        rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
        rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
        rdi->dparms.props.max_total_mcast_qp_attach =
                rdi->dparms.props.max_mcast_qp_attach *
                rdi->dparms.props.max_mcast_grp;
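
/*
 * Note (reader annotation): most of the limits above are simply the
 * module parameters defined at the top of this file, so the attributes
 * reported to the IB core reflect whatever was set at module load time;
 * max_mr/max_fmr instead come from the rdmavt lkey table sizing.
 */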
/**
 * qib_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 * Return the allocated qib_ibdev pointer or NULL on error.
 */
int qib_register_ib_device(struct qib_devdata *dd)
{
        struct qib_ibdev *dev = &dd->verbs_dev;
        struct ib_device *ibdev = &dev->rdi.ibdev;
        struct qib_pportdata *ppd = dd->pport;

        get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
        for (i = 0; i < dd->num_pports; i++)
                init_ibport(ppd + i);

        /* Only need to initialize non-zero fields. */
        setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);

        qpt_mask = dd->qpn_mask;

        INIT_LIST_HEAD(&dev->piowait);
        INIT_LIST_HEAD(&dev->dmawait);
        INIT_LIST_HEAD(&dev->txwait);
        INIT_LIST_HEAD(&dev->memwait);
        INIT_LIST_HEAD(&dev->txreq_free);

        if (ppd->sdma_descq_cnt) {
                dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
                                                   ppd->sdma_descq_cnt *
                                                   sizeof(struct qib_pio_header),
                                                   &dev->pio_hdrs_phys,
                                                   GFP_KERNEL);
                if (!dev->pio_hdrs) {
                }
        }

        for (i = 0; i < ppd->sdma_descq_cnt; i++) {
                struct qib_verbs_txreq *tx;

                tx = kzalloc(sizeof(*tx), GFP_KERNEL);
                list_add(&tx->txreq.list, &dev->txreq_free);
        }

        /*
         * The system image GUID is supposed to be the same for all
         * IB HCAs in a single system but since there can be other
         * device types in the system, we can't be sure this is unique.
         */
        if (!ib_qib_sys_image_guid)
                ib_qib_sys_image_guid = ppd->guid;

        strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
        ibdev->owner = THIS_MODULE;
        ibdev->node_guid = ppd->guid;
        ibdev->phys_port_cnt = dd->num_pports;
        ibdev->dma_device = &dd->pcidev->dev;
        ibdev->modify_device = qib_modify_device;
        ibdev->process_mad = qib_process_mad;

        snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
                 "Intel Infiniband HCA %s", init_utsname()->nodename);
        /*
         * Fill in rvt info object.
         */
        dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
        dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
        dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
        dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
        dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
        dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
        dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
        dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
        dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
        dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
        dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
        dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
        dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
        dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
        dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
        dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
        dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
        dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
        dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
        dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
        dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
        dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
        dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
        dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
        dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
                                                qib_notify_create_mad_agent;
        dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
                                                qib_notify_free_mad_agent;

        dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
        dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
        dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
        dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
        dd->verbs_dev.rdi.dparms.qpn_start = 1;
        dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
        dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
        dd->verbs_dev.rdi.dparms.qpn_inc = 1;
        dd->verbs_dev.rdi.dparms.qos_shift = 1;
        dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
        dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
        dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
        dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
        dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
        dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
        dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
        dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;

        snprintf(dd->verbs_dev.rdi.dparms.cq_name,
                 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
                 "qib_cq%d", dd->unit);
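
        /*
         * Note (reader annotation): the driver_f table above is how qib
         * plugs its hardware-specific callbacks into the shared rdmavt
         * (rdma_vt) verbs layer; rdmavt provides the generic QP/CQ/MR
         * machinery and calls back into these functions for anything
         * chip-specific.
         */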
        qib_fill_device_attr(dd);

        for (i = 0; i < dd->num_pports; i++, ppd++) {
                ctxt = ppd->hw_pidx;
                rvt_init_port(&dd->verbs_dev.rdi,
                              &ppd->ibport_data.rvp,
                              i,
                              dd->rcd[ctxt]->pkeys);
        }

        ret = rvt_register_device(&dd->verbs_dev.rdi);
        ret = qib_verbs_register_sysfs(dd);

        rvt_unregister_device(&dd->verbs_dev.rdi);

        while (!list_empty(&dev->txreq_free)) {
                struct list_head *l = dev->txreq_free.next;
                struct qib_verbs_txreq *tx;

                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
        }
        if (ppd->sdma_descq_cnt)
                dma_free_coherent(&dd->pcidev->dev,
                                  ppd->sdma_descq_cnt *
                                  sizeof(struct qib_pio_header),
                                  dev->pio_hdrs, dev->pio_hdrs_phys);

        qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
void qib_unregister_ib_device(struct qib_devdata *dd)
{
        struct qib_ibdev *dev = &dd->verbs_dev;

        qib_verbs_unregister_sysfs(dd);

        rvt_unregister_device(&dd->verbs_dev.rdi);

        if (!list_empty(&dev->piowait))
                qib_dev_err(dd, "piowait list not empty!\n");
        if (!list_empty(&dev->dmawait))
                qib_dev_err(dd, "dmawait list not empty!\n");
        if (!list_empty(&dev->txwait))
                qib_dev_err(dd, "txwait list not empty!\n");
        if (!list_empty(&dev->memwait))
                qib_dev_err(dd, "memwait list not empty!\n");

        del_timer_sync(&dev->mem_timer);
        while (!list_empty(&dev->txreq_free)) {
                struct list_head *l = dev->txreq_free.next;
                struct qib_verbs_txreq *tx;

                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
        }
        if (dd->pport->sdma_descq_cnt)
                dma_free_coherent(&dd->pcidev->dev,
                                  dd->pport->sdma_descq_cnt *
                                  sizeof(struct qib_pio_header),
                                  dev->pio_hdrs, dev->pio_hdrs_phys);
/**
 * _qib_schedule_send - schedule progress
 *
 * This schedules progress w/o regard to the s_flags.
 *
 * It is only used in post send, which doesn't hold
 * the s_lock.
 */
void _qib_schedule_send(struct rvt_qp *qp)
{
        struct qib_ibport *ibp =
                to_iport(qp->ibqp.device, qp->port_num);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        struct qib_qp_priv *priv = qp->priv;

        queue_work(ppd->qib_wq, &priv->s_work);
}

/**
 * qib_schedule_send - schedule progress
 *
 * This schedules qp progress. The s_lock
 * should be held.
 */
void qib_schedule_send(struct rvt_qp *qp)
{
        if (qib_send_ok(qp))
                _qib_schedule_send(qp);
}