/*
 * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/vmalloc.h>

#include "qib.h"
#include "qib_common.h"
static unsigned int ib_qib_qp_table_size = 256;
module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

unsigned int ib_qib_lkey_table_size = 16;
module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int ib_qib_max_pds = 0xFFFF;
module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int ib_qib_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_qib_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int ib_qib_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_qib_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_qib_max_qps = 16384;
module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_qib_max_sges = 0x60;
module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int ib_qib_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int ib_qib_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int ib_qib_max_srqs = 1024;
module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int ib_qib_max_srq_sges = 128;
module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

static unsigned int ib_qib_disable_sma;
module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
 * Note that it is OK to post send work requests in the SQE and ERR
 * states; qib_do_send() will process them and generate error
 * completions as per IB 1.2 C10-96.
 */
const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = 0,
	[IB_QPS_INIT] = QIB_POST_RECV_OK,
	[IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
	[IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
	    QIB_PROCESS_NEXT_SEND_OK,
	[IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
	[IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
	[IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
};
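/*
 * Illustrative use of the table above (the same pattern appears in the
 * post-send and post-receive paths later in this file): a caller holding
 * the appropriate QP lock gates an operation on the current state, e.g.
 *
 *	if (!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK))
 *		return -EINVAL;
 */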
struct qib_ucontext {
	struct ib_ucontext ibucontext;
};

static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
						*ibucontext)
{
	return container_of(ibucontext, struct qib_ucontext, ibucontext);
}
/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_qib_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

__be64 ib_qib_sys_image_guid;
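/*
 * Illustrative sketch of how ib_qib_wc_opcode is consumed (the actual
 * callers live in the RC/UC completion paths of this driver, not here):
 * when a send WQE completes, its IB_WR_* opcode is mapped to the IB_WC_*
 * opcode reported to the consumer, e.g.
 *
 *	wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 */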
/**
 * qib_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 */
void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
{
	struct qib_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(sge->vaddr, data, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (release)
				qib_put_mr(sge->mr);
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= QIB_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}
/**
 * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
 * @ss: the SGE state
 * @length: the number of bytes to skip
 */
void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
{
	struct qib_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (release)
				qib_put_mr(sge->mr);
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= QIB_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}
/*
 * Count the number of DMA descriptors needed to send length bytes of data.
 * Don't modify the qib_sge_state to get the count.
 * Return zero if any of the segments is not aligned.
 */
static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
{
	struct qib_sge *sg_list = ss->sg_list;
	struct qib_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 ndesc = 1;	/* count the header */

	while (length) {
		u32 len = sge.length;

		if (len > length)
			len = length;
		if (len > sge.sge_length)
			len = sge.sge_length;
		BUG_ON(len == 0);
		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
		    (len != length && (len & (sizeof(u32) - 1)))) {
			ndesc = 0;
			break;
		}
		ndesc++;
		sge.vaddr += len;
		sge.length -= len;
		sge.sge_length -= len;
		if (sge.sge_length == 0) {
			if (--num_sge)
				sge = *sg_list++;
		} else if (sge.length == 0 && sge.mr->lkey) {
			if (++sge.n >= QIB_SEGSZ) {
				if (++sge.m >= sge.mr->mapsz)
					break;
				sge.n = 0;
			}
			sge.vaddr =
				sge.mr->map[sge.m]->segs[sge.n].vaddr;
			sge.length =
				sge.mr->map[sge.m]->segs[sge.n].length;
		}
		length -= len;
	}
	return ndesc;
}
/*
 * Copy from the SGEs to the data buffer.
 */
static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
{
	struct qib_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(data, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= QIB_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}
/**
 * qib_post_one_send - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 */
static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
			     int *scheduled)
{
	struct qib_swqe *wqe;
	u32 next;
	int i;
	int j;
	int acc;
	int ret;
	unsigned long flags;
	struct qib_lkey_table *rkt;
	struct qib_pd *pd;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Check that state is OK to post send. */
	if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
		goto bail_inval;

	/* IB spec says that num_sge == 0 is OK. */
	if (wr->num_sge > qp->s_max_sge)
		goto bail_inval;

	/*
	 * Don't allow RDMA reads or atomic operations on UC or
	 * undefined operations.
	 * Make sure buffer is large enough to hold the result for atomics.
	 */
	if (wr->opcode == IB_WR_REG_MR) {
		if (qib_reg_mr(qp, reg_wr(wr)))
			goto bail_inval;
	} else if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
			goto bail_inval;
	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
		if (wr->opcode != IB_WR_SEND &&
		    wr->opcode != IB_WR_SEND_WITH_IMM)
			goto bail_inval;
		/* Check UD destination address PD */
		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
			goto bail_inval;
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
		goto bail_inval;
	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		 (wr->num_sge == 0 ||
		  wr->sg_list[0].length < sizeof(u64) ||
		  wr->sg_list[0].addr & (sizeof(u64) - 1)))
		goto bail_inval;
	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
		goto bail_inval;

	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last) {
		ret = -ENOMEM;
		goto bail;
	}

	rkt = &to_idev(qp->ibqp.device)->lk_table;
	pd = to_ipd(qp->ibqp.pd);
	wqe = get_swqe_ptr(qp, qp->s_head);

	if (qp->ibqp.qp_type != IB_QPT_UC &&
	    qp->ibqp.qp_type != IB_QPT_RC)
		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
	else if (wr->opcode == IB_WR_REG_MR)
		memcpy(&wqe->reg_wr, reg_wr(wr),
			sizeof(wqe->reg_wr));
	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
		 wr->opcode == IB_WR_RDMA_WRITE ||
		 wr->opcode == IB_WR_RDMA_READ)
		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
	else
		memcpy(&wqe->wr, wr, sizeof(wqe->wr));

	wqe->length = 0;
	j = 0;
	if (wr->num_sge) {
		acc = wr->opcode >= IB_WR_RDMA_READ ?
			IB_ACCESS_LOCAL_WRITE : 0;
		for (i = 0; i < wr->num_sge; i++) {
			u32 length = wr->sg_list[i].length;
			int ok;

			if (length == 0)
				continue;
			ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
					 &wr->sg_list[i], acc);
			if (!ok)
				goto bail_inval_free;
			wqe->length += length;
			j++;
		}
		wqe->wr.num_sge = j;
	}
	if (qp->ibqp.qp_type == IB_QPT_UC ||
	    qp->ibqp.qp_type == IB_QPT_RC) {
		if (wqe->length > 0x80000000U)
			goto bail_inval_free;
	} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
				  qp->port_num - 1)->ibmtu)
		goto bail_inval_free;
	else
		atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
	wqe->ssn = qp->s_ssn++;
	qp->s_head = next;

	ret = 0;
	goto bail;

bail_inval_free:
	while (j) {
		struct qib_sge *sge = &wqe->sg_list[--j];

		qib_put_mr(sge->mr);
	}
bail_inval:
	ret = -EINVAL;
bail:
	if (!ret && !wr->next &&
	    !qib_sdma_empty(
		dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
		qib_schedule_send(qp);
		*scheduled = 1;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}
/**
 * qib_post_send - post a send on a QP
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			 struct ib_send_wr **bad_wr)
{
	struct qib_qp *qp = to_iqp(ibqp);
	int err = 0;
	int scheduled = 0;

	for (; wr; wr = wr->next) {
		err = qib_post_one_send(qp, wr, &scheduled);
		if (err) {
			*bad_wr = wr;
			goto bail;
		}
	}

	/* Try to do the send work in the caller's context. */
	if (!scheduled)
		qib_do_send(&qp->s_work);

bail:
	return err;
}
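/*
 * Illustrative (hypothetical) caller's view: a kernel ULP never calls
 * qib_post_send() directly; it posts through the core verbs API and the
 * core dispatches here via ibdev->post_send, e.g.
 *
 *	struct ib_send_wr *bad_wr;
 *	struct ib_rdma_wr wr = { };
 *	int ret;
 *
 *	wr.wr.opcode = IB_WR_RDMA_WRITE;
 *	/@ ... fill in wr.wr.sg_list/num_sge, wr.remote_addr, wr.rkey ... @/
 *	ret = ib_post_send(qp, &wr.wr, &bad_wr);
 *
 * (The "@" above stands in for "*" to keep this comment block well formed.)
 */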
/**
 * qib_post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			    struct ib_recv_wr **bad_wr)
{
	struct qib_qp *qp = to_iqp(ibqp);
	struct qib_rwq *wq = qp->r_rq.wq;
	unsigned long flags;
	int ret;

	/* Check that state is OK to post receive. */
	if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
		*bad_wr = wr;
		ret = -EINVAL;
		goto bail;
	}

	for (; wr; wr = wr->next) {
		struct qib_rwqe *wqe;
		u32 next;
		int i;

		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			ret = -EINVAL;
			goto bail;
		}

		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = wq->head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		if (next == wq->tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
		wqe->wr_id = wr->wr_id;
		wqe->num_sge = wr->num_sge;
		for (i = 0; i < wr->num_sge; i++)
			wqe->sg_list[i] = wr->sg_list[i];
		/* Make sure queue entry is written before the head index. */
		smp_wmb();
		wq->head = next;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}
/**
 * qib_qp_rcv - processing an incoming packet on a QP
 * @rcd: the context pointer
 * @hdr: the packet header
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on
 *
 * This is called from qib_ib_rcv() to process an incoming packet
 * for the given QP.
 * Called at interrupt level.
 */
static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
		       int has_grh, void *data, u32 tlen, struct qib_qp *qp)
{
	struct qib_ibport *ibp = &rcd->ppd->ibport_data;

	spin_lock(&qp->r_lock);

	/* Check for valid receive state. */
	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
		ibp->n_pkt_drops++;
		goto unlock;
	}

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (ib_qib_disable_sma)
			break;
		/* FALLTHROUGH */
	case IB_QPT_UD:
		qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_RC:
		qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_UC:
		qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
		break;

	default:
		break;
	}

unlock:
	spin_unlock(&qp->r_lock);
}
/**
 * qib_ib_rcv - process an incoming packet
 * @rcd: the context pointer
 * @rhdr: the header of the packet
 * @data: the packet payload
 * @tlen: the packet length
 *
 * This is called from qib_kreceive() to process an incoming packet at
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 */
void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
{
	struct qib_pportdata *ppd = rcd->ppd;
	struct qib_ibport *ibp = &ppd->ibport_data;
	struct qib_ib_header *hdr = rhdr;
	struct qib_other_headers *ohdr;
	struct qib_qp *qp;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	/* 24 == LRH+BTH+CRC */
	if (unlikely(tlen < 24))
		goto drop;

	/* Check for a valid destination LID (see ch. 7.11.1). */
	lid = be16_to_cpu(hdr->lrh[1]);
	if (lid < QIB_MULTICAST_LID_BASE) {
		lid &= ~((1 << ppd->lmc) - 1);
		if (unlikely(lid != ppd->lid))
			goto drop;
	}

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == QIB_LRH_BTH)
		ohdr = &hdr->u.oth;
	else if (lnh == QIB_LRH_GRH) {
		u32 vtf;

		ohdr = &hdr->u.l.oth;
		if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
			goto drop;
		vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
			goto drop;
	} else
		goto drop;

	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
#ifdef CONFIG_DEBUG_FS
	rcd->opstats->stats[opcode].n_bytes += tlen;
	rcd->opstats->stats[opcode].n_packets++;
#endif

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
	if (qp_num == QIB_MULTICAST_QPN) {
		struct qib_mcast *mcast;
		struct qib_mcast_qp *p;

		if (lnh != QIB_LRH_GRH)
			goto drop;
		mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
		if (mcast == NULL)
			goto drop;
		this_cpu_inc(ibp->pmastats->n_multicast_rcv);
		list_for_each_entry_rcu(p, &mcast->qp_list, list)
			qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
		/*
		 * Notify qib_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		if (rcd->lookaside_qp) {
			if (rcd->lookaside_qpn != qp_num) {
				if (atomic_dec_and_test(
					&rcd->lookaside_qp->refcount))
					wake_up(
					 &rcd->lookaside_qp->wait);
				rcd->lookaside_qp = NULL;
			}
		}
		if (!rcd->lookaside_qp) {
			qp = qib_lookup_qpn(ibp, qp_num);
			if (!qp)
				goto drop;
			rcd->lookaside_qp = qp;
			rcd->lookaside_qpn = qp_num;
		} else
			qp = rcd->lookaside_qp;
		this_cpu_inc(ibp->pmastats->n_unicast_rcv);
		qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
	}
	return;

drop:
	ibp->n_pkt_drops++;
}
/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 */
static void mem_timer(unsigned long data)
{
	struct qib_ibdev *dev = (struct qib_ibdev *) data;
	struct list_head *list = &dev->memwait;
	struct qib_qp *qp = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	if (!list_empty(list)) {
		qp = list_entry(list->next, struct qib_qp, iowait);
		list_del_init(&qp->iowait);
		atomic_inc(&qp->refcount);
		if (!list_empty(list))
			mod_timer(&dev->mem_timer, jiffies + 1);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	if (qp) {
		spin_lock_irqsave(&qp->s_lock, flags);
		if (qp->s_flags & QIB_S_WAIT_KMEM) {
			qp->s_flags &= ~QIB_S_WAIT_KMEM;
			qib_schedule_send(qp);
		}
		spin_unlock_irqrestore(&qp->s_lock, flags);
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}
static void update_sge(struct qib_sge_state *ss, u32 length)
{
	struct qib_sge *sge = &ss->sge;

	sge->vaddr += length;
	sge->length -= length;
	sge->sge_length -= length;
	if (sge->sge_length == 0) {
		if (--ss->num_sge)
			*sge = *ss->sg_list++;
	} else if (sge->length == 0 && sge->mr->lkey) {
		if (++sge->n >= QIB_SEGSZ) {
			if (++sge->m >= sge->mr->mapsz)
				return;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}
#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#else
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#endif
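/*
 * Worked example (little-endian case, illustrative only):
 * clear_upper_bytes(0xAABBCCDD, 2, 0) first shifts left by
 * (4 - 2) * 8 = 16 bits, giving 0xCCDD0000, then right by the same
 * amount, yielding 0x0000CCDD -- only the low "n" bytes survive,
 * positioned "off" bytes up from the bottom of the word.
 */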
static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
		    u32 length, unsigned flush_wc)
{
	u32 extra = 0;
	u32 data = 0;
	u32 last;

	while (1) {
		u32 len = ss->sge.length;
		u32 off;

		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		BUG_ON(len == 0);
		/* If the source address is not aligned, try to align it. */
		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
		if (off) {
			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
					    ~(sizeof(u32) - 1));
			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
			u32 y;

			y = sizeof(u32) - off;
			if (len > y)
				len = y;
			if (len + extra >= sizeof(u32)) {
				data |= set_upper_bits(v, extra *
						       BITS_PER_BYTE);
				len = sizeof(u32) - extra;
				if (len == length) {
					last = data;
					break;
				}
				__raw_writel(data, piobuf);
				piobuf++;
				extra = 0;
				data = 0;
			} else {
				/* Clear unused upper bytes */
				data |= clear_upper_bytes(v, len, extra);
				if (len == length) {
					last = data;
					break;
				}
				extra += len;
			}
		} else if (extra) {
			/* Source address is aligned. */
			u32 *addr = (u32 *) ss->sge.vaddr;
			int shift = extra * BITS_PER_BYTE;
			int ushift = 32 - shift;
			u32 l = len;

			while (l >= sizeof(u32)) {
				u32 v = *addr;

				data |= set_upper_bits(v, shift);
				__raw_writel(data, piobuf);
				data = get_upper_bits(v, ushift);
				piobuf++;
				addr++;
				l -= sizeof(u32);
			}
			/*
			 * We still have 'extra' number of bytes leftover.
			 */
			if (l) {
				u32 v = *addr;

				if (l + extra >= sizeof(u32)) {
					data |= set_upper_bits(v, shift);
					len -= l + extra - sizeof(u32);
					if (len == length) {
						last = data;
						break;
					}
					__raw_writel(data, piobuf);
					piobuf++;
					extra = 0;
					data = 0;
				} else {
					/* Clear unused upper bytes */
					data |= clear_upper_bytes(v, l, extra);
					if (len == length) {
						last = data;
						break;
					}
					extra += l;
				}
			} else if (len == length) {
				last = data;
				break;
			}
		} else if (len == length) {
			u32 w;

			/*
			 * Need to round up for the last dword in the
			 * packet.
			 */
			w = (len + 3) >> 2;
			qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
			piobuf += w - 1;
			last = ((u32 *) ss->sge.vaddr)[w - 1];
			break;
		} else {
			u32 w = len >> 2;

			qib_pio_copy(piobuf, ss->sge.vaddr, w);
			piobuf += w;

			extra = len & (sizeof(u32) - 1);
			if (extra) {
				u32 v = ((u32 *) ss->sge.vaddr)[w];

				/* Clear unused upper bytes */
				data = clear_upper_bytes(v, extra, 0);
			}
		}
		update_sge(ss, len);
		length -= len;
	}
	/* Update address before sending packet. */
	update_sge(ss, length);
	if (flush_wc) {
		/* must flush early everything before trigger word */
		qib_flush_wc();
		__raw_writel(last, piobuf);
		/* be sure trigger word is written */
		qib_flush_wc();
	} else
		__raw_writel(last, piobuf);
}
static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
						    struct qib_qp *qp)
{
	struct qib_verbs_txreq *tx;
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	spin_lock(&dev->pending_lock);

	if (!list_empty(&dev->txreq_free)) {
		struct list_head *l = dev->txreq_free.next;

		list_del(l);
		spin_unlock(&dev->pending_lock);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
	} else {
		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
		    list_empty(&qp->iowait)) {
			dev->n_txwait++;
			qp->s_flags |= QIB_S_WAIT_TX;
			list_add_tail(&qp->iowait, &dev->txwait);
		}
		qp->s_flags &= ~QIB_S_BUSY;
		spin_unlock(&dev->pending_lock);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		tx = ERR_PTR(-EBUSY);
	}

	return tx;
}
static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
						struct qib_qp *qp)
{
	struct qib_verbs_txreq *tx;
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	/* assume the list non empty */
	if (likely(!list_empty(&dev->txreq_free))) {
		struct list_head *l = dev->txreq_free.next;

		list_del(l);
		spin_unlock_irqrestore(&dev->pending_lock, flags);
		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
	} else {
		/* call slow path to get the extra lock */
		spin_unlock_irqrestore(&dev->pending_lock, flags);
		tx = __get_txreq(dev, qp);
	}
	return tx;
}
void qib_put_txreq(struct qib_verbs_txreq *tx)
{
	struct qib_ibdev *dev;
	struct qib_qp *qp;
	unsigned long flags;

	qp = tx->qp;
	dev = to_idev(qp->ibqp.device);

	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
	if (tx->mr) {
		qib_put_mr(tx->mr);
		tx->mr = NULL;
	}
	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
		tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
		dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
				 tx->txreq.addr, tx->hdr_dwords << 2,
				 DMA_TO_DEVICE);
		kfree(tx->align_buf);
	}

	spin_lock_irqsave(&dev->pending_lock, flags);

	/* Put struct back on free list */
	list_add(&tx->txreq.list, &dev->txreq_free);

	if (!list_empty(&dev->txwait)) {
		/* Wake up first QP wanting a free struct */
		qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
		list_del_init(&qp->iowait);
		atomic_inc(&qp->refcount);
		spin_unlock_irqrestore(&dev->pending_lock, flags);

		spin_lock_irqsave(&qp->s_lock, flags);
		if (qp->s_flags & QIB_S_WAIT_TX) {
			qp->s_flags &= ~QIB_S_WAIT_TX;
			qib_schedule_send(qp);
		}
		spin_unlock_irqrestore(&qp->s_lock, flags);

		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	} else
		spin_unlock_irqrestore(&dev->pending_lock, flags);
}
/*
 * This is called when there are send DMA descriptors that might be
 * available.
 *
 * This is called with ppd->sdma_lock held.
 */
void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
{
	struct qib_qp *qp, *nqp;
	struct qib_qp *qps[20];
	struct qib_ibdev *dev;
	unsigned i, n;

	n = 0;
	dev = &ppd->dd->verbs_dev;
	spin_lock(&dev->pending_lock);

	/* Search wait list for first QP wanting DMA descriptors. */
	list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
		if (qp->port_num != ppd->port)
			continue;
		if (n == ARRAY_SIZE(qps))
			break;
		if (qp->s_tx->txreq.sg_count > avail)
			break;
		avail -= qp->s_tx->txreq.sg_count;
		list_del_init(&qp->iowait);
		atomic_inc(&qp->refcount);
		qps[n++] = qp;
	}

	spin_unlock(&dev->pending_lock);

	for (i = 0; i < n; i++) {
		qp = qps[i];
		spin_lock(&qp->s_lock);
		if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
			qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
			qib_schedule_send(qp);
		}
		spin_unlock(&qp->s_lock);
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}
/*
 * This is called with ppd->sdma_lock held.
 */
static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
{
	struct qib_verbs_txreq *tx =
		container_of(cookie, struct qib_verbs_txreq, txreq);
	struct qib_qp *qp = tx->qp;

	spin_lock(&qp->s_lock);
	if (tx->wqe)
		qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
	else if (qp->ibqp.qp_type == IB_QPT_RC) {
		struct qib_ib_header *hdr;

		if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
			hdr = &tx->align_buf->hdr;
		else {
			struct qib_ibdev *dev = to_idev(qp->ibqp.device);

			hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
		}
		qib_rc_send_complete(qp, hdr);
	}
	if (atomic_dec_and_test(&qp->s_dma_busy)) {
		if (qp->state == IB_QPS_RESET)
			wake_up(&qp->wait_dma);
		else if (qp->s_flags & QIB_S_WAIT_DMA) {
			qp->s_flags &= ~QIB_S_WAIT_DMA;
			qib_schedule_send(qp);
		}
	}
	spin_unlock(&qp->s_lock);

	qib_put_txreq(tx);
}
static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
		spin_lock(&dev->pending_lock);
		if (list_empty(&qp->iowait)) {
			if (list_empty(&dev->memwait))
				mod_timer(&dev->mem_timer, jiffies + 1);
			qp->s_flags |= QIB_S_WAIT_KMEM;
			list_add_tail(&qp->iowait, &dev->memwait);
		}
		spin_unlock(&dev->pending_lock);
		qp->s_flags &= ~QIB_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	return ret;
}
static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
			      u32 plen, u32 dwords)
{
	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
	struct qib_devdata *dd = dd_from_dev(dev);
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	struct qib_verbs_txreq *tx;
	struct qib_pio_header *phdr;
	u32 control;
	u32 ndesc;
	int ret;

	tx = qp->s_tx;
	if (tx) {
		qp->s_tx = NULL;
		/* resend previously constructed packet */
		ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
		goto bail;
	}

	tx = get_txreq(dev, qp);
	if (IS_ERR(tx))
		goto bail_tx;

	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
				       be16_to_cpu(hdr->lrh[0]) >> 12);
	tx->qp = qp;
	atomic_inc(&qp->refcount);
	tx->wqe = qp->s_wqe;
	tx->mr = qp->s_rdma_mr;
	if (qp->s_rdma_mr)
		qp->s_rdma_mr = NULL;
	tx->txreq.callback = sdma_complete;
	if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
		tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
	else
		tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
	if (plen + 1 > dd->piosize2kmax_dwords)
		tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;

	if (len) {
		/*
		 * Don't try to DMA if it takes more descriptors than
		 * are available.
		 */
		ndesc = qib_count_sge(ss, len);
		if (ndesc >= ppd->sdma_descq_cnt)
			ndesc = 0;
	} else
		ndesc = 1;
	if (ndesc) {
		phdr = &dev->pio_hdrs[tx->hdr_inx];
		phdr->pbc[0] = cpu_to_le32(plen);
		phdr->pbc[1] = cpu_to_le32(control);
		memcpy(&phdr->hdr, hdr, hdrwords << 2);
		tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
		tx->txreq.sg_count = ndesc;
		tx->txreq.addr = dev->pio_hdrs_phys +
			tx->hdr_inx * sizeof(struct qib_pio_header);
		tx->hdr_dwords = hdrwords + 2; /* add PBC length */
		ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
		goto bail;
	}

	/* Allocate a buffer and copy the header and payload to it. */
	tx->hdr_dwords = plen + 1;
	phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
	if (!phdr)
		goto err_tx;
	phdr->pbc[0] = cpu_to_le32(plen);
	phdr->pbc[1] = cpu_to_le32(control);
	memcpy(&phdr->hdr, hdr, hdrwords << 2);
	qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);

	tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
					tx->hdr_dwords << 2, DMA_TO_DEVICE);
	if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
		goto map_err;
	tx->align_buf = phdr;
	tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
	tx->txreq.sg_count = 1;
	ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
	goto unaligned;

map_err:
	kfree(phdr);
err_tx:
	qib_put_txreq(tx);
	ret = wait_kmem(dev, qp);
unaligned:
	ibp->n_unaligned++;
bail:
	return ret;
bail_tx:
	ret = PTR_ERR(tx);
	goto bail;
}
/*
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
static int no_bufs_available(struct qib_qp *qp)
{
	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
	struct qib_devdata *dd;
	unsigned long flags;
	int ret = 0;

	/*
	 * Note that as soon as want_buffer() is called and
	 * possibly before it returns, qib_ib_piobufavail()
	 * could be called. Therefore, put QP on the I/O wait list before
	 * enabling the PIO avail interrupt.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
		spin_lock(&dev->pending_lock);
		if (list_empty(&qp->iowait)) {
			dev->n_piowait++;
			qp->s_flags |= QIB_S_WAIT_PIO;
			list_add_tail(&qp->iowait, &dev->piowait);
			dd = dd_from_dev(dev);
			dd->f_wantpiobuf_intr(dd, 1);
		}
		spin_unlock(&dev->pending_lock);
		qp->s_flags &= ~QIB_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}
static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
			      u32 plen, u32 dwords)
{
	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
	u32 *hdr = (u32 *) ibhdr;
	u32 __iomem *piobuf_orig;
	u32 __iomem *piobuf;
	u64 pbc;
	unsigned long flags;
	unsigned flush_wc;
	u32 control;
	u32 pbufn;

	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
		be16_to_cpu(ibhdr->lrh[0]) >> 12);
	pbc = ((u64) control << 32) | plen;
	piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
	if (unlikely(piobuf == NULL))
		return no_bufs_available(qp);

	/*
	 * Write the pbc.
	 * We have to flush after the PBC for correctness on some cpus
	 * or WC buffer can be written out of order.
	 */
	writeq(pbc, piobuf);
	piobuf_orig = piobuf;
	piobuf += 2;

	flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
	if (len == 0) {
		/*
		 * If there is just the header portion, must flush before
		 * writing last word of header for correctness, and after
		 * the last header word (trigger word).
		 */
		if (flush_wc) {
			qib_flush_wc();
			qib_pio_copy(piobuf, hdr, hdrwords - 1);
			qib_flush_wc();
			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
			qib_flush_wc();
		} else
			qib_pio_copy(piobuf, hdr, hdrwords);
		goto done;
	}

	if (flush_wc)
		qib_flush_wc();
	qib_pio_copy(piobuf, hdr, hdrwords);
	piobuf += hdrwords;

	/* The common case is aligned and contained in one segment. */
	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
		u32 *addr = (u32 *) ss->sge.vaddr;

		/* Update address before sending packet. */
		update_sge(ss, len);
		if (flush_wc) {
			qib_pio_copy(piobuf, addr, dwords - 1);
			/* must flush early everything before trigger word */
			qib_flush_wc();
			__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
			/* be sure trigger word is written */
			qib_flush_wc();
		} else
			qib_pio_copy(piobuf, addr, dwords);
		goto done;
	}
	copy_io(piobuf, ss, len, flush_wc);
done:
	if (dd->flags & QIB_USE_SPCL_TRIG) {
		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;

		qib_flush_wc();
		__raw_writel(0xaebecede, piobuf_orig + spcl_off);
	}
	qib_sendbuf_done(dd, pbufn);
	if (qp->s_rdma_mr) {
		qib_put_mr(qp->s_rdma_mr);
		qp->s_rdma_mr = NULL;
	}
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
		qib_rc_send_complete(qp, ibhdr);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}
	return 0;
}
/**
 * qib_verbs_send - send a packet
 * @qp: the QP to send on
 * @hdr: the packet header
 * @hdrwords: the number of 32-bit words in the header
 * @ss: the SGE to send
 * @len: the length of the packet in bytes
 *
 * Return zero if packet is sent or queued OK.
 * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
 */
int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
		   u32 hdrwords, struct qib_sge_state *ss, u32 len)
{
	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	u32 plen;
	int ret;
	u32 dwords = (len + 3) >> 2;

	/*
	 * Calculate the send buffer trigger address.
	 * The +1 counts for the pbc control dword following the pbc length.
	 */
	plen = hdrwords + dwords + 1;

	/*
	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
	 * can defer SDMA restart until link goes ACTIVE without
	 * worrying about just how we got there.
	 */
	if (qp->ibqp.qp_type == IB_QPT_SMI ||
	    !(dd->flags & QIB_HAS_SEND_DMA))
		ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
					 plen, dwords);
	else
		ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
					 plen, dwords);

	return ret;
}
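/*
 * Worked example for the plen calculation above (illustrative numbers):
 * a 14-dword header with 256 bytes of payload gives
 * dwords = (256 + 3) >> 2 = 64 and plen = 14 + 64 + 1 = 79, where the
 * "+ 1" accounts for the PBC control dword that accompanies the PBC
 * length word at the start of the send buffer.
 */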
int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
			  u64 *rwords, u64 *spkts, u64 *rpkts,
			  u64 *xmit_wait)
{
	int ret;
	struct qib_devdata *dd = ppd->dd;

	if (!(dd->flags & QIB_PRESENT)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	*swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
	*rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
	*spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
	*rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
	*xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);

	ret = 0;

bail:
	return ret;
}
/**
 * qib_get_counters - get various chip counters
 * @ppd: the qlogic_ib port data
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 */
int qib_get_counters(struct qib_pportdata *ppd,
		     struct qib_verbs_counters *cntrs)
{
	int ret;

	if (!(ppd->dd->flags & QIB_PRESENT)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	cntrs->symbol_error_counter =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
	cntrs->link_error_recovery_counter =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
	/*
	 * The link downed counter counts when the other side downs the
	 * connection.  We add in the number of times we downed the link
	 * due to local link integrity errors to compensate.
	 */
	cntrs->link_downed_counter =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
	cntrs->port_rcv_errors =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
	cntrs->port_rcv_errors +=
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
	cntrs->port_rcv_errors +=
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
	cntrs->port_rcv_remphys_errors =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
	cntrs->port_xmit_discards =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
	cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
			QIBPORTCNTR_WORDSEND);
	cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
			QIBPORTCNTR_WORDRCV);
	cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
			QIBPORTCNTR_PKTSEND);
	cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
			QIBPORTCNTR_PKTRCV);
	cntrs->local_link_integrity_errors =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
	cntrs->excessive_buffer_overrun_errors =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
	cntrs->vl15_dropped =
		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);

	ret = 0;

bail:
	return ret;
}
/**
 * qib_ib_piobufavail - callback when a PIO buffer is available
 * @dd: the device pointer
 *
 * This is called from qib_intr() at interrupt level when a PIO buffer is
 * available after qib_verbs_send() returned an error that no buffers were
 * available. Disable the interrupt if there are no more QPs waiting.
 */
void qib_ib_piobufavail(struct qib_devdata *dd)
{
	struct qib_ibdev *dev = &dd->verbs_dev;
	struct list_head *list;
	struct qib_qp *qps[5];
	struct qib_qp *qp;
	unsigned long flags;
	unsigned i, n;

	list = &dev->piowait;
	n = 0;

	/*
	 * Note: checking that the piowait list is empty and clearing
	 * the buffer available interrupt needs to be atomic or we
	 * could end up with QPs on the wait list with the interrupt
	 * disabled.
	 */
	spin_lock_irqsave(&dev->pending_lock, flags);
	while (!list_empty(list)) {
		if (n == ARRAY_SIZE(qps))
			goto full;
		qp = list_entry(list->next, struct qib_qp, iowait);
		list_del_init(&qp->iowait);
		atomic_inc(&qp->refcount);
		qps[n++] = qp;
	}
	dd->f_wantpiobuf_intr(dd, 0);
full:
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	for (i = 0; i < n; i++) {
		qp = qps[i];

		spin_lock_irqsave(&qp->s_lock, flags);
		if (qp->s_flags & QIB_S_WAIT_PIO) {
			qp->s_flags &= ~QIB_S_WAIT_PIO;
			qib_schedule_send(qp);
		}
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify qib_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}
static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
			    struct ib_udata *uhw)
{
	struct qib_devdata *dd = dd_from_ibdev(ibdev);
	struct qib_ibdev *dev = to_idev(ibdev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;
	memset(props, 0, sizeof(*props));

	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
	props->page_size_cap = PAGE_SIZE;
	props->vendor_id =
		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
	props->vendor_part_id = dd->deviceid;
	props->hw_ver = dd->minrev;
	props->sys_image_guid = ib_qib_sys_image_guid;
	props->max_mr_size = ~0ULL;
	props->max_qp = ib_qib_max_qps;
	props->max_qp_wr = ib_qib_max_qp_wrs;
	props->max_sge = ib_qib_max_sges;
	props->max_sge_rd = ib_qib_max_sges;
	props->max_cq = ib_qib_max_cqs;
	props->max_ah = ib_qib_max_ahs;
	props->max_cqe = ib_qib_max_cqes;
	props->max_mr = dev->lk_table.max;
	props->max_fmr = dev->lk_table.max;
	props->max_map_per_fmr = 32767;
	props->max_pd = ib_qib_max_pds;
	props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
	props->max_qp_init_rd_atom = 255;
	/* props->max_res_rd_atom */
	props->max_srq = ib_qib_max_srqs;
	props->max_srq_wr = ib_qib_max_srq_wrs;
	props->max_srq_sge = ib_qib_max_srq_sges;
	/* props->local_ca_ack_delay */
	props->atomic_cap = IB_ATOMIC_GLOB;
	props->max_pkeys = qib_get_npkeys(dd);
	props->max_mcast_grp = ib_qib_max_mcast_grps;
	props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;

	return 0;
}
static int qib_query_port(struct ib_device *ibdev, u8 port,
			  struct ib_port_attr *props)
{
	struct qib_devdata *dd = dd_from_ibdev(ibdev);
	struct qib_ibport *ibp = to_iport(ibdev, port);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	enum ib_mtu mtu;
	u16 lid = ppd->lid;

	memset(props, 0, sizeof(*props));
	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
	props->lmc = ppd->lmc;
	props->sm_lid = ibp->sm_lid;
	props->sm_sl = ibp->sm_sl;
	props->state = dd->f_iblink_state(ppd->lastibcstat);
	props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
	props->port_cap_flags = ibp->port_cap_flags;
	props->gid_tbl_len = QIB_GUIDS_PER_PORT;
	props->max_msg_sz = 0x80000000;
	props->pkey_tbl_len = qib_get_npkeys(dd);
	props->bad_pkey_cntr = ibp->pkey_violations;
	props->qkey_viol_cntr = ibp->qkey_violations;
	props->active_width = ppd->link_width_active;
	/* See rate_show() */
	props->active_speed = ppd->link_speed_active;
	props->max_vl_num = qib_num_vls(ppd->vls_supported);
	props->init_type_reply = 0;

	props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
	switch (ppd->ibmtu) {
	case 4096:
		mtu = IB_MTU_4096;
		break;
	case 2048:
		mtu = IB_MTU_2048;
		break;
	case 1024:
		mtu = IB_MTU_1024;
		break;
	case 512:
		mtu = IB_MTU_512;
		break;
	case 256:
		mtu = IB_MTU_256;
		break;
	default:
		mtu = IB_MTU_2048;
	}
	props->active_mtu = mtu;
	props->subnet_timeout = ibp->subnet_timeout;

	return 0;
}
static int qib_modify_device(struct ib_device *device,
			     int device_modify_mask,
			     struct ib_device_modify *device_modify)
{
	struct qib_devdata *dd = dd_from_ibdev(device);
	unsigned i;
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(device->node_desc, device_modify->node_desc, 64);
		for (i = 0; i < dd->num_pports; i++) {
			struct qib_ibport *ibp = &dd->pport[i].ibport_data;

			qib_node_desc_chg(ibp);
		}
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		ib_qib_sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);
		for (i = 0; i < dd->num_pports; i++) {
			struct qib_ibport *ibp = &dd->pport[i].ibport_data;

			qib_sys_guid_chg(ibp);
		}
	}

	ret = 0;

bail:
	return ret;
}
static int qib_modify_port(struct ib_device *ibdev, u8 port,
			   int port_modify_mask, struct ib_port_modify *props)
{
	struct qib_ibport *ibp = to_iport(ibdev, port);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);

	ibp->port_cap_flags |= props->set_port_cap_mask;
	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
	if (props->set_port_cap_mask || props->clr_port_cap_mask)
		qib_cap_mask_chg(ibp);
	if (port_modify_mask & IB_PORT_SHUTDOWN)
		qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
		ibp->qkey_violations = 0;
	return 0;
}
static int qib_query_gid(struct ib_device *ibdev, u8 port,
			 int index, union ib_gid *gid)
{
	struct qib_devdata *dd = dd_from_ibdev(ibdev);
	int ret = 0;

	if (!port || port > dd->num_pports)
		ret = -EINVAL;
	else {
		struct qib_ibport *ibp = to_iport(ibdev, port);
		struct qib_pportdata *ppd = ppd_from_ibp(ibp);

		gid->global.subnet_prefix = ibp->gid_prefix;
		if (index == 0)
			gid->global.interface_id = ppd->guid;
		else if (index < QIB_GUIDS_PER_PORT)
			gid->global.interface_id = ibp->guids[index - 1];
		else
			ret = -EINVAL;
	}

	return ret;
}
static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibdev);
	struct qib_pd *pd;
	struct ib_pd *ret;

	/*
	 * This is actually totally arbitrary.  Some correctness tests
	 * assume there's a maximum number of PDs that can be allocated.
	 * We don't actually have this limit, but we fail the test if
	 * we allow allocations of more than we report for this value.
	 */

	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock(&dev->n_pds_lock);
	if (dev->n_pds_allocated == ib_qib_max_pds) {
		spin_unlock(&dev->n_pds_lock);
		kfree(pd);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_pds_allocated++;
	spin_unlock(&dev->n_pds_lock);

	/* ib_alloc_pd() will initialize pd->ibpd. */
	pd->user = udata != NULL;

	ret = &pd->ibpd;

bail:
	return ret;
}

static int qib_dealloc_pd(struct ib_pd *ibpd)
{
	struct qib_pd *pd = to_ipd(ibpd);
	struct qib_ibdev *dev = to_idev(ibpd->device);

	spin_lock(&dev->n_pds_lock);
	dev->n_pds_allocated--;
	spin_unlock(&dev->n_pds_lock);

	kfree(pd);

	return 0;
}
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
{
	/* A multicast address requires a GRH (see ch. 8.4.1). */
	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
	    ah_attr->dlid != QIB_PERMISSIVE_LID &&
	    !(ah_attr->ah_flags & IB_AH_GRH))
		goto bail;
	if ((ah_attr->ah_flags & IB_AH_GRH) &&
	    ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
		goto bail;
	if (ah_attr->dlid == 0)
		goto bail;
	if (ah_attr->port_num < 1 ||
	    ah_attr->port_num > ibdev->phys_port_cnt)
		goto bail;
	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
	    ib_rate_to_mult(ah_attr->static_rate) < 0)
		goto bail;
	if (ah_attr->sl > 15)
		goto bail;
	return 0;
bail:
	return -EINVAL;
}
/**
 * qib_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 *
 * This may be called from interrupt context.
 */
static struct ib_ah *qib_create_ah(struct ib_pd *pd,
				   struct ib_ah_attr *ah_attr)
{
	struct qib_ah *ah;
	struct ib_ah *ret;
	struct qib_ibdev *dev = to_idev(pd->device);
	unsigned long flags;

	if (qib_check_ah(pd->device, ah_attr)) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
	if (!ah) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	if (dev->n_ahs_allocated == ib_qib_max_ahs) {
		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
		kfree(ah);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_ahs_allocated++;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	/* ib_create_ah() will initialize ah->ibah. */
	ah->attr = *ah_attr;
	atomic_set(&ah->refcount, 0);

	ret = &ah->ibah;

bail:
	return ret;
}
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
{
	struct ib_ah_attr attr;
	struct ib_ah *ah = ERR_PTR(-EINVAL);
	struct qib_qp *qp0;

	memset(&attr, 0, sizeof(attr));
	attr.dlid = dlid;
	attr.port_num = ppd_from_ibp(ibp)->port;
	rcu_read_lock();
	qp0 = rcu_dereference(ibp->qp0);
	if (qp0)
		ah = ib_create_ah(qp0->ibqp.pd, &attr);
	rcu_read_unlock();
	return ah;
}
/**
 * qib_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int qib_destroy_ah(struct ib_ah *ibah)
{
	struct qib_ibdev *dev = to_idev(ibah->device);
	struct qib_ah *ah = to_iah(ibah);
	unsigned long flags;

	if (atomic_read(&ah->refcount) != 0)
		return -EBUSY;

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	dev->n_ahs_allocated--;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	kfree(ah);

	return 0;
}
static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct qib_ah *ah = to_iah(ibah);

	if (qib_check_ah(ibah->device, ah_attr))
		return -EINVAL;

	ah->attr = *ah_attr;

	return 0;
}

static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct qib_ah *ah = to_iah(ibah);

	*ah_attr = ah->attr;

	return 0;
}

/**
 * qib_get_npkeys - return the size of the PKEY table for context 0
 * @dd: the qlogic_ib device
 */
unsigned qib_get_npkeys(struct qib_devdata *dd)
{
	return ARRAY_SIZE(dd->rcd[0]->pkeys);
}

/*
 * Return the indexed PKEY from the port PKEY table.
 * No need to validate rcd[ctxt]; the port is setup if we are here.
 */
unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
{
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	struct qib_devdata *dd = ppd->dd;
	unsigned ctxt = ppd->hw_pidx;
	unsigned ret;

	/* dd->rcd null if mini_init or some init failures */
	if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
		ret = 0;
	else
		ret = dd->rcd[ctxt]->pkeys[index];

	return ret;
}

static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			  u16 *pkey)
{
	struct qib_devdata *dd = dd_from_ibdev(ibdev);
	int ret;

	if (index >= qib_get_npkeys(dd)) {
		ret = -EINVAL;
		goto bail;
	}

	*pkey = qib_get_pkey(to_iport(ibdev, port), index);
	ret = 0;

bail:
	return ret;
}
/**
 * qib_alloc_ucontext - allocate a ucontext
 * @ibdev: the infiniband device
 * @udata: not used by the QLogic_IB driver
 */

static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
					      struct ib_udata *udata)
{
	struct qib_ucontext *context;
	struct ib_ucontext *ret;

	context = kmalloc(sizeof(*context), GFP_KERNEL);
	if (!context) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	ret = &context->ibucontext;

bail:
	return ret;
}

static int qib_dealloc_ucontext(struct ib_ucontext *context)
{
	kfree(to_iucontext(context));
	return 0;
}
static void init_ibport(struct qib_pportdata *ppd)
{
	struct qib_verbs_counters cntrs;
	struct qib_ibport *ibp = &ppd->ibport_data;

	spin_lock_init(&ibp->lock);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
	ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
	ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
		IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
		IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
		IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
		IB_PORT_OTHER_LOCAL_CHANGES_SUP;
	if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
		ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	/* Snapshot current HW counters to "clear" them. */
	qib_get_counters(ppd, &cntrs);
	ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
	ibp->z_link_error_recovery_counter =
		cntrs.link_error_recovery_counter;
	ibp->z_link_downed_counter = cntrs.link_downed_counter;
	ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
	ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
	ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
	ibp->z_port_xmit_data = cntrs.port_xmit_data;
	ibp->z_port_rcv_data = cntrs.port_rcv_data;
	ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
	ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
	ibp->z_local_link_integrity_errors =
		cntrs.local_link_integrity_errors;
	ibp->z_excessive_buffer_overrun_errors =
		cntrs.excessive_buffer_overrun_errors;
	ibp->z_vl15_dropped = cntrs.vl15_dropped;
	RCU_INIT_POINTER(ibp->qp0, NULL);
	RCU_INIT_POINTER(ibp->qp1, NULL);
}
static int qib_port_immutable(struct ib_device *ibdev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = qib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}
/**
 * qib_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 * Return the allocated qib_ibdev pointer or NULL on error.
 */
int qib_register_ib_device(struct qib_devdata *dd)
{
	struct qib_ibdev *dev = &dd->verbs_dev;
	struct ib_device *ibdev = &dev->ibdev;
	struct qib_pportdata *ppd = dd->pport;
	unsigned i, lk_tab_size;
	int ret;

	dev->qp_table_size = ib_qib_qp_table_size;
	get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
	dev->qp_table = kmalloc_array(
				dev->qp_table_size,
				sizeof(*dev->qp_table),
				GFP_KERNEL);
	if (!dev->qp_table) {
		ret = -ENOMEM;
		goto err_qpt;
	}
	for (i = 0; i < dev->qp_table_size; i++)
		RCU_INIT_POINTER(dev->qp_table[i], NULL);

	for (i = 0; i < dd->num_pports; i++)
		init_ibport(ppd + i);

	/* Only need to initialize non-zero fields. */
	spin_lock_init(&dev->qpt_lock);
	spin_lock_init(&dev->n_pds_lock);
	spin_lock_init(&dev->n_ahs_lock);
	spin_lock_init(&dev->n_cqs_lock);
	spin_lock_init(&dev->n_qps_lock);
	spin_lock_init(&dev->n_srqs_lock);
	spin_lock_init(&dev->n_mcast_grps_lock);
	init_timer(&dev->mem_timer);
	dev->mem_timer.function = mem_timer;
	dev->mem_timer.data = (unsigned long) dev;

	qib_init_qpn_table(dd, &dev->qpn_table);

	/*
	 * The top ib_qib_lkey_table_size bits are used to index the
	 * table.  The lower 8 bits can be owned by the user (copied from
	 * the LKEY).  The remaining bits act as a generation number or tag.
	 */
	spin_lock_init(&dev->lk_table.lock);
	/* insure generation is at least 4 bits see keys.c */
	if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) {
		qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
			     ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS);
		ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS;
	}
	dev->lk_table.max = 1 << ib_qib_lkey_table_size;
	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
	dev->lk_table.table = (struct qib_mregion __rcu **)
		vmalloc(lk_tab_size);
	if (dev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
	RCU_INIT_POINTER(dev->dma_mr, NULL);
	for (i = 0; i < dev->lk_table.max; i++)
		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
	INIT_LIST_HEAD(&dev->pending_mmaps);
	spin_lock_init(&dev->pending_lock);
	dev->mmap_offset = PAGE_SIZE;
	spin_lock_init(&dev->mmap_offset_lock);
	INIT_LIST_HEAD(&dev->piowait);
	INIT_LIST_HEAD(&dev->dmawait);
	INIT_LIST_HEAD(&dev->txwait);
	INIT_LIST_HEAD(&dev->memwait);
	INIT_LIST_HEAD(&dev->txreq_free);

	if (ppd->sdma_descq_cnt) {
		dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
						ppd->sdma_descq_cnt *
						sizeof(struct qib_pio_header),
						&dev->pio_hdrs_phys,
						GFP_KERNEL);
		if (!dev->pio_hdrs) {
			ret = -ENOMEM;
			goto err_hdrs;
		}
	}

	for (i = 0; i < ppd->sdma_descq_cnt; i++) {
		struct qib_verbs_txreq *tx;

		tx = kzalloc(sizeof(*tx), GFP_KERNEL);
		if (!tx) {
			ret = -ENOMEM;
			goto err_tx;
		}
		tx->hdr_inx = i;
		list_add(&tx->txreq.list, &dev->txreq_free);
	}

	/*
	 * The system image GUID is supposed to be the same for all
	 * IB HCAs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!ib_qib_sys_image_guid)
		ib_qib_sys_image_guid = ppd->guid;

	strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
	ibdev->owner = THIS_MODULE;
	ibdev->node_guid = ppd->guid;
	ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
	ibdev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
	ibdev->node_type = RDMA_NODE_IB_CA;
	ibdev->phys_port_cnt = dd->num_pports;
	ibdev->num_comp_vectors = 1;
	ibdev->dma_device = &dd->pcidev->dev;
	ibdev->query_device = qib_query_device;
	ibdev->modify_device = qib_modify_device;
	ibdev->query_port = qib_query_port;
	ibdev->modify_port = qib_modify_port;
	ibdev->query_pkey = qib_query_pkey;
	ibdev->query_gid = qib_query_gid;
	ibdev->alloc_ucontext = qib_alloc_ucontext;
	ibdev->dealloc_ucontext = qib_dealloc_ucontext;
	ibdev->alloc_pd = qib_alloc_pd;
	ibdev->dealloc_pd = qib_dealloc_pd;
	ibdev->create_ah = qib_create_ah;
	ibdev->destroy_ah = qib_destroy_ah;
	ibdev->modify_ah = qib_modify_ah;
	ibdev->query_ah = qib_query_ah;
	ibdev->create_srq = qib_create_srq;
	ibdev->modify_srq = qib_modify_srq;
	ibdev->query_srq = qib_query_srq;
	ibdev->destroy_srq = qib_destroy_srq;
	ibdev->create_qp = qib_create_qp;
	ibdev->modify_qp = qib_modify_qp;
	ibdev->query_qp = qib_query_qp;
	ibdev->destroy_qp = qib_destroy_qp;
	ibdev->post_send = qib_post_send;
	ibdev->post_recv = qib_post_receive;
	ibdev->post_srq_recv = qib_post_srq_receive;
	ibdev->create_cq = qib_create_cq;
	ibdev->destroy_cq = qib_destroy_cq;
	ibdev->resize_cq = qib_resize_cq;
	ibdev->poll_cq = qib_poll_cq;
	ibdev->req_notify_cq = qib_req_notify_cq;
	ibdev->get_dma_mr = qib_get_dma_mr;
	ibdev->reg_phys_mr = qib_reg_phys_mr;
	ibdev->reg_user_mr = qib_reg_user_mr;
	ibdev->dereg_mr = qib_dereg_mr;
	ibdev->alloc_mr = qib_alloc_mr;
	ibdev->map_mr_sg = qib_map_mr_sg;
	ibdev->alloc_fmr = qib_alloc_fmr;
	ibdev->map_phys_fmr = qib_map_phys_fmr;
	ibdev->unmap_fmr = qib_unmap_fmr;
	ibdev->dealloc_fmr = qib_dealloc_fmr;
	ibdev->attach_mcast = qib_multicast_attach;
	ibdev->detach_mcast = qib_multicast_detach;
	ibdev->process_mad = qib_process_mad;
	ibdev->mmap = qib_mmap;
	ibdev->dma_ops = &qib_dma_mapping_ops;
	ibdev->get_port_immutable = qib_port_immutable;

	snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
		 "Intel Infiniband HCA %s", init_utsname()->nodename);

	ret = ib_register_device(ibdev, qib_create_port_files);
	if (ret)
		goto err_reg;

	ret = qib_create_agents(dev);
	if (ret)
		goto err_agents;

	ret = qib_verbs_register_sysfs(dd);
	if (ret)
		goto err_class;

	goto bail;

err_class:
	qib_free_agents(dev);
err_agents:
	ib_unregister_device(ibdev);
err_reg:
err_tx:
	while (!list_empty(&dev->txreq_free)) {
		struct list_head *l = dev->txreq_free.next;
		struct qib_verbs_txreq *tx;

		list_del(l);
		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
		kfree(tx);
	}
	if (ppd->sdma_descq_cnt)
		dma_free_coherent(&dd->pcidev->dev,
				  ppd->sdma_descq_cnt *
					sizeof(struct qib_pio_header),
				  dev->pio_hdrs, dev->pio_hdrs_phys);
err_hdrs:
	vfree(dev->lk_table.table);
err_lk:
	kfree(dev->qp_table);
err_qpt:
	qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
bail:
	return ret;
}
*dd
)
2323 struct qib_ibdev
*dev
= &dd
->verbs_dev
;
2324 struct ib_device
*ibdev
= &dev
->ibdev
;
2326 unsigned lk_tab_size
;
2328 qib_verbs_unregister_sysfs(dd
);
2330 qib_free_agents(dev
);
2332 ib_unregister_device(ibdev
);
2334 if (!list_empty(&dev
->piowait
))
2335 qib_dev_err(dd
, "piowait list not empty!\n");
2336 if (!list_empty(&dev
->dmawait
))
2337 qib_dev_err(dd
, "dmawait list not empty!\n");
2338 if (!list_empty(&dev
->txwait
))
2339 qib_dev_err(dd
, "txwait list not empty!\n");
2340 if (!list_empty(&dev
->memwait
))
2341 qib_dev_err(dd
, "memwait list not empty!\n");
2343 qib_dev_err(dd
, "DMA MR not NULL!\n");
2345 qps_inuse
= qib_free_all_qps(dd
);
2347 qib_dev_err(dd
, "QP memory leak! %u still in use\n",
2350 del_timer_sync(&dev
->mem_timer
);
2351 qib_free_qpn_table(&dev
->qpn_table
);
2352 while (!list_empty(&dev
->txreq_free
)) {
2353 struct list_head
*l
= dev
->txreq_free
.next
;
2354 struct qib_verbs_txreq
*tx
;
2357 tx
= list_entry(l
, struct qib_verbs_txreq
, txreq
.list
);
2360 if (dd
->pport
->sdma_descq_cnt
)
2361 dma_free_coherent(&dd
->pcidev
->dev
,
2362 dd
->pport
->sdma_descq_cnt
*
2363 sizeof(struct qib_pio_header
),
2364 dev
->pio_hdrs
, dev
->pio_hdrs_phys
);
2365 lk_tab_size
= dev
->lk_table
.max
* sizeof(*dev
->lk_table
.table
);
2366 vfree(dev
->lk_table
.table
);
2367 kfree(dev
->qp_table
);
/*
 * This must be called with s_lock held.
 */
void qib_schedule_send(struct qib_qp *qp)
{
	if (qib_send_ok(qp)) {
		struct qib_ibport *ibp =
			to_iport(qp->ibqp.device, qp->port_num);
		struct qib_pportdata *ppd = ppd_from_ibp(ibp);

		queue_work(ppd->qib_wq, &qp->s_work);
	}
}