/*
 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/utsname.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

unsigned int ib_ipath_lkey_table_size = 12;
module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int ib_ipath_max_pds = 0xFFFF;
module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int ib_ipath_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_ipath_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int ib_ipath_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_ipath_max_qps = 16384;
module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_ipath_max_sges = 0x60;
module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int ib_ipath_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int ib_ipath_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int ib_ipath_max_srqs = 1024;
module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int ib_ipath_max_srq_sges = 128;
module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = 0,
	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK,
	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
	[IB_QPS_ERR] = 0,
};
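/*
 * Illustration (mirrors the checks used later in this file): callers test
 * the table against the current QP state before doing any work, e.g.
 *
 *	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))
 *		reject the work request;
 *
 * so, for example, a QP in SQE still accepts and processes receives but
 * refuses new sends.
 */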
struct ipath_ucontext {
	struct ib_ucontext ibucontext;
};

static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
						  *ibucontext)
{
	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
}
/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

static __be64 sys_image_guid;
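/*
 * Illustration: the table above is indexed by the send work request opcode
 * when a completion entry is generated, as ipath_flush_wqe() below does:
 *
 *	wc.opcode = ib_ipath_wc_opcode[wr->opcode];
 *
 * Only the send-side opcodes listed above are valid indices.
 */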
/**
 * ipath_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 */
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		memcpy(sge->vaddr, data, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}
/**
 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
 * @ss: the SGE state
 * @length: the number of bytes to skip
 */
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}
static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ib_wc wc;

	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wr->wr_id;
	wc.status = IB_WC_WR_FLUSH_ERR;
	wc.opcode = ib_ipath_wc_opcode[wr->opcode];
	ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
/**
 * ipath_post_one_send - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 */
static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ipath_swqe *wqe;
	u32 next;
	int i;
	int j;
	int acc;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Check that state is OK to post send. */
	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) {
		if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR)
			goto bail_inval;
		/* C10-96 says generate a flushed completion entry. */
		ipath_flush_wqe(qp, wr);
		ret = 0;
		goto bail;
	}

	/* IB spec says that num_sge == 0 is OK. */
	if (wr->num_sge > qp->s_max_sge)
		goto bail_inval;

	/*
	 * Don't allow RDMA reads or atomic operations on UC or
	 * undefined operations.
	 * Make sure buffer is large enough to hold the result for atomics.
	 */
	if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
			goto bail_inval;
	} else if (qp->ibqp.qp_type == IB_QPT_UD) {
		/* Check UD opcode */
		if (wr->opcode != IB_WR_SEND &&
		    wr->opcode != IB_WR_SEND_WITH_IMM)
			goto bail_inval;
		/* Check UD destination address PD */
		if (qp->ibqp.pd != wr->wr.ud.ah->pd)
			goto bail_inval;
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
		goto bail_inval;
	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		 (wr->num_sge == 0 ||
		  wr->sg_list[0].length < sizeof(u64) ||
		  wr->sg_list[0].addr & (sizeof(u64) - 1)))
		goto bail_inval;
	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
		goto bail_inval;

	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last) {
		ret = -ENOMEM;
		goto bail;
	}

	wqe = get_swqe_ptr(qp, qp->s_head);
	wqe->wr = *wr;
	wqe->ssn = qp->s_ssn++;
	wqe->length = 0;
	if (wr->num_sge) {
		acc = wr->opcode >= IB_WR_RDMA_READ ?
			IB_ACCESS_LOCAL_WRITE : 0;
		for (i = 0, j = 0; i < wr->num_sge; i++) {
			u32 length = wr->sg_list[i].length;
			int ok;

			if (length == 0)
				continue;
			ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
					   &wr->sg_list[i], acc);
			if (!ok)
				goto bail_inval;
			wqe->length += length;
			j++;
		}
		wqe->wr.num_sge = j;
	}
	if (qp->ibqp.qp_type == IB_QPT_UC ||
	    qp->ibqp.qp_type == IB_QPT_RC) {
		if (wqe->length > 0x80000000U)
			goto bail_inval;
	} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
		goto bail_inval;
	qp->s_head = next;

	ret = 0;
	goto bail;

bail_inval:
	ret = -EINVAL;
bail:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}
/**
 * ipath_post_send - post a send on a QP
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			   struct ib_send_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	int err = 0;

	for (; wr; wr = wr->next) {
		err = ipath_post_one_send(qp, wr);
		if (err) {
			*bad_wr = wr;
			goto bail;
		}
	}

	/* Try to do the send work in the caller's context. */
	ipath_do_send((unsigned long) qp);

bail:
	return err;
}
/**
 * ipath_post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			      struct ib_recv_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	struct ipath_rwq *wq = qp->r_rq.wq;
	unsigned long flags;
	int ret;

	/* Check that state is OK to post receive. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
		*bad_wr = wr;
		ret = -EINVAL;
		goto bail;
	}

	for (; wr; wr = wr->next) {
		struct ipath_rwqe *wqe;
		u32 next;
		int i;

		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			ret = -EINVAL;
			goto bail;
		}

		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = wq->head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		if (next == wq->tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
		wqe->wr_id = wr->wr_id;
		wqe->num_sge = wr->num_sge;
		for (i = 0; i < wr->num_sge; i++)
			wqe->sg_list[i] = wr->sg_list[i];
		/* Make sure queue entry is written before the head index. */
		smp_wmb();
		wq->head = next;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}
/**
 * ipath_qp_rcv - processing an incoming packet on a QP
 * @dev: the device the packet came on
 * @hdr: the packet header
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on
 *
 * This is called from ipath_ib_rcv() to process an incoming packet
 * for the given QP.
 * Called at interrupt level.
 */
static void ipath_qp_rcv(struct ipath_ibdev *dev,
			 struct ipath_ib_header *hdr, int has_grh,
			 void *data, u32 tlen, struct ipath_qp *qp)
{
	/* Check for valid receive state. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		return;
	}

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (ib_ipath_disable_sma)
			break;
		/* FALLTHROUGH */
	case IB_QPT_UD:
		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_RC:
		ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_UC:
		ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	default:
		break;
	}
}
/**
 * ipath_ib_rcv - process an incoming packet
 * @dev: the device pointer
 * @rhdr: the header of the packet
 * @data: the packet data
 * @tlen: the packet length
 *
 * This is called from ipath_kreceive() to process an incoming packet at
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 */
void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
		  u32 tlen)
{
	struct ipath_ib_header *hdr = rhdr;
	struct ipath_other_headers *ohdr;
	struct ipath_qp *qp;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	if (unlikely(dev == NULL))
		goto bail;

	if (unlikely(tlen < 24)) {	/* LRH+BTH+CRC */
		dev->rcv_errors++;
		goto bail;
	}

	/* Check for a valid destination LID (see ch. 7.11.1). */
	lid = be16_to_cpu(hdr->lrh[1]);
	if (lid < IPATH_MULTICAST_LID_BASE) {
		lid &= ~((1 << dev->dd->ipath_lmc) - 1);
		if (unlikely(lid != dev->dd->ipath_lid)) {
			dev->rcv_errors++;
			goto bail;
		}
	}

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == IPATH_LRH_BTH)
		ohdr = &hdr->u.oth;
	else if (lnh == IPATH_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else {
		dev->rcv_errors++;
		goto bail;
	}

	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	dev->opstats[opcode].n_bytes += tlen;
	dev->opstats[opcode].n_packets++;

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
	if (qp_num == IPATH_MULTICAST_QPN) {
		struct ipath_mcast *mcast;
		struct ipath_mcast_qp *p;

		if (lnh != IPATH_LRH_GRH) {
			dev->n_pkt_drops++;
			goto bail;
		}
		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
		if (mcast == NULL) {
			dev->n_pkt_drops++;
			goto bail;
		}
		dev->n_multicast_rcv++;
		list_for_each_entry_rcu(p, &mcast->qp_list, list)
			ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
		/*
		 * Notify ipath_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
		if (qp) {
			dev->n_unicast_rcv++;
			ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
				     tlen, qp);
			/*
			 * Notify ipath_destroy_qp() if it is waiting
			 * for us to finish.
			 */
			if (atomic_dec_and_test(&qp->refcount))
				wake_up(&qp->wait);
		} else
			dev->n_pkt_drops++;
	}

bail:;
}
/**
 * ipath_ib_timer - verbs timer
 * @dev: the device pointer
 *
 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 * QPs which need retransmits and to collect performance numbers.
 */
static void ipath_ib_timer(struct ipath_ibdev *dev)
{
	struct ipath_qp *resend = NULL;
	struct list_head *last;
	struct ipath_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	/* Start filling the next pending queue. */
	if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
		dev->pending_index = 0;
	/* Save any requests still in the new queue, they have timed out. */
	last = &dev->pending[dev->pending_index];
	while (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		list_del_init(&qp->timerwait);
		qp->timer_next = resend;
		resend = qp;
		atomic_inc(&qp->refcount);
	}
	last = &dev->rnrwait;
	if (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		if (--qp->s_rnr_timeout == 0) {
			do {
				list_del_init(&qp->timerwait);
				tasklet_hi_schedule(&qp->s_task);
				if (list_empty(last))
					break;
				qp = list_entry(last->next, struct ipath_qp,
						timerwait);
			} while (qp->s_rnr_timeout == 0);
		}
	}
	/*
	 * We should only be in the started state if pma_sample_start != 0
	 */
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
	    --dev->pma_sample_start == 0) {
		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
					&dev->ipath_rword,
					&dev->ipath_spkts,
					&dev->ipath_rpkts,
					&dev->ipath_xmit_wait);
	}
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
		if (dev->pma_sample_interval == 0) {
			u64 ta, tb, tc, td, te;

			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
			ipath_snapshot_counters(dev->dd, &ta, &tb,
						&tc, &td, &te);

			dev->ipath_sword = ta - dev->ipath_sword;
			dev->ipath_rword = tb - dev->ipath_rword;
			dev->ipath_spkts = tc - dev->ipath_spkts;
			dev->ipath_rpkts = td - dev->ipath_rpkts;
			dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
		} else
			dev->pma_sample_interval--;
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	/* XXX What if timer fires again while this is running? */
	for (qp = resend; qp != NULL; qp = qp->timer_next) {
		struct ib_wc wc;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
			dev->n_timeouts++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
		}
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}
static void update_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	sge->vaddr += length;
	sge->length -= length;
	sge->sge_length -= length;
	if (sge->sge_length == 0) {
		if (--ss->num_sge)
			*sge = *ss->sg_list++;
	} else if (sge->length == 0 && sge->mr != NULL) {
		if (++sge->n >= IPATH_SEGSZ) {
			if (++sge->m >= sge->mr->mapsz)
				return;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}
#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#else
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#endif
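/*
 * Worked example (little-endian case, values illustrative only):
 * clear_upper_bytes(0xAABBCCDD, 2, 1) first shifts left by
 * (4 - 2) * 8 = 16 bits giving 0xCCDD0000, then right by
 * (4 - 2 - 1) * 8 = 8 bits giving 0x00CCDD00; i.e. the low 'n' bytes of
 * 'data' are kept and placed at byte offset 'off', everything else is
 * cleared.
 */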
static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
		    u32 length, unsigned flush_wc)
{
	u32 extra = 0;
	u32 data = 0;
	u32 last;

	while (1) {
		u32 len = ss->sge.length;
		u32 off;

		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		/* If the source address is not aligned, try to align it. */
		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
		if (off) {
			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
					    ~(sizeof(u32) - 1));
			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
			u32 y;

			y = sizeof(u32) - off;
			if (len > y)
				len = y;
			if (len + extra >= sizeof(u32)) {
				data |= set_upper_bits(v, extra *
						       BITS_PER_BYTE);
				len = sizeof(u32) - extra;
				if (len == length) {
					last = data;
					break;
				}
				__raw_writel(data, piobuf);
				piobuf++;
				extra = 0;
				data = 0;
			} else {
				/* Clear unused upper bytes */
				data |= clear_upper_bytes(v, len, extra);
				if (len == length) {
					last = data;
					break;
				}
				extra += len;
			}
		} else if (extra) {
			/* Source address is aligned. */
			u32 *addr = (u32 *) ss->sge.vaddr;
			int shift = extra * BITS_PER_BYTE;
			int ushift = 32 - shift;
			u32 l = len;

			while (l >= sizeof(u32)) {
				u32 v = *addr;

				data |= set_upper_bits(v, shift);
				__raw_writel(data, piobuf);
				data = get_upper_bits(v, ushift);
				piobuf++;
				addr++;
				l -= sizeof(u32);
			}
			/*
			 * We still have 'extra' number of bytes leftover.
			 */
			if (l) {
				u32 v = *addr;

				if (l + extra >= sizeof(u32)) {
					data |= set_upper_bits(v, shift);
					len -= l + extra - sizeof(u32);
					if (len == length) {
						last = data;
						break;
					}
					__raw_writel(data, piobuf);
					piobuf++;
					extra = 0;
					data = 0;
				} else {
					/* Clear unused upper bytes */
					data |= clear_upper_bytes(v, l,
								  extra);
					if (len == length) {
						last = data;
						break;
					}
					extra += l;
				}
			} else if (len == length) {
				last = data;
				break;
			}
		} else if (len == length) {
			u32 w;

			/*
			 * Need to round up for the last dword in the
			 * packet.
			 */
			w = (len + 3) >> 2;
			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
			piobuf += w - 1;
			last = ((u32 *) ss->sge.vaddr)[w - 1];
			break;
		} else {
			u32 w = len >> 2;

			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
			piobuf += w;

			extra = len & (sizeof(u32) - 1);
			if (extra) {
				u32 v = ((u32 *) ss->sge.vaddr)[w];

				/* Clear unused upper bytes */
				data = clear_upper_bytes(v, extra, 0);
			}
		}
		update_sge(ss, len);
		length -= len;
	}
	/* Update address before sending packet. */
	update_sge(ss, length);
	if (flush_wc) {
		/* must flush early everything before trigger word */
		ipath_flush_wc();
		__raw_writel(last, piobuf);
		/* be sure trigger word is written */
		ipath_flush_wc();
	} else
		__raw_writel(last, piobuf);
}
static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords,
				struct ipath_sge_state *ss, u32 len,
				u32 plen, u32 dwords)
{
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
	u32 __iomem *piobuf;
	unsigned flush_wc;
	int ret;

	piobuf = ipath_getpiobuf(dd, NULL);
	if (unlikely(piobuf == NULL)) {
		ret = -EBUSY;
		goto bail;
	}

	/*
	 * Write len to control qword, no flags.
	 * We have to flush after the PBC for correctness on some cpus
	 * or WC buffer can be written out of order.
	 */
	writeq(plen, piobuf);
	piobuf += 2;

	flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
	if (len == 0) {
		/*
		 * If there is just the header portion, must flush before
		 * writing last word of header for correctness, and after
		 * the last header word (trigger word).
		 */
		if (flush_wc) {
			ipath_flush_wc();
			__iowrite32_copy(piobuf, hdr, hdrwords - 1);
			ipath_flush_wc();
			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
			ipath_flush_wc();
		} else
			__iowrite32_copy(piobuf, hdr, hdrwords);
		goto done;
	}

	if (flush_wc)
		ipath_flush_wc();
	__iowrite32_copy(piobuf, hdr, hdrwords);
	piobuf += hdrwords;

	/* The common case is aligned and contained in one segment. */
	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
		u32 *addr = (u32 *) ss->sge.vaddr;

		/* Update address before sending packet. */
		update_sge(ss, len);
		if (flush_wc) {
			__iowrite32_copy(piobuf, addr, dwords - 1);
			/* must flush early everything before trigger word */
			ipath_flush_wc();
			__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
			/* be sure trigger word is written */
			ipath_flush_wc();
		} else
			__iowrite32_copy(piobuf, addr, dwords);
		goto done;
	}
	copy_io(piobuf, ss, len, flush_wc);
done:
	if (qp->s_wqe)
		ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
	ret = 0;
bail:
	return ret;
}
/**
 * ipath_verbs_send - send a packet
 * @qp: the QP to send on
 * @hdr: the packet header
 * @hdrwords: the number of 32-bit words in the header
 * @ss: the SGE to send
 * @len: the length of the packet in bytes
 */
int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
		     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
{
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
	u32 plen;
	int ret;
	u32 dwords = (len + 3) >> 2;

	/*
	 * Calculate the send buffer trigger address.
	 * The +1 counts for the pbc control dword following the pbc length.
	 */
	plen = hdrwords + dwords + 1;

	/* Drop non-VL15 packets if we are not in the active state */
	if (!(dd->ipath_flags & IPATH_LINKACTIVE) &&
	    qp->ibqp.qp_type != IB_QPT_SMI) {
		if (qp->s_wqe)
			ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
		ret = 0;
	} else
		ret = ipath_verbs_send_pio(qp, (u32 *) hdr, hdrwords,
					   ss, len, plen, dwords);

	return ret;
}
int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
			    u64 *rwords, u64 *spkts, u64 *rpkts,
			    u64 *xmit_wait)
{
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);

	ret = 0;

bail:
	return ret;
}
/**
 * ipath_get_counters - get various chip counters
 * @dd: the infinipath device
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 */
int ipath_get_counters(struct ipath_devdata *dd,
		       struct ipath_verbs_counters *cntrs)
{
	struct ipath_cregs const *crp = dd->ipath_cregs;
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	cntrs->symbol_error_counter =
		ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
	cntrs->link_error_recovery_counter =
		ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
	/*
	 * The link downed counter counts when the other side downs the
	 * connection.  We add in the number of times we downed the link
	 * due to local link integrity errors to compensate.
	 */
	cntrs->link_downed_counter =
		ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
	cntrs->port_rcv_errors =
		ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
		ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
		ipath_snap_cntr(dd, crp->cr_portovflcnt) +
		ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
		ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
		ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
		ipath_snap_cntr(dd, crp->cr_erricrccnt) +
		ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
		ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
		ipath_snap_cntr(dd, crp->cr_badformatcnt) +
		dd->ipath_rxfc_unsupvl_errs;
	cntrs->port_rcv_remphys_errors =
		ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
	cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
	cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
	cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
	cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
	cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
	cntrs->local_link_integrity_errors =
		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
		dd->ipath_lli_errs : dd->ipath_lli_errors;
	cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;

	ret = 0;

bail:
	return ret;
}
/**
 * ipath_ib_piobufavail - callback when a PIO buffer is available
 * @dev: the device pointer
 *
 * This is called from ipath_intr() at interrupt level when a PIO buffer is
 * available after ipath_verbs_send() returned an error that no buffers were
 * available.  Return 1 if we consumed all the PIO buffers and we still have
 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
 * return zero).
 */
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
	struct ipath_qp *qp;
	unsigned long flags;

	if (dev == NULL)
		goto bail;

	spin_lock_irqsave(&dev->pending_lock, flags);
	while (!list_empty(&dev->piowait)) {
		qp = list_entry(dev->piowait.next, struct ipath_qp,
				piowait);
		list_del_init(&qp->piowait);
		clear_bit(IPATH_S_BUSY, &qp->s_busy);
		tasklet_hi_schedule(&qp->s_task);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

bail:
	return 0;
}
static int ipath_query_device(struct ib_device *ibdev,
			      struct ib_device_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	memset(props, 0, sizeof(*props));

	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID;
	props->page_size_cap = PAGE_SIZE;
	props->vendor_id = dev->dd->ipath_vendorid;
	props->vendor_part_id = dev->dd->ipath_deviceid;
	props->hw_ver = dev->dd->ipath_pcirev;

	props->sys_image_guid = dev->sys_image_guid;

	props->max_mr_size = ~0ull;
	props->max_qp = ib_ipath_max_qps;
	props->max_qp_wr = ib_ipath_max_qp_wrs;
	props->max_sge = ib_ipath_max_sges;
	props->max_cq = ib_ipath_max_cqs;
	props->max_ah = ib_ipath_max_ahs;
	props->max_cqe = ib_ipath_max_cqes;
	props->max_mr = dev->lk_table.max;
	props->max_fmr = dev->lk_table.max;
	props->max_map_per_fmr = 32767;
	props->max_pd = ib_ipath_max_pds;
	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
	props->max_qp_init_rd_atom = 255;
	/* props->max_res_rd_atom */
	props->max_srq = ib_ipath_max_srqs;
	props->max_srq_wr = ib_ipath_max_srq_wrs;
	props->max_srq_sge = ib_ipath_max_srq_sges;
	/* props->local_ca_ack_delay */
	props->atomic_cap = IB_ATOMIC_GLOB;
	props->max_pkeys = ipath_get_npkeys(dev->dd);
	props->max_mcast_grp = ib_ipath_max_mcast_grps;
	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;

	return 0;
}
const u8 ipath_cvt_physportstate[32] = {
	[INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
	[INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
	[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
	[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
	[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
	[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
	[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
};
u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
{
	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
}
static int ipath_query_port(struct ib_device *ibdev,
			    u8 port, struct ib_port_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_devdata *dd = dev->dd;
	enum ib_mtu mtu;
	u16 lid = dd->ipath_lid;
	u64 ibcstat;

	memset(props, 0, sizeof(*props));
	props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
	props->lmc = dd->ipath_lmc;
	props->sm_lid = dev->sm_lid;
	props->sm_sl = dev->sm_sl;
	ibcstat = dd->ipath_lastibcstat;
	props->state = ((ibcstat >> 4) & 0x3) + 1;
	/* See phys_state_show() */
	props->phys_state = /* MEA: assumes shift == 0 */
		ipath_cvt_physportstate[dd->ipath_lastibcstat & 0xf];
	props->port_cap_flags = dev->port_cap_flags;
	props->gid_tbl_len = 1;
	props->max_msg_sz = 0x80000000;
	props->pkey_tbl_len = ipath_get_npkeys(dd);
	props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
		dev->z_pkey_violations;
	props->qkey_viol_cntr = dev->qkey_violations;
	props->active_width = IB_WIDTH_4X;
	/* See rate_show() */
	props->active_speed = 1;	/* Regular 10Mbs speed. */
	props->max_vl_num = 1;		/* VLCap = VL0 */
	props->init_type_reply = 0;

	/*
	 * Note: the chip supports a maximum MTU of 4096, but the driver
	 * hasn't implemented this feature yet, so set the maximum value
	 * to 2048.
	 */
	props->max_mtu = IB_MTU_2048;
	switch (dd->ipath_ibmtu) {
	case 4096:
		mtu = IB_MTU_4096;
		break;
	case 2048:
		mtu = IB_MTU_2048;
		break;
	case 1024:
		mtu = IB_MTU_1024;
		break;
	case 512:
		mtu = IB_MTU_512;
		break;
	case 256:
		mtu = IB_MTU_256;
		break;
	default:
		mtu = IB_MTU_2048;
	}
	props->active_mtu = mtu;
	props->subnet_timeout = dev->subnet_timeout;

	return 0;
}
static int ipath_modify_device(struct ib_device *device,
			       int device_modify_mask,
			       struct ib_device_modify *device_modify)
{
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
		memcpy(device->node_desc, device_modify->node_desc, 64);

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		to_idev(device)->sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);

	ret = 0;

bail:
	return ret;
}
static int ipath_modify_port(struct ib_device *ibdev,
			     u8 port, int port_modify_mask,
			     struct ib_port_modify *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	dev->port_cap_flags |= props->set_port_cap_mask;
	dev->port_cap_flags &= ~props->clr_port_cap_mask;
	if (port_modify_mask & IB_PORT_SHUTDOWN)
		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
		dev->qkey_violations = 0;

	return 0;
}
static int ipath_query_gid(struct ib_device *ibdev, u8 port,
			   int index, union ib_gid *gid)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= 1) {
		ret = -EINVAL;
		goto bail;
	}
	gid->global.subnet_prefix = dev->gid_prefix;
	gid->global.interface_id = dev->dd->ipath_guid;

	ret = 0;

bail:
	return ret;
}
static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
				    struct ib_ucontext *context,
				    struct ib_udata *udata)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_pd *pd;
	struct ib_pd *ret;

	/*
	 * This is actually totally arbitrary.  Some correctness tests
	 * assume there's a maximum number of PDs that can be allocated.
	 * We don't actually have this limit, but we fail the test if
	 * we allow allocations of more than we report for this value.
	 */

	pd = kmalloc(sizeof *pd, GFP_KERNEL);
	if (!pd) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock(&dev->n_pds_lock);
	if (dev->n_pds_allocated == ib_ipath_max_pds) {
		spin_unlock(&dev->n_pds_lock);
		kfree(pd);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_pds_allocated++;
	spin_unlock(&dev->n_pds_lock);

	/* ib_alloc_pd() will initialize pd->ibpd. */
	pd->user = udata != NULL;

	ret = &pd->ibpd;

bail:
	return ret;
}
static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
	struct ipath_pd *pd = to_ipd(ibpd);
	struct ipath_ibdev *dev = to_idev(ibpd->device);

	spin_lock(&dev->n_pds_lock);
	dev->n_pds_allocated--;
	spin_unlock(&dev->n_pds_lock);

	kfree(pd);

	return 0;
}
/**
 * ipath_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 *
 * This may be called from interrupt context.
 */
static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
				     struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah;
	struct ib_ah *ret;
	struct ipath_ibdev *dev = to_idev(pd->device);
	unsigned long flags;

	/* A multicast address requires a GRH (see ch. 8.4.1). */
	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
	    !(ah_attr->ah_flags & IB_AH_GRH)) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->dlid == 0) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->port_num < 1 ||
	    ah_attr->port_num > pd->device->phys_port_cnt) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
	if (!ah) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
		kfree(ah);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_ahs_allocated++;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	/* ib_create_ah() will initialize ah->ibah. */
	ah->attr = *ah_attr;

	ret = &ah->ibah;

bail:
	return ret;
}
/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
	struct ipath_ibdev *dev = to_idev(ibah->device);
	struct ipath_ah *ah = to_iah(ibah);
	unsigned long flags;

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	dev->n_ahs_allocated--;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	kfree(ah);

	return 0;
}
static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah = to_iah(ibah);

	*ah_attr = ah->attr;

	return 0;
}
/**
 * ipath_get_npkeys - return the size of the PKEY table for port 0
 * @dd: the infinipath device
 */
unsigned ipath_get_npkeys(struct ipath_devdata *dd)
{
	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
}
/**
 * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
 * @dd: the infinipath device
 * @index: the PKEY index
 */
unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
{
	unsigned ret;

	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
		ret = 0;
	else
		ret = dd->ipath_pd[0]->port_pkeys[index];

	return ret;
}
static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			    u16 *pkey)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= ipath_get_npkeys(dev->dd)) {
		ret = -EINVAL;
		goto bail;
	}

	*pkey = ipath_get_pkey(dev->dd, index);
	ret = 0;

bail:
	return ret;
}
/**
 * ipath_alloc_ucontext - allocate a ucontext
 * @ibdev: the infiniband device
 * @udata: not used by the InfiniPath driver
 */
static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
						struct ib_udata *udata)
{
	struct ipath_ucontext *context;
	struct ib_ucontext *ret;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	ret = &context->ibucontext;

bail:
	return ret;
}

static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
	kfree(to_iucontext(context));
	return 0;
}
static int ipath_verbs_register_sysfs(struct ib_device *dev);

static void __verbs_timer(unsigned long arg)
{
	struct ipath_devdata *dd = (struct ipath_devdata *) arg;

	/* Handle verbs layer timeouts. */
	ipath_ib_timer(dd->verbs_dev);

	mod_timer(&dd->verbs_timer, jiffies + 1);
}
static int enable_timer(struct ipath_devdata *dd)
{
	/*
	 * Early chips had a design flaw where the chip and kernel idea
	 * of the tail register don't always agree, and therefore we won't
	 * get an interrupt on the next packet received.
	 * If the board supports per packet receive interrupts, use it.
	 * Otherwise, the timer function periodically checks for packets
	 * to cover this case.
	 * Either way, the timer is needed for verbs layer related
	 * processing.
	 */
	if (dd->ipath_flags & IPATH_GPIO_INTR) {
		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
				 0x2074076542310ULL);
		/* Enable GPIO bit 2 interrupt */
		dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
				 dd->ipath_gpio_mask);
	}

	init_timer(&dd->verbs_timer);
	dd->verbs_timer.function = __verbs_timer;
	dd->verbs_timer.data = (unsigned long)dd;
	dd->verbs_timer.expires = jiffies + 1;
	add_timer(&dd->verbs_timer);

	return 0;
}
static int disable_timer(struct ipath_devdata *dd)
{
	if (dd->ipath_flags & IPATH_GPIO_INTR) {
		/* Disable GPIO bit 2 interrupt */
		dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
				 dd->ipath_gpio_mask);
		/*
		 * We might want to undo changes to debugportselect,
		 * but how?
		 */
	}

	del_timer_sync(&dd->verbs_timer);

	return 0;
}
/**
 * ipath_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 * Return the allocated ipath_ibdev pointer or NULL on error.
 */
int ipath_register_ib_device(struct ipath_devdata *dd)
{
	struct ipath_verbs_counters cntrs;
	struct ipath_ibdev *idev;
	struct ib_device *dev;
	int ret;

	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
	if (idev == NULL) {
		ret = -ENOMEM;
		goto bail;
	}

	dev = &idev->ibdev;

	/* Only need to initialize non-zero fields. */
	spin_lock_init(&idev->n_pds_lock);
	spin_lock_init(&idev->n_ahs_lock);
	spin_lock_init(&idev->n_cqs_lock);
	spin_lock_init(&idev->n_qps_lock);
	spin_lock_init(&idev->n_srqs_lock);
	spin_lock_init(&idev->n_mcast_grps_lock);

	spin_lock_init(&idev->qp_table.lock);
	spin_lock_init(&idev->lk_table.lock);
	idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);

	ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
	if (ret)
		goto err_qp;

	/*
	 * The top ib_ipath_lkey_table_size bits are used to index the
	 * table.  The lower 8 bits can be owned by the user (copied from
	 * the LKEY).  The remaining bits act as a generation number or tag.
	 */
	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
	idev->lk_table.table = kzalloc(idev->lk_table.max *
				       sizeof(*idev->lk_table.table),
				       GFP_KERNEL);
	if (idev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
	INIT_LIST_HEAD(&idev->pending_mmaps);
	spin_lock_init(&idev->pending_lock);
	idev->mmap_offset = PAGE_SIZE;
	spin_lock_init(&idev->mmap_offset_lock);
	INIT_LIST_HEAD(&idev->pending[0]);
	INIT_LIST_HEAD(&idev->pending[1]);
	INIT_LIST_HEAD(&idev->pending[2]);
	INIT_LIST_HEAD(&idev->piowait);
	INIT_LIST_HEAD(&idev->rnrwait);
	idev->pending_index = 0;
	idev->port_cap_flags =
		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
	idev->link_width_enabled = 3;	/* 1x or 4x */

	/* Snapshot current HW counters to "clear" them. */
	ipath_get_counters(dd, &cntrs);
	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
	idev->z_link_error_recovery_counter =
		cntrs.link_error_recovery_counter;
	idev->z_link_downed_counter = cntrs.link_downed_counter;
	idev->z_port_rcv_errors = cntrs.port_rcv_errors;
	idev->z_port_rcv_remphys_errors =
		cntrs.port_rcv_remphys_errors;
	idev->z_port_xmit_discards = cntrs.port_xmit_discards;
	idev->z_port_xmit_data = cntrs.port_xmit_data;
	idev->z_port_rcv_data = cntrs.port_rcv_data;
	idev->z_port_xmit_packets = cntrs.port_xmit_packets;
	idev->z_port_rcv_packets = cntrs.port_rcv_packets;
	idev->z_local_link_integrity_errors =
		cntrs.local_link_integrity_errors;
	idev->z_excessive_buffer_overrun_errors =
		cntrs.excessive_buffer_overrun_errors;
	idev->z_vl15_dropped = cntrs.vl15_dropped;

	/*
	 * The system image GUID is supposed to be the same for all
	 * IB HCAs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!sys_image_guid)
		sys_image_guid = dd->ipath_guid;
	idev->sys_image_guid = sys_image_guid;
	idev->ib_unit = dd->ipath_unit;
	idev->dd = dd;

	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
	dev->owner = THIS_MODULE;
	dev->node_guid = dd->ipath_guid;
	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = 1;
	dev->dma_device = &dd->pcidev->dev;
	dev->query_device = ipath_query_device;
	dev->modify_device = ipath_modify_device;
	dev->query_port = ipath_query_port;
	dev->modify_port = ipath_modify_port;
	dev->query_pkey = ipath_query_pkey;
	dev->query_gid = ipath_query_gid;
	dev->alloc_ucontext = ipath_alloc_ucontext;
	dev->dealloc_ucontext = ipath_dealloc_ucontext;
	dev->alloc_pd = ipath_alloc_pd;
	dev->dealloc_pd = ipath_dealloc_pd;
	dev->create_ah = ipath_create_ah;
	dev->destroy_ah = ipath_destroy_ah;
	dev->query_ah = ipath_query_ah;
	dev->create_srq = ipath_create_srq;
	dev->modify_srq = ipath_modify_srq;
	dev->query_srq = ipath_query_srq;
	dev->destroy_srq = ipath_destroy_srq;
	dev->create_qp = ipath_create_qp;
	dev->modify_qp = ipath_modify_qp;
	dev->query_qp = ipath_query_qp;
	dev->destroy_qp = ipath_destroy_qp;
	dev->post_send = ipath_post_send;
	dev->post_recv = ipath_post_receive;
	dev->post_srq_recv = ipath_post_srq_receive;
	dev->create_cq = ipath_create_cq;
	dev->destroy_cq = ipath_destroy_cq;
	dev->resize_cq = ipath_resize_cq;
	dev->poll_cq = ipath_poll_cq;
	dev->req_notify_cq = ipath_req_notify_cq;
	dev->get_dma_mr = ipath_get_dma_mr;
	dev->reg_phys_mr = ipath_reg_phys_mr;
	dev->reg_user_mr = ipath_reg_user_mr;
	dev->dereg_mr = ipath_dereg_mr;
	dev->alloc_fmr = ipath_alloc_fmr;
	dev->map_phys_fmr = ipath_map_phys_fmr;
	dev->unmap_fmr = ipath_unmap_fmr;
	dev->dealloc_fmr = ipath_dealloc_fmr;
	dev->attach_mcast = ipath_multicast_attach;
	dev->detach_mcast = ipath_multicast_detach;
	dev->process_mad = ipath_process_mad;
	dev->mmap = ipath_mmap;
	dev->dma_ops = &ipath_dma_mapping_ops;

	snprintf(dev->node_desc, sizeof(dev->node_desc),
		 IPATH_IDSTR " %s", init_utsname()->nodename);

	ret = ib_register_device(dev);
	if (ret)
		goto err_reg;

	if (ipath_verbs_register_sysfs(dev))
		goto err_class;

	enable_timer(dd);

	goto bail;

err_class:
	ib_unregister_device(dev);
err_reg:
	kfree(idev->lk_table.table);
err_lk:
	kfree(idev->qp_table.table);
err_qp:
	ib_dealloc_device(dev);
	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	idev = NULL;

bail:
	dd->verbs_dev = idev;

	return ret;
}
void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
	struct ib_device *ibdev = &dev->ibdev;

	disable_timer(dev->dd);

	ib_unregister_device(ibdev);

	if (!list_empty(&dev->pending[0]) ||
	    !list_empty(&dev->pending[1]) ||
	    !list_empty(&dev->pending[2]))
		ipath_dev_err(dev->dd, "pending list not empty!\n");
	if (!list_empty(&dev->piowait))
		ipath_dev_err(dev->dd, "piowait list not empty!\n");
	if (!list_empty(&dev->rnrwait))
		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
	if (!ipath_mcast_tree_empty())
		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
	/*
	 * Note that ipath_unregister_ib_device() can be called before all
	 * the QPs are destroyed!
	 */
	ipath_free_all_qps(&dev->qp_table);
	kfree(dev->qp_table.table);
	kfree(dev->lk_table.table);
	ib_dealloc_device(ibdev);
}
static ssize_t show_rev(struct class_device *cdev, char *buf)
{
	struct ipath_ibdev *dev =
		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);

	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}

static ssize_t show_hca(struct class_device *cdev, char *buf)
{
	struct ipath_ibdev *dev =
		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
	int ret;

	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
	if (ret < 0)
		goto bail;
	strcat(buf, "\n");
	ret = strlen(buf);

bail:
	return ret;
}
static ssize_t show_stats(struct class_device *cdev, char *buf)
{
	struct ipath_ibdev *dev =
		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
	int i;
	int len;

	len = sprintf(buf,
		      "RC resends  %d\n"
		      "RC no QACK  %d\n"
		      "RC ACKs     %d\n"
		      "RC SEQ NAKs %d\n"
		      "RC RDMA seq %d\n"
		      "RC RNR NAKs %d\n"
		      "RC OTH NAKs %d\n"
		      "RC timeouts %d\n"
		      "RC RDMA dup %d\n"
		      "RC stalls   %d\n"
		      "piobuf wait %d\n"
		      "no piobuf   %d\n"
		      "PKT drops   %d\n"
		      "WQE errs    %d\n",
		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
		      dev->n_other_naks, dev->n_timeouts,
		      dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
		      dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
		const struct ipath_opcode_stats *si = &dev->opstats[i];

		if (!si->n_packets && !si->n_bytes)
			continue;
		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
			       (unsigned long long) si->n_packets,
			       (unsigned long long) si->n_bytes);
	}
	return len;
}
static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);

static struct class_device_attribute *ipath_class_attributes[] = {
	&class_device_attr_hw_rev,
	&class_device_attr_hca_type,
	&class_device_attr_board_id,
	&class_device_attr_stats
};
static int ipath_verbs_register_sysfs(struct ib_device *dev)
{
	int i;
	int ret;

	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
		if (class_device_create_file(&dev->class_dev,
					     ipath_class_attributes[i])) {
			ret = 1;
			goto bail;
		}

	ret = 0;

bail:
	return ret;
}