/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/export.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_user.h"
#include "mthca_memfree.h"

static void init_query_mad(struct ib_smp *mad)
{
        mad->base_version  = 1;
        mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        mad->class_version = 1;
        mad->method        = IB_MGMT_METHOD_GET;
}
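
/*
 * Device queries are answered from a mix of cached limits in
 * struct mthca_dev and a NodeInfo MAD issued to the firmware
 * through mthca_MAD_IFC().
 */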
static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
                              struct ib_udata *uhw)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        struct mthca_dev *mdev = to_mdev(ibdev);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        props->fw_ver              = mdev->fw_ver;

        init_query_mad(in_mad);
        in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

        err = mthca_MAD_IFC(mdev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        props->device_cap_flags    = mdev->device_cap_flags;
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
        props->vendor_part_id      = be16_to_cpup((__be16 *) (out_mad->data + 30));
        props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
        memcpy(&props->sys_image_guid, out_mad->data + 4, 8);

        props->max_mr_size         = ~0ull;
        props->page_size_cap       = mdev->limits.page_size_cap;
        props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
        props->max_qp_wr           = mdev->limits.max_wqes;
        props->max_sge             = mdev->limits.max_sg;
        props->max_sge_rd          = props->max_sge;
        props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
        props->max_cqe             = mdev->limits.max_cqes;
        props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
        props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
        props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
        props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
        props->max_srq_wr          = mdev->limits.max_srq_wqes;
        props->max_srq_sge         = mdev->limits.max_srq_sge;
        props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
        props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
                                        IB_ATOMIC_HCA : IB_ATOMIC_NONE;
        props->max_pkeys           = mdev->limits.pkey_table_len;
        props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
        props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        /*
         * If Sinai memory key optimization is being used, then only
         * the 8-bit key portion will change.  For other HCAs, the
         * unused index bits will also be used for FMR remapping.
         */
        if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
                props->max_map_per_fmr = 255;
        else
                props->max_map_per_fmr =
                        (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1;

        err = 0;
 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_query_port(struct ib_device *ibdev,
                            u8 port, struct ib_port_attr *props)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        props->lid               = be16_to_cpup((__be16 *) (out_mad->data + 16));
        props->lmc               = out_mad->data[34] & 0x7;
        props->sm_lid            = be16_to_cpup((__be16 *) (out_mad->data + 18));
        props->sm_sl             = out_mad->data[36] & 0xf;
        props->state             = out_mad->data[32] & 0xf;
        props->phys_state        = out_mad->data[33] >> 4;
        props->port_cap_flags    = be32_to_cpup((__be32 *) (out_mad->data + 20));
        props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
        props->max_msg_sz        = 0x80000000;
        props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
        props->bad_pkey_cntr     = be16_to_cpup((__be16 *) (out_mad->data + 46));
        props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
        props->active_width      = out_mad->data[31] & 0xf;
        props->active_speed      = out_mad->data[35] >> 4;
        props->max_mtu           = out_mad->data[41] & 0xf;
        props->active_mtu        = out_mad->data[36] >> 4;
        props->subnet_timeout    = out_mad->data[51] & 0x1f;
        props->max_vl_num        = out_mad->data[37] >> 4;
        props->init_type_reply   = out_mad->data[41] >> 4;

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_modify_device(struct ib_device *ibdev,
                               int mask,
                               struct ib_device_modify *props)
{
        if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
                return -EOPNOTSUPP;

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                        return -ERESTARTSYS;
                memcpy(ibdev->node_desc, props->node_desc, 64);
                mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
        }

        return 0;
}

static int mthca_modify_port(struct ib_device *ibdev,
                             u8 port, int port_modify_mask,
                             struct ib_port_modify *props)
{
        struct mthca_set_ib_param set_ib;
        struct ib_port_attr attr;
        int err;

        if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                return -ERESTARTSYS;

        err = mthca_query_port(ibdev, port, &attr);
        if (err)
                goto out;

        set_ib.set_si_guid     = 0;
        set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);

        set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;

        err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port);
        if (err)
                goto out;
out:
        mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
        return err;
}

static int mthca_query_pkey(struct ib_device *ibdev,
                            u8 port, u16 index, u16 *pkey)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
        in_mad->attr_mod = cpu_to_be32(index / 32);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_query_gid(struct ib_device *ibdev, u8 port,
                           int index, union ib_gid *gid)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        memcpy(gid->raw, out_mad->data + 8, 8);

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
        in_mad->attr_mod = cpu_to_be32(index / 8);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}
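
/*
 * Userspace contexts get their own UAR page and, on mem-free HCAs,
 * a user doorbell table; the sizes the userspace library needs are
 * returned in struct mthca_alloc_ucontext_resp via ib_copy_to_udata().
 */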
static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
                                                struct ib_udata *udata)
{
        struct mthca_alloc_ucontext_resp uresp;
        struct mthca_ucontext           *context;
        int                              err;

        if (!(to_mdev(ibdev)->active))
                return ERR_PTR(-EAGAIN);

        memset(&uresp, 0, sizeof uresp);

        uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
        if (mthca_is_memfree(to_mdev(ibdev)))
                uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
        else
                uresp.uarc_size = 0;

        context = kmalloc(sizeof *context, GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
        if (err) {
                kfree(context);
                return ERR_PTR(err);
        }

        context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
        if (IS_ERR(context->db_tab)) {
                err = PTR_ERR(context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(err);
        }

        if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
                mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(-EFAULT);
        }

        context->reg_mr_warned = 0;

        return &context->ibucontext;
}

static int mthca_dealloc_ucontext(struct ib_ucontext *context)
{
        mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
                                  to_mucontext(context)->db_tab);
        mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
        kfree(to_mucontext(context));

        return 0;
}

static int mthca_mmap_uar(struct ib_ucontext *context,
                          struct vm_area_struct *vma)
{
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        if (io_remap_pfn_range(vma, vma->vm_start,
                               to_mucontext(context)->uar.pfn,
                               PAGE_SIZE, vma->vm_page_prot))
                return -EAGAIN;

        return 0;
}

static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
                                    struct ib_ucontext *context,
                                    struct ib_udata *udata)
{
        struct mthca_pd *pd;
        int err;

        pd = kmalloc(sizeof *pd, GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
        if (err) {
                kfree(pd);
                return ERR_PTR(err);
        }

        if (context) {
                if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
                        mthca_pd_free(to_mdev(ibdev), pd);
                        kfree(pd);
                        return ERR_PTR(-EFAULT);
                }
        }

        return &pd->ibpd;
}

static int mthca_dealloc_pd(struct ib_pd *pd)
{
        mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
        kfree(pd);

        return 0;
}

static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
                                     struct ib_ah_attr *ah_attr)
{
        int err;
        struct mthca_ah *ah;

        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);

        err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
        if (err) {
                kfree(ah);
                return ERR_PTR(err);
        }

        return &ah->ibah;
}

static int mthca_ah_destroy(struct ib_ah *ah)
{
        mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
        kfree(ah);

        return 0;
}
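
/*
 * For userspace SRQs the doorbell record supplied in the create
 * command is mapped before the SRQ is allocated, and unmapped again
 * if allocation fails; kernel SRQs skip the udata handling entirely.
 */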
static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
                                       struct ib_srq_init_attr *init_attr,
                                       struct ib_udata *udata)
{
        struct mthca_create_srq ucmd;
        struct mthca_ucontext *context = NULL;
        struct mthca_srq *srq;
        int err;

        if (init_attr->srq_type != IB_SRQT_BASIC)
                return ERR_PTR(-ENOSYS);

        srq = kmalloc(sizeof *srq, GFP_KERNEL);
        if (!srq)
                return ERR_PTR(-ENOMEM);

        if (pd->uobject) {
                context = to_mucontext(pd->uobject->context);

                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        err = -EFAULT;
                        goto err_free;
                }

                err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                        context->db_tab, ucmd.db_index,
                                        ucmd.db_page);

                if (err)
                        goto err_free;

                srq->mr.ibmr.lkey = ucmd.lkey;
                srq->db_index     = ucmd.db_index;
        }

        err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
                              &init_attr->attr, srq);

        if (err && pd->uobject)
                mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
                                    context->db_tab, ucmd.db_index);

        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
                mthca_free_srq(to_mdev(pd->device), srq);
                err = -EFAULT;
                goto err_free;
        }

        return &srq->ibsrq;

err_free:
        kfree(srq);

        return ERR_PTR(err);
}

static int mthca_destroy_srq(struct ib_srq *srq)
{
        struct mthca_ucontext *context;

        if (srq->uobject) {
                context = to_mucontext(srq->uobject->context);

                mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
                                    context->db_tab, to_msrq(srq)->db_index);
        }

        mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
        kfree(srq);

        return 0;
}
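
/*
 * Regular RC/UC/UD QPs may be created from userspace, in which case
 * the send and receive doorbell records passed in the create command
 * must be mapped first.  Special (SMI/GSI) QPs are kernel-only and
 * are backed by the larger struct mthca_sqp.
 */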
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
                                     struct ib_qp_init_attr *init_attr,
                                     struct ib_udata *udata)
{
        struct mthca_create_qp ucmd;
        struct mthca_qp *qp;
        int err;

        if (init_attr->create_flags)
                return ERR_PTR(-EINVAL);

        switch (init_attr->qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
                struct mthca_ucontext *context;

                qp = kmalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                if (pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                                kfree(qp);
                                return ERR_PTR(-EFAULT);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.sq_db_index, ucmd.sq_db_page);
                        if (err) {
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.rq_db_index, ucmd.rq_db_page);
                        if (err) {
                                mthca_unmap_user_db(to_mdev(pd->device),
                                                    &context->uar,
                                                    context->db_tab,
                                                    ucmd.sq_db_index);
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        qp->mr.ibmr.lkey = ucmd.lkey;
                        qp->sq.db_index  = ucmd.sq_db_index;
                        qp->rq.db_index  = ucmd.rq_db_index;
                }

                err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
                                     to_mcq(init_attr->send_cq),
                                     to_mcq(init_attr->recv_cq),
                                     init_attr->qp_type, init_attr->sq_sig_type,
                                     &init_attr->cap, qp);

                if (err && pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.sq_db_index);
                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.rq_db_index);
                }

                qp->ibqp.qp_num = qp->qpn;
                break;
        }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
                /* Don't allow userspace to create special QPs */
                if (pd->uobject)
                        return ERR_PTR(-EINVAL);

                qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

                err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
                                      to_mcq(init_attr->send_cq),
                                      to_mcq(init_attr->recv_cq),
                                      init_attr->sq_sig_type, &init_attr->cap,
                                      qp->ibqp.qp_num, init_attr->port_num,
                                      to_msqp(qp));
                break;
        }
        default:
                /* Don't support raw QPs */
                return ERR_PTR(-ENOSYS);
        }

        if (err) {
                kfree(qp);
                return ERR_PTR(err);
        }

        init_attr->cap.max_send_wr     = qp->sq.max;
        init_attr->cap.max_recv_wr     = qp->rq.max;
        init_attr->cap.max_send_sge    = qp->sq.max_gs;
        init_attr->cap.max_recv_sge    = qp->rq.max_gs;
        init_attr->cap.max_inline_data = qp->max_inline_data;

        return &qp->ibqp;
}

static int mthca_destroy_qp(struct ib_qp *qp)
{
        if (qp->uobject) {
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->sq.db_index);
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->rq.db_index);
        }
        mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
        kfree(qp);
        return 0;
}
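
/*
 * CQ creation rounds the requested depth up to a power of two and,
 * for userspace CQs, maps the set_ci and arm doorbell records before
 * initializing the CQ itself.
 */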
static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
                                     const struct ib_cq_init_attr *attr,
                                     struct ib_ucontext *context,
                                     struct ib_udata *udata)
{
        int entries = attr->cqe;
        struct mthca_create_cq ucmd;
        struct mthca_cq *cq;
        int nent;
        int err;

        if (attr->flags)
                return ERR_PTR(-EINVAL);

        if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
                return ERR_PTR(-EINVAL);

        if (context) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                        return ERR_PTR(-EFAULT);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.set_db_index, ucmd.set_db_page);
                if (err)
                        return ERR_PTR(err);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.arm_db_index, ucmd.arm_db_page);
                if (err)
                        goto err_unmap_set;
        }

        cq = kmalloc(sizeof *cq, GFP_KERNEL);
        if (!cq) {
                err = -ENOMEM;
                goto err_unmap_arm;
        }

        if (context) {
                cq->buf.mr.ibmr.lkey = ucmd.lkey;
                cq->set_ci_db_index  = ucmd.set_db_index;
                cq->arm_db_index     = ucmd.arm_db_index;
        }

        for (nent = 1; nent <= entries; nent <<= 1)
                ; /* nothing */

        err = mthca_init_cq(to_mdev(ibdev), nent,
                            context ? to_mucontext(context) : NULL,
                            context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
                            cq);
        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
                mthca_free_cq(to_mdev(ibdev), cq);
                err = -EFAULT;
                goto err_free;
        }

        cq->resize_buf = NULL;

        return &cq->ibcq;

err_free:
        kfree(cq);

err_unmap_arm:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.arm_db_index);

err_unmap_set:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.set_db_index);

        return ERR_PTR(err);
}
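
/*
 * Allocate the temporary buffer used by mthca_resize_cq().
 * cq->lock protects cq->resize_buf and its state field.
 */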
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
                                  int entries)
{
        int ret;

        spin_lock_irq(&cq->lock);
        if (cq->resize_buf) {
                ret = -EBUSY;
                goto unlock;
        }

        cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
        if (!cq->resize_buf) {
                ret = -ENOMEM;
                goto unlock;
        }

        cq->resize_buf->state = CQ_RESIZE_ALLOC;

        ret = 0;

unlock:
        spin_unlock_irq(&cq->lock);

        if (ret)
                return ret;

        ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
        if (ret) {
                spin_lock_irq(&cq->lock);
                kfree(cq->resize_buf);
                cq->resize_buf = NULL;
                spin_unlock_irq(&cq->lock);
                return ret;
        }

        cq->resize_buf->cqe = entries - 1;

        spin_lock_irq(&cq->lock);
        cq->resize_buf->state = CQ_RESIZE_READY;
        spin_unlock_irq(&cq->lock);

        return 0;
}
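
/*
 * Resize a CQ in place: allocate the new buffer (kernel CQs) or take
 * the new lkey from userspace, issue the RESIZE_CQ firmware command,
 * and then swap buffers and free the old one once the command succeeds.
 */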
static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
        struct mthca_dev *dev = to_mdev(ibcq->device);
        struct mthca_cq *cq = to_mcq(ibcq);
        struct mthca_resize_cq ucmd;
        u32 lkey;
        int ret;

        if (entries < 1 || entries > dev->limits.max_cqes)
                return -EINVAL;

        mutex_lock(&cq->mutex);

        entries = roundup_pow_of_two(entries + 1);
        if (entries == ibcq->cqe + 1) {
                ret = 0;
                goto out;
        }

        if (cq->is_kernel) {
                ret = mthca_alloc_resize_buf(dev, cq, entries);
                if (ret)
                        goto out;
                lkey = cq->resize_buf->buf.mr.ibmr.lkey;
        } else {
                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        ret = -EFAULT;
                        goto out;
                }
                lkey = ucmd.lkey;
        }

        ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries));

        if (ret) {
                if (cq->resize_buf) {
                        mthca_free_cq_buf(dev, &cq->resize_buf->buf,
                                          cq->resize_buf->cqe);
                        kfree(cq->resize_buf);
                        spin_lock_irq(&cq->lock);
                        cq->resize_buf = NULL;
                        spin_unlock_irq(&cq->lock);
                }
                goto out;
        }

        if (cq->is_kernel) {
                struct mthca_cq_buf tbuf;
                int tcqe;

                spin_lock_irq(&cq->lock);
                if (cq->resize_buf->state == CQ_RESIZE_READY) {
                        mthca_cq_resize_copy_cqes(cq);
                        tbuf         = cq->buf;
                        tcqe         = cq->ibcq.cqe;
                        cq->buf      = cq->resize_buf->buf;
                        cq->ibcq.cqe = cq->resize_buf->cqe;
                } else {
                        tbuf = cq->resize_buf->buf;
                        tcqe = cq->resize_buf->cqe;
                }

                kfree(cq->resize_buf);
                cq->resize_buf = NULL;
                spin_unlock_irq(&cq->lock);

                mthca_free_cq_buf(dev, &tbuf, tcqe);
        } else
                ibcq->cqe = entries - 1;

out:
        mutex_unlock(&cq->mutex);

        return ret;
}

static int mthca_destroy_cq(struct ib_cq *cq)
{
        if (cq->uobject) {
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->arm_db_index);
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->set_ci_db_index);
        }
        mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
        kfree(cq);

        return 0;
}

static inline u32 convert_access(int acc)
{
        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
               (acc & IB_ACCESS_REMOTE_WRITE  ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ   ? MTHCA_MPT_FLAG_REMOTE_READ  : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE   ? MTHCA_MPT_FLAG_LOCAL_WRITE  : 0) |
               MTHCA_MPT_FLAG_LOCAL_READ;
}

static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mthca_mr *mr;
        int err;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mthca_mr_alloc_notrans(to_mdev(pd->device),
                                     to_mpd(pd)->pd_num,
                                     convert_access(acc), mr);

        if (err) {
                kfree(mr);
                return ERR_PTR(err);
        }

        mr->umem = NULL;

        return &mr->ibmr;
}

static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
                                       struct ib_phys_buf *buffer_list,
                                       int                 num_phys_buf,
                                       int                 acc,
                                       u64                *iova_start)
{
        struct mthca_mr *mr;
        u64 *page_list;
        u64 total_size;
        unsigned long mask;
        int shift;
        int npages;
        int err;
        int i, j, n;

        mask = buffer_list[0].addr ^ *iova_start;
        total_size = 0;
        for (i = 0; i < num_phys_buf; ++i) {
                if (i != 0)
                        mask |= buffer_list[i].addr;
                if (i != num_phys_buf - 1)
                        mask |= buffer_list[i].addr + buffer_list[i].size;

                total_size += buffer_list[i].size;
        }

        if (mask & ~PAGE_MASK)
                return ERR_PTR(-EINVAL);

        shift = __ffs(mask | 1 << 31);

        buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
        buffer_list[0].addr &= ~0ull << shift;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        npages = 0;
        for (i = 0; i < num_phys_buf; ++i)
                npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

        if (!npages)
                return &mr->ibmr;

        page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
        if (!page_list) {
                kfree(mr);
                return ERR_PTR(-ENOMEM);
        }

        n = 0;
        for (i = 0; i < num_phys_buf; ++i)
                for (j = 0;
                     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
                     ++j)
                        page_list[n++] = buffer_list[i].addr + ((u64) j << shift);

        mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
                  "in PD %x; shift %d, npages %d.\n",
                  (unsigned long long) buffer_list[0].addr,
                  (unsigned long long) *iova_start,
                  to_mpd(pd)->pd_num,
                  shift, npages);

        err = mthca_mr_alloc_phys(to_mdev(pd->device),
                                  to_mpd(pd)->pd_num,
                                  page_list, shift, npages,
                                  *iova_start, total_size,
                                  convert_access(acc), mr);

        if (err) {
                kfree(page_list);
                kfree(mr);
                return ERR_PTR(err);
        }

        kfree(page_list);
        mr->umem = NULL;

        return &mr->ibmr;
}
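
/*
 * Register a userspace memory region: pin the pages with
 * ib_umem_get(), write the resulting DMA addresses into an MTT in
 * chunks sized to suit mthca_write_mtt(), and then hang an MPT entry
 * off that MTT.
 */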
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                       u64 virt, int acc, struct ib_udata *udata)
{
        struct mthca_dev *dev = to_mdev(pd->device);
        struct scatterlist *sg;
        struct mthca_mr *mr;
        struct mthca_reg_mr ucmd;
        u64 *pages;
        int shift, n, len;
        int i, k, entry;
        int err = 0;
        int write_mtt_size;

        if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
                if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
                        mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
                                   current->comm);
                        mthca_warn(dev, "  Update libmthca to fix this.\n");
                }
                ++to_mucontext(pd->uobject->context)->reg_mr_warned;
                ucmd.mr_attrs = 0;
        } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                return ERR_PTR(-EFAULT);

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
                               ucmd.mr_attrs & MTHCA_MR_DMASYNC);

        if (IS_ERR(mr->umem)) {
                err = PTR_ERR(mr->umem);
                goto err;
        }

        shift = ffs(mr->umem->page_size) - 1;
        n = mr->umem->nmap;

        mr->mtt = mthca_alloc_mtt(dev, n);
        if (IS_ERR(mr->mtt)) {
                err = PTR_ERR(mr->mtt);
                goto err_umem;
        }

        pages = (u64 *) __get_free_page(GFP_KERNEL);
        if (!pages) {
                err = -ENOMEM;
                goto err_mtt;
        }

        i = n = 0;

        write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));

        for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
                len = sg_dma_len(sg) >> shift;
                for (k = 0; k < len; ++k) {
                        pages[i++] = sg_dma_address(sg) +
                                mr->umem->page_size * k;
                        /*
                         * Be friendly to write_mtt and pass it chunks
                         * of appropriate size.
                         */
                        if (i == write_mtt_size) {
                                err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
                                if (err)
                                        goto mtt_done;
                                n += i;
                                i = 0;
                        }
                }
        }

        if (i)
                err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
mtt_done:
        free_page((unsigned long) pages);
        if (err)
                goto err_mtt;

        err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
                             convert_access(acc), mr);

        if (err)
                goto err_mtt;

        return &mr->ibmr;

err_mtt:
        mthca_free_mtt(dev, mr->mtt);

err_umem:
        ib_umem_release(mr->umem);

err:
        kfree(mr);
        return ERR_PTR(err);
}

static int mthca_dereg_mr(struct ib_mr *mr)
{
        struct mthca_mr *mmr = to_mmr(mr);

        mthca_free_mr(to_mdev(mr->device), mmr);
        if (mmr->umem)
                ib_umem_release(mmr->umem);
        kfree(mmr);

        return 0;
}

static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                                      struct ib_fmr_attr *fmr_attr)
{
        struct mthca_fmr *fmr;
        int err;

        fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
        if (!fmr)
                return ERR_PTR(-ENOMEM);

        memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
        err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
                              convert_access(mr_access_flags), fmr);

        if (err) {
                kfree(fmr);
                return ERR_PTR(err);
        }

        return &fmr->ibmr;
}

static int mthca_dealloc_fmr(struct ib_fmr *fmr)
{
        struct mthca_fmr *mfmr = to_mfmr(fmr);
        int err;

        err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
        if (err)
                return err;

        kfree(mfmr);
        return 0;
}
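
/*
 * All FMRs on the list must belong to the same device; after the
 * per-FMR unmap the TPT is synced with a single SYNC_TPT command.
 */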
static int mthca_unmap_fmr(struct list_head *fmr_list)
{
        struct ib_fmr *fmr;
        int err;
        struct mthca_dev *mdev = NULL;

        list_for_each_entry(fmr, fmr_list, list) {
                if (mdev && to_mdev(fmr->device) != mdev)
                        return -EINVAL;
                mdev = to_mdev(fmr->device);
        }

        if (!mdev)
                return 0;

        if (mthca_is_memfree(mdev)) {
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));

                wmb();
        } else
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));

        err = mthca_SYNC_TPT(mdev);
        return err;
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        return sprintf(buf, "%x\n", dev->rev_id);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
                           char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
                       (int) (dev->fw_ver >> 16) & 0xffff,
                       (int) dev->fw_ver & 0xffff);
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        switch (dev->pdev->device) {
        case PCI_DEVICE_ID_MELLANOX_TAVOR:
                return sprintf(buf, "MT23108\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
                return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL:
                return sprintf(buf, "MT25208\n");
        case PCI_DEVICE_ID_MELLANOX_SINAI:
        case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
                return sprintf(buf, "MT25204\n");
        default:
                return sprintf(buf, "unknown\n");
        }
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
                          char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}

static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);

static struct device_attribute *mthca_dev_attributes[] = {
        &dev_attr_hw_rev,
        &dev_attr_hca_type,
        &dev_attr_fw_ver,
        &dev_attr_board_id
};

static int mthca_init_node_data(struct mthca_dev *dev)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;

        err = mthca_MAD_IFC(dev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        memcpy(dev->ib_dev.node_desc, out_mad->data, 64);

        in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

        err = mthca_MAD_IFC(dev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad);
        if (err)
                goto out;

        if (mthca_is_memfree(dev))
                dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
        memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);

out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
                                struct ib_port_immutable *immutable)
{
        struct ib_port_attr attr;
        int err;

        err = mthca_query_port(ibdev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}
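
/*
 * Fill in the ib_device methods and register with the IB core.
 * SRQ and FMR handlers are only wired up when the HCA advertises
 * those features, and the Arbel (mem-free) and Tavor paths use
 * different fast-path entry points.
 */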
int mthca_register_device(struct mthca_dev *dev)
{
        int ret;
        int i;

        ret = mthca_init_node_data(dev);
        if (ret)
                return ret;

        strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner                = THIS_MODULE;

        dev->ib_dev.uverbs_abi_ver       = MTHCA_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask      =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
                (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
                (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
        dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.num_comp_vectors     = 1;
        dev->ib_dev.dma_device           = &dev->pdev->dev;
        dev->ib_dev.query_device         = mthca_query_device;
        dev->ib_dev.query_port           = mthca_query_port;
        dev->ib_dev.modify_device        = mthca_modify_device;
        dev->ib_dev.modify_port          = mthca_modify_port;
        dev->ib_dev.query_pkey           = mthca_query_pkey;
        dev->ib_dev.query_gid            = mthca_query_gid;
        dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
        dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
        dev->ib_dev.mmap                 = mthca_mmap_uar;
        dev->ib_dev.alloc_pd             = mthca_alloc_pd;
        dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
        dev->ib_dev.create_ah            = mthca_ah_create;
        dev->ib_dev.query_ah             = mthca_ah_query;
        dev->ib_dev.destroy_ah           = mthca_ah_destroy;

        if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
                dev->ib_dev.create_srq           = mthca_create_srq;
                dev->ib_dev.modify_srq           = mthca_modify_srq;
                dev->ib_dev.query_srq            = mthca_query_srq;
                dev->ib_dev.destroy_srq          = mthca_destroy_srq;
                dev->ib_dev.uverbs_cmd_mask     |=
                        (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)  |
                        (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)  |
                        (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)   |
                        (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);

                if (mthca_is_memfree(dev))
                        dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
                else
                        dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
        }

        dev->ib_dev.create_qp            = mthca_create_qp;
        dev->ib_dev.modify_qp            = mthca_modify_qp;
        dev->ib_dev.query_qp             = mthca_query_qp;
        dev->ib_dev.destroy_qp           = mthca_destroy_qp;
        dev->ib_dev.create_cq            = mthca_create_cq;
        dev->ib_dev.resize_cq            = mthca_resize_cq;
        dev->ib_dev.destroy_cq           = mthca_destroy_cq;
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
        dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
        dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;
        dev->ib_dev.get_port_immutable   = mthca_port_immutable;

        if (dev->mthca_flags & MTHCA_FLAG_FMR) {
                dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
                dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
                dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
                if (mthca_is_memfree(dev))
                        dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
                else
                        dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
        }

        dev->ib_dev.attach_mcast         = mthca_multicast_attach;
        dev->ib_dev.detach_mcast         = mthca_multicast_detach;
        dev->ib_dev.process_mad          = mthca_process_mad;

        if (mthca_is_memfree(dev)) {
                dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
                dev->ib_dev.post_send     = mthca_arbel_post_send;
                dev->ib_dev.post_recv     = mthca_arbel_post_receive;
        } else {
                dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
                dev->ib_dev.post_send     = mthca_tavor_post_send;
                dev->ib_dev.post_recv     = mthca_tavor_post_receive;
        }

        mutex_init(&dev->cap_mask_mutex);

        ret = ib_register_device(&dev->ib_dev, NULL);
        if (ret)
                return ret;

        for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
                ret = device_create_file(&dev->ib_dev.dev,
                                         mthca_dev_attributes[i]);
                if (ret) {
                        ib_unregister_device(&dev->ib_dev);
                        return ret;
                }
        }

        mthca_start_catas_poll(dev);

        return 0;
}

void mthca_unregister_device(struct mthca_dev *dev)
{
        mthca_stop_catas_poll(dev);
        ib_unregister_device(&dev->ib_dev);
}