/*
 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <asm/page.h>
#include <linux/inet.h>
#include <linux/io.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/vmw_pvrdma-abi.h>

#include "pvrdma.h"
/**
 * pvrdma_query_device - query device
 * @ibdev: the device to query
 * @props: the device properties
 * @uhw: user data
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_device(struct ib_device *ibdev,
			struct ib_device_attr *props,
			struct ib_udata *uhw)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	memset(props, 0, sizeof(*props));

	props->fw_ver = dev->dsr->caps.fw_ver;
	props->sys_image_guid = dev->dsr->caps.sys_image_guid;
	props->max_mr_size = dev->dsr->caps.max_mr_size;
	props->page_size_cap = dev->dsr->caps.page_size_cap;
	props->vendor_id = dev->dsr->caps.vendor_id;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->dsr->caps.hw_ver;
	props->max_qp = dev->dsr->caps.max_qp;
	props->max_qp_wr = dev->dsr->caps.max_qp_wr;
	props->device_cap_flags = dev->dsr->caps.device_cap_flags;
	props->max_sge = dev->dsr->caps.max_sge;
	props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
					   dev->dsr->caps.max_sge_rd);
	props->max_srq = dev->dsr->caps.max_srq;
	props->max_srq_wr = dev->dsr->caps.max_srq_wr;
	props->max_srq_sge = dev->dsr->caps.max_srq_sge;
	props->max_cq = dev->dsr->caps.max_cq;
	props->max_cqe = dev->dsr->caps.max_cqe;
	props->max_mr = dev->dsr->caps.max_mr;
	props->max_pd = dev->dsr->caps.max_pd;
	props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
	props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
	props->atomic_cap =
		dev->dsr->caps.atomic_ops &
		(PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
	props->masked_atomic_cap = props->atomic_cap;
	props->max_ah = dev->dsr->caps.max_ah;
	props->max_pkeys = dev->dsr->caps.max_pkeys;
	props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
	if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
	    (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
	    (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
		props->max_fast_reg_page_list_len = PVRDMA_GET_CAP(dev,
				PVRDMA_MAX_FAST_REG_PAGES,
				dev->dsr->caps.max_fast_reg_page_list_len);
	}

	props->device_cap_flags |= IB_DEVICE_PORT_ACTIVE_EVENT |
				   IB_DEVICE_RC_RNR_NAK_GEN;

	return 0;
}
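
/*
 * Note on the attribute plumbing above: dev->dsr is the device shared
 * region that the paravirtual device fills in at probe time, so querying
 * device attributes never requires a command round trip to the host.
 * PVRDMA_GET_CAP appears to select between a legacy value and a newer
 * device-reported capability depending on the negotiated device version;
 * that reading is an assumption here, since the macro is defined in
 * pvrdma.h rather than in this file.
 */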
/**
 * pvrdma_query_port - query device port attributes
 * @ibdev: the device to query
 * @port: the port number
 * @props: the device properties
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_port(struct ib_device *ibdev, u8 port,
		      struct ib_port_attr *props)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_port *cmd = &req.query_port;
	struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
	int err;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
	cmd->port_num = port;

	err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
	if (err < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not query port, error: %d\n", err);
		return err;
	}

	/* props is zeroed by the caller, avoid zeroing it here */

	props->state = pvrdma_port_state_to_ib(resp->attrs.state);
	props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
	props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
	props->gid_tbl_len = resp->attrs.gid_tbl_len;
	props->port_cap_flags =
		pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
	props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
	props->max_msg_sz = resp->attrs.max_msg_sz;
	props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
	props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
	props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
	props->lid = resp->attrs.lid;
	props->sm_lid = resp->attrs.sm_lid;
	props->lmc = resp->attrs.lmc;
	props->max_vl_num = resp->attrs.max_vl_num;
	props->sm_sl = resp->attrs.sm_sl;
	props->subnet_timeout = resp->attrs.subnet_timeout;
	props->init_type_reply = resp->attrs.init_type_reply;
	props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
	props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
	props->phys_state = resp->attrs.phys_state;

	return 0;
}
/**
 * pvrdma_query_gid - query device gid
 * @ibdev: the device to query
 * @port: the port number
 * @index: the index
 * @gid: the device gid value
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index,
		     union ib_gid *gid)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (index >= dev->dsr->caps.gid_tbl_len)
		return -EINVAL;

	memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));

	return 0;
}
/**
 * pvrdma_query_pkey - query device port's P_Key table
 * @ibdev: the device to query
 * @port: the port number
 * @index: the index
 * @pkey: the device P_Key value
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
		      u16 *pkey)
{
	int err = 0;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
	cmd->port_num = port;
	cmd->index = index;

	err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
			      PVRDMA_CMD_QUERY_PKEY_RESP);
	if (err < 0) {
		dev_warn(&to_vdev(ibdev)->pdev->dev,
			 "could not query pkey, error: %d\n", err);
		return err;
	}

	*pkey = rsp.query_pkey_resp.pkey;

	return 0;
}
enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
					    u8 port)
{
	return IB_LINK_LAYER_ETHERNET;
}
int pvrdma_modify_device(struct ib_device *ibdev, int mask,
			 struct ib_device_modify *props)
{
	unsigned long flags;

	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
		     IB_DEVICE_MODIFY_NODE_DESC)) {
		dev_warn(&to_vdev(ibdev)->pdev->dev,
			 "unsupported device modify mask %#x\n", mask);
		return -EOPNOTSUPP;
	}

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
		memcpy(ibdev->node_desc, props->node_desc, 64);
		spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
	}

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		mutex_lock(&to_vdev(ibdev)->port_mutex);
		to_vdev(ibdev)->sys_image_guid =
			cpu_to_be64(props->sys_image_guid);
		mutex_unlock(&to_vdev(ibdev)->port_mutex);
	}

	return 0;
}
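
/*
 * The fixed 64-byte memcpy in pvrdma_modify_device() matches
 * IB_DEVICE_NODE_DESC_MAX, the node description size used by the IB
 * core; desc_lock makes the update atomic with respect to concurrent
 * readers of ibdev->node_desc.
 */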
/**
 * pvrdma_modify_port - modify device port attributes
 * @ibdev: the device to modify
 * @port: the port number
 * @mask: attributes to modify
 * @props: the device properties
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
		       struct ib_port_modify *props)
{
	struct ib_port_attr attr;
	struct pvrdma_dev *vdev = to_vdev(ibdev);
	int ret;

	if (mask & ~IB_PORT_SHUTDOWN) {
		dev_warn(&vdev->pdev->dev,
			 "unsupported port modify mask %#x\n", mask);
		return -EOPNOTSUPP;
	}

	mutex_lock(&vdev->port_mutex);
	ret = ib_query_port(ibdev, port, &attr);
	if (ret)
		goto out;

	vdev->port_cap_mask |= props->set_port_cap_mask;
	vdev->port_cap_mask &= ~props->clr_port_cap_mask;

	if (mask & IB_PORT_SHUTDOWN)
		vdev->ib_active = false;

out:
	mutex_unlock(&vdev->port_mutex);
	return ret;
}
/**
 * pvrdma_alloc_ucontext - allocate ucontext
 * @ibdev: the IB device
 * @udata: user data
 *
 * @return: the ib_ucontext pointer on success, otherwise errno.
 */
struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
					  struct ib_udata *udata)
{
	struct pvrdma_dev *vdev = to_vdev(ibdev);
	struct pvrdma_ucontext *context;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
	struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
	struct pvrdma_alloc_ucontext_resp uresp = {0};
	int ret;
	void *ptr;

	if (!vdev->ib_active)
		return ERR_PTR(-EAGAIN);

	context = kmalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return ERR_PTR(-ENOMEM);

	context->dev = vdev;
	ret = pvrdma_uar_alloc(vdev, &context->uar);
	if (ret) {
		kfree(context);
		return ERR_PTR(-ENOMEM);
	}

	/* get ctx_handle from host */
	memset(cmd, 0, sizeof(*cmd));
	cmd->pfn = context->uar.pfn;
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
	ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
	if (ret < 0) {
		dev_warn(&vdev->pdev->dev,
			 "could not create ucontext, error: %d\n", ret);
		ptr = ERR_PTR(ret);
		goto err;
	}

	context->ctx_handle = resp->ctx_handle;

	/* copy back to user */
	uresp.qp_tab_size = vdev->dsr->caps.max_qp;
	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (ret) {
		pvrdma_uar_free(vdev, &context->uar);
		context->ibucontext.device = ibdev;
		pvrdma_dealloc_ucontext(&context->ibucontext);
		return ERR_PTR(-EFAULT);
	}

	return &context->ibucontext;

err:
	pvrdma_uar_free(vdev, &context->uar);
	kfree(context);
	return ptr;
}
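
/*
 * A sketch of the userspace side of the ABI above, assuming the usual
 * uverbs flow (none of this is defined in this file): the provider
 * library issues IB_USER_VERBS_CMD_GET_CONTEXT, and the response blob
 * written by ib_copy_to_udata() is a struct pvrdma_alloc_ucontext_resp
 * whose qp_tab_size tells the library how large a QP table to set up.
 */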
/**
 * pvrdma_dealloc_ucontext - deallocate ucontext
 * @ibcontext: the ucontext
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct pvrdma_ucontext *context = to_vucontext(ibcontext);
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
	cmd->ctx_handle = context->ctx_handle;

	ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&context->dev->pdev->dev,
			 "destroy ucontext failed, error: %d\n", ret);

	/* Free the UAR even if the device command failed */
	pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
	kfree(context);

	return 0;
}
/**
 * pvrdma_mmap - create mmap region
 * @ibcontext: the user context
 * @vma: the VMA
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct pvrdma_ucontext *context = to_vucontext(ibcontext);
	unsigned long start = vma->vm_start;
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;

	dev_dbg(&context->dev->pdev->dev, "create mmap region\n");

	if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
		dev_warn(&context->dev->pdev->dev,
			 "invalid params for mmap region\n");
		return -EINVAL;
	}

	/* Map UAR to kernel space, VM_LOCKED? */
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
			       vma->vm_page_prot))
		return -EAGAIN;

	return 0;
}
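
/*
 * Usage sketch for the mmap handler above (hypothetical, not part of
 * this driver): userspace maps exactly one page at offset 0 to reach
 * its UAR, e.g.
 *
 *	uar = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, cmd_fd, 0);
 *
 * Any request that is not a single, page-aligned page fails with
 * -EINVAL.
 */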
/**
 * pvrdma_alloc_pd - allocate protection domain
 * @ibdev: the IB device
 * @context: user context
 * @udata: user data
 *
 * @return: the ib_pd protection domain pointer on success, otherwise errno.
 */
struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
			      struct ib_ucontext *context,
			      struct ib_udata *udata)
{
	struct pvrdma_pd *pd;
	struct pvrdma_dev *dev = to_vdev(ibdev);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
	struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
	struct ib_pd *ptr;
	int ret;

	/* Check allowed max pds */
	if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
		return ERR_PTR(-ENOMEM);

	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd) {
		ptr = ERR_PTR(-ENOMEM);
		goto err;
	}

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
	cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0;
	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "failed to allocate protection domain, error: %d\n",
			 ret);
		ptr = ERR_PTR(ret);
		goto freepd;
	}

	pd->privileged = !context;
	pd->pd_handle = resp->pd_handle;
	pd->pdn = resp->pd_handle;

	if (context) {
		if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back protection domain\n");
			pvrdma_dealloc_pd(&pd->ibpd);
			return ERR_PTR(-EFAULT);
		}
	}

	/* u32 pd handle */
	return &pd->ibpd;

freepd:
	kfree(pd);
err:
	atomic_dec(&dev->num_pds);
	return ptr;
}
/**
 * pvrdma_dealloc_pd - deallocate protection domain
 * @pd: the protection domain to be released
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_dealloc_pd(struct ib_pd *pd)
{
	struct pvrdma_dev *dev = to_vdev(pd->device);
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
	cmd->pd_handle = to_vpd(pd)->pd_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret)
		dev_warn(&dev->pdev->dev,
			 "could not dealloc protection domain, error: %d\n",
			 ret);

	kfree(to_vpd(pd));
	atomic_dec(&dev->num_pds);

	return 0;
}
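
/*
 * Resource accounting convention used for PDs above and for AHs below:
 * atomic_add_unless(&dev->num_*, 1, cap) reserves a slot only while the
 * count is below the device cap, and every error and teardown path pairs
 * it with atomic_dec(), so the counters never exceed
 * dev->dsr->caps.max_pd / max_ah.
 */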
/**
 * pvrdma_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 * @udata: user data blob
 *
 * @return: the ib_ah pointer on success, otherwise errno.
 */
struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
			       struct ib_udata *udata)
{
	struct pvrdma_dev *dev = to_vdev(pd->device);
	struct pvrdma_ah *ah;
	const struct ib_global_route *grh;
	u8 port_num = rdma_ah_get_port_num(ah_attr);

	if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
		return ERR_PTR(-EINVAL);

	grh = rdma_ah_read_grh(ah_attr);
	if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
	    rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw))
		return ERR_PTR(-EINVAL);

	if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
		return ERR_PTR(-ENOMEM);

	ah = kzalloc(sizeof(*ah), GFP_KERNEL);
	if (!ah) {
		atomic_dec(&dev->num_ahs);
		return ERR_PTR(-ENOMEM);
	}

	ah->av.port_pd = to_vpd(pd)->pd_handle | (port_num << 24);
	ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr);
	ah->av.src_path_bits |= 0x80;
	ah->av.gid_index = grh->sgid_index;
	ah->av.hop_limit = grh->hop_limit;
	ah->av.sl_tclass_flowlabel = (grh->traffic_class << 20) |
				     grh->flow_label;
	memcpy(ah->av.dgid, grh->dgid.raw, 16);
	memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN);

	ah->ibah.device = pd->device;
	ah->ibah.pd = pd;
	ah->ibah.uobject = NULL;

	return &ah->ibah;
}
/**
 * pvrdma_destroy_ah - destroy an address handle
 * @ah: the address handle to be destroyed
 *
 * @return: 0 on success.
 */
int pvrdma_destroy_ah(struct ib_ah *ah)
{
	struct pvrdma_dev *dev = to_vdev(ah->device);

	kfree(to_vah(ah));
	atomic_dec(&dev->num_ahs);

	return 0;
}