/*
 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/inet.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/vmw_pvrdma-abi.h>
#include <rdma/uverbs_ioctl.h>
58 * pvrdma_query_device - query device
59 * @ibdev: the device to query
60 * @props: the device properties
63 * @return: 0 on success, otherwise negative errno
65 int pvrdma_query_device(struct ib_device
*ibdev
,
66 struct ib_device_attr
*props
,
69 struct pvrdma_dev
*dev
= to_vdev(ibdev
);
71 if (uhw
->inlen
|| uhw
->outlen
)
74 props
->fw_ver
= dev
->dsr
->caps
.fw_ver
;
75 props
->sys_image_guid
= dev
->dsr
->caps
.sys_image_guid
;
76 props
->max_mr_size
= dev
->dsr
->caps
.max_mr_size
;
77 props
->page_size_cap
= dev
->dsr
->caps
.page_size_cap
;
78 props
->vendor_id
= dev
->dsr
->caps
.vendor_id
;
79 props
->vendor_part_id
= dev
->pdev
->device
;
80 props
->hw_ver
= dev
->dsr
->caps
.hw_ver
;
81 props
->max_qp
= dev
->dsr
->caps
.max_qp
;
82 props
->max_qp_wr
= dev
->dsr
->caps
.max_qp_wr
;
83 props
->device_cap_flags
= dev
->dsr
->caps
.device_cap_flags
;
84 props
->max_send_sge
= dev
->dsr
->caps
.max_sge
;
85 props
->max_recv_sge
= dev
->dsr
->caps
.max_sge
;
86 props
->max_sge_rd
= PVRDMA_GET_CAP(dev
, dev
->dsr
->caps
.max_sge
,
87 dev
->dsr
->caps
.max_sge_rd
);
88 props
->max_srq
= dev
->dsr
->caps
.max_srq
;
89 props
->max_srq_wr
= dev
->dsr
->caps
.max_srq_wr
;
90 props
->max_srq_sge
= dev
->dsr
->caps
.max_srq_sge
;
91 props
->max_cq
= dev
->dsr
->caps
.max_cq
;
92 props
->max_cqe
= dev
->dsr
->caps
.max_cqe
;
93 props
->max_mr
= dev
->dsr
->caps
.max_mr
;
94 props
->max_pd
= dev
->dsr
->caps
.max_pd
;
95 props
->max_qp_rd_atom
= dev
->dsr
->caps
.max_qp_rd_atom
;
96 props
->max_qp_init_rd_atom
= dev
->dsr
->caps
.max_qp_init_rd_atom
;
98 dev
->dsr
->caps
.atomic_ops
&
99 (PVRDMA_ATOMIC_OP_COMP_SWAP
| PVRDMA_ATOMIC_OP_FETCH_ADD
) ?
100 IB_ATOMIC_HCA
: IB_ATOMIC_NONE
;
101 props
->masked_atomic_cap
= props
->atomic_cap
;
102 props
->max_ah
= dev
->dsr
->caps
.max_ah
;
103 props
->max_pkeys
= dev
->dsr
->caps
.max_pkeys
;
104 props
->local_ca_ack_delay
= dev
->dsr
->caps
.local_ca_ack_delay
;
105 if ((dev
->dsr
->caps
.bmme_flags
& PVRDMA_BMME_FLAG_LOCAL_INV
) &&
106 (dev
->dsr
->caps
.bmme_flags
& PVRDMA_BMME_FLAG_REMOTE_INV
) &&
107 (dev
->dsr
->caps
.bmme_flags
& PVRDMA_BMME_FLAG_FAST_REG_WR
)) {
108 props
->device_cap_flags
|= IB_DEVICE_MEM_MGT_EXTENSIONS
;
109 props
->max_fast_reg_page_list_len
= PVRDMA_GET_CAP(dev
,
110 PVRDMA_MAX_FAST_REG_PAGES
,
111 dev
->dsr
->caps
.max_fast_reg_page_list_len
);
114 props
->device_cap_flags
|= IB_DEVICE_PORT_ACTIVE_EVENT
|
115 IB_DEVICE_RC_RNR_NAK_GEN
;
121 * pvrdma_query_port - query device port attributes
122 * @ibdev: the device to query
123 * @port: the port number
124 * @props: the device properties
126 * @return: 0 on success, otherwise negative errno
128 int pvrdma_query_port(struct ib_device
*ibdev
, u8 port
,
129 struct ib_port_attr
*props
)
131 struct pvrdma_dev
*dev
= to_vdev(ibdev
);
132 union pvrdma_cmd_req req
;
133 union pvrdma_cmd_resp rsp
;
134 struct pvrdma_cmd_query_port
*cmd
= &req
.query_port
;
135 struct pvrdma_cmd_query_port_resp
*resp
= &rsp
.query_port_resp
;
138 memset(cmd
, 0, sizeof(*cmd
));
139 cmd
->hdr
.cmd
= PVRDMA_CMD_QUERY_PORT
;
140 cmd
->port_num
= port
;
142 err
= pvrdma_cmd_post(dev
, &req
, &rsp
, PVRDMA_CMD_QUERY_PORT_RESP
);
144 dev_warn(&dev
->pdev
->dev
,
145 "could not query port, error: %d\n", err
);
149 /* props being zeroed by the caller, avoid zeroing it here */
151 props
->state
= pvrdma_port_state_to_ib(resp
->attrs
.state
);
152 props
->max_mtu
= pvrdma_mtu_to_ib(resp
->attrs
.max_mtu
);
153 props
->active_mtu
= pvrdma_mtu_to_ib(resp
->attrs
.active_mtu
);
154 props
->gid_tbl_len
= resp
->attrs
.gid_tbl_len
;
155 props
->port_cap_flags
=
156 pvrdma_port_cap_flags_to_ib(resp
->attrs
.port_cap_flags
);
157 props
->port_cap_flags
|= IB_PORT_CM_SUP
;
158 props
->ip_gids
= true;
159 props
->max_msg_sz
= resp
->attrs
.max_msg_sz
;
160 props
->bad_pkey_cntr
= resp
->attrs
.bad_pkey_cntr
;
161 props
->qkey_viol_cntr
= resp
->attrs
.qkey_viol_cntr
;
162 props
->pkey_tbl_len
= resp
->attrs
.pkey_tbl_len
;
163 props
->lid
= resp
->attrs
.lid
;
164 props
->sm_lid
= resp
->attrs
.sm_lid
;
165 props
->lmc
= resp
->attrs
.lmc
;
166 props
->max_vl_num
= resp
->attrs
.max_vl_num
;
167 props
->sm_sl
= resp
->attrs
.sm_sl
;
168 props
->subnet_timeout
= resp
->attrs
.subnet_timeout
;
169 props
->init_type_reply
= resp
->attrs
.init_type_reply
;
170 props
->active_width
= pvrdma_port_width_to_ib(resp
->attrs
.active_width
);
171 props
->active_speed
= pvrdma_port_speed_to_ib(resp
->attrs
.active_speed
);
172 props
->phys_state
= resp
->attrs
.phys_state
;
178 * pvrdma_query_gid - query device gid
179 * @ibdev: the device to query
180 * @port: the port number
182 * @gid: the device gid value
184 * @return: 0 on success, otherwise negative errno
186 int pvrdma_query_gid(struct ib_device
*ibdev
, u8 port
, int index
,
189 struct pvrdma_dev
*dev
= to_vdev(ibdev
);
191 if (index
>= dev
->dsr
->caps
.gid_tbl_len
)
194 memcpy(gid
, &dev
->sgid_tbl
[index
], sizeof(union ib_gid
));
200 * pvrdma_query_pkey - query device port's P_Key table
201 * @ibdev: the device to query
202 * @port: the port number
204 * @pkey: the device P_Key value
206 * @return: 0 on success, otherwise negative errno
208 int pvrdma_query_pkey(struct ib_device
*ibdev
, u8 port
, u16 index
,
212 union pvrdma_cmd_req req
;
213 union pvrdma_cmd_resp rsp
;
214 struct pvrdma_cmd_query_pkey
*cmd
= &req
.query_pkey
;
216 memset(cmd
, 0, sizeof(*cmd
));
217 cmd
->hdr
.cmd
= PVRDMA_CMD_QUERY_PKEY
;
218 cmd
->port_num
= port
;
221 err
= pvrdma_cmd_post(to_vdev(ibdev
), &req
, &rsp
,
222 PVRDMA_CMD_QUERY_PKEY_RESP
);
224 dev_warn(&to_vdev(ibdev
)->pdev
->dev
,
225 "could not query pkey, error: %d\n", err
);
229 *pkey
= rsp
.query_pkey_resp
.pkey
;
234 enum rdma_link_layer
pvrdma_port_link_layer(struct ib_device
*ibdev
,
237 return IB_LINK_LAYER_ETHERNET
;
240 int pvrdma_modify_device(struct ib_device
*ibdev
, int mask
,
241 struct ib_device_modify
*props
)
245 if (mask
& ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID
|
246 IB_DEVICE_MODIFY_NODE_DESC
)) {
247 dev_warn(&to_vdev(ibdev
)->pdev
->dev
,
248 "unsupported device modify mask %#x\n", mask
);
252 if (mask
& IB_DEVICE_MODIFY_NODE_DESC
) {
253 spin_lock_irqsave(&to_vdev(ibdev
)->desc_lock
, flags
);
254 memcpy(ibdev
->node_desc
, props
->node_desc
, 64);
255 spin_unlock_irqrestore(&to_vdev(ibdev
)->desc_lock
, flags
);
258 if (mask
& IB_DEVICE_MODIFY_SYS_IMAGE_GUID
) {
259 mutex_lock(&to_vdev(ibdev
)->port_mutex
);
260 to_vdev(ibdev
)->sys_image_guid
=
261 cpu_to_be64(props
->sys_image_guid
);
262 mutex_unlock(&to_vdev(ibdev
)->port_mutex
);
269 * pvrdma_modify_port - modify device port attributes
270 * @ibdev: the device to modify
271 * @port: the port number
272 * @mask: attributes to modify
273 * @props: the device properties
275 * @return: 0 on success, otherwise negative errno
277 int pvrdma_modify_port(struct ib_device
*ibdev
, u8 port
, int mask
,
278 struct ib_port_modify
*props
)
280 struct ib_port_attr attr
;
281 struct pvrdma_dev
*vdev
= to_vdev(ibdev
);
284 if (mask
& ~IB_PORT_SHUTDOWN
) {
285 dev_warn(&vdev
->pdev
->dev
,
286 "unsupported port modify mask %#x\n", mask
);
290 mutex_lock(&vdev
->port_mutex
);
291 ret
= ib_query_port(ibdev
, port
, &attr
);
295 vdev
->port_cap_mask
|= props
->set_port_cap_mask
;
296 vdev
->port_cap_mask
&= ~props
->clr_port_cap_mask
;
298 if (mask
& IB_PORT_SHUTDOWN
)
299 vdev
->ib_active
= false;
302 mutex_unlock(&vdev
->port_mutex
);
307 * pvrdma_alloc_ucontext - allocate ucontext
308 * @uctx: the uverbs countext
311 * @return: zero on success, otherwise errno.
313 int pvrdma_alloc_ucontext(struct ib_ucontext
*uctx
, struct ib_udata
*udata
)
315 struct ib_device
*ibdev
= uctx
->device
;
316 struct pvrdma_dev
*vdev
= to_vdev(ibdev
);
317 struct pvrdma_ucontext
*context
= to_vucontext(uctx
);
318 union pvrdma_cmd_req req
= {};
319 union pvrdma_cmd_resp rsp
= {};
320 struct pvrdma_cmd_create_uc
*cmd
= &req
.create_uc
;
321 struct pvrdma_cmd_create_uc_resp
*resp
= &rsp
.create_uc_resp
;
322 struct pvrdma_alloc_ucontext_resp uresp
= {};
325 if (!vdev
->ib_active
)
329 ret
= pvrdma_uar_alloc(vdev
, &context
->uar
);
333 /* get ctx_handle from host */
334 if (vdev
->dsr_version
< PVRDMA_PPN64_VERSION
)
335 cmd
->pfn
= context
->uar
.pfn
;
337 cmd
->pfn64
= context
->uar
.pfn
;
339 cmd
->hdr
.cmd
= PVRDMA_CMD_CREATE_UC
;
340 ret
= pvrdma_cmd_post(vdev
, &req
, &rsp
, PVRDMA_CMD_CREATE_UC_RESP
);
342 dev_warn(&vdev
->pdev
->dev
,
343 "could not create ucontext, error: %d\n", ret
);
347 context
->ctx_handle
= resp
->ctx_handle
;
349 /* copy back to user */
350 uresp
.qp_tab_size
= vdev
->dsr
->caps
.max_qp
;
351 ret
= ib_copy_to_udata(udata
, &uresp
, sizeof(uresp
));
353 pvrdma_uar_free(vdev
, &context
->uar
);
354 pvrdma_dealloc_ucontext(&context
->ibucontext
);
361 pvrdma_uar_free(vdev
, &context
->uar
);
366 * pvrdma_dealloc_ucontext - deallocate ucontext
367 * @ibcontext: the ucontext
369 void pvrdma_dealloc_ucontext(struct ib_ucontext
*ibcontext
)
371 struct pvrdma_ucontext
*context
= to_vucontext(ibcontext
);
372 union pvrdma_cmd_req req
= {};
373 struct pvrdma_cmd_destroy_uc
*cmd
= &req
.destroy_uc
;
376 cmd
->hdr
.cmd
= PVRDMA_CMD_DESTROY_UC
;
377 cmd
->ctx_handle
= context
->ctx_handle
;
379 ret
= pvrdma_cmd_post(context
->dev
, &req
, NULL
, 0);
381 dev_warn(&context
->dev
->pdev
->dev
,
382 "destroy ucontext failed, error: %d\n", ret
);
384 /* Free the UAR even if the device command failed */
385 pvrdma_uar_free(to_vdev(ibcontext
->device
), &context
->uar
);
389 * pvrdma_mmap - create mmap region
390 * @ibcontext: the user context
393 * @return: 0 on success, otherwise errno.
395 int pvrdma_mmap(struct ib_ucontext
*ibcontext
, struct vm_area_struct
*vma
)
397 struct pvrdma_ucontext
*context
= to_vucontext(ibcontext
);
398 unsigned long start
= vma
->vm_start
;
399 unsigned long size
= vma
->vm_end
- vma
->vm_start
;
400 unsigned long offset
= vma
->vm_pgoff
<< PAGE_SHIFT
;
402 dev_dbg(&context
->dev
->pdev
->dev
, "create mmap region\n");
404 if ((size
!= PAGE_SIZE
) || (offset
& ~PAGE_MASK
)) {
405 dev_warn(&context
->dev
->pdev
->dev
,
406 "invalid params for mmap region\n");
410 /* Map UAR to kernel space, VM_LOCKED? */
411 vma
->vm_flags
|= VM_DONTCOPY
| VM_DONTEXPAND
;
412 vma
->vm_page_prot
= pgprot_noncached(vma
->vm_page_prot
);
413 if (io_remap_pfn_range(vma
, start
, context
->uar
.pfn
, size
,
421 * pvrdma_alloc_pd - allocate protection domain
425 * @return: the ib_pd protection domain pointer on success, otherwise errno.
427 int pvrdma_alloc_pd(struct ib_pd
*ibpd
, struct ib_udata
*udata
)
429 struct ib_device
*ibdev
= ibpd
->device
;
430 struct pvrdma_pd
*pd
= to_vpd(ibpd
);
431 struct pvrdma_dev
*dev
= to_vdev(ibdev
);
432 union pvrdma_cmd_req req
= {};
433 union pvrdma_cmd_resp rsp
= {};
434 struct pvrdma_cmd_create_pd
*cmd
= &req
.create_pd
;
435 struct pvrdma_cmd_create_pd_resp
*resp
= &rsp
.create_pd_resp
;
436 struct pvrdma_alloc_pd_resp pd_resp
= {0};
438 struct pvrdma_ucontext
*context
= rdma_udata_to_drv_context(
439 udata
, struct pvrdma_ucontext
, ibucontext
);
441 /* Check allowed max pds */
442 if (!atomic_add_unless(&dev
->num_pds
, 1, dev
->dsr
->caps
.max_pd
))
445 cmd
->hdr
.cmd
= PVRDMA_CMD_CREATE_PD
;
446 cmd
->ctx_handle
= context
? context
->ctx_handle
: 0;
447 ret
= pvrdma_cmd_post(dev
, &req
, &rsp
, PVRDMA_CMD_CREATE_PD_RESP
);
449 dev_warn(&dev
->pdev
->dev
,
450 "failed to allocate protection domain, error: %d\n",
455 pd
->privileged
= !udata
;
456 pd
->pd_handle
= resp
->pd_handle
;
457 pd
->pdn
= resp
->pd_handle
;
458 pd_resp
.pdn
= resp
->pd_handle
;
461 if (ib_copy_to_udata(udata
, &pd_resp
, sizeof(pd_resp
))) {
462 dev_warn(&dev
->pdev
->dev
,
463 "failed to copy back protection domain\n");
464 pvrdma_dealloc_pd(&pd
->ibpd
, udata
);
473 atomic_dec(&dev
->num_pds
);
478 * pvrdma_dealloc_pd - deallocate protection domain
479 * @pd: the protection domain to be released
480 * @udata: user data or null for kernel object
482 * @return: 0 on success, otherwise errno.
484 void pvrdma_dealloc_pd(struct ib_pd
*pd
, struct ib_udata
*udata
)
486 struct pvrdma_dev
*dev
= to_vdev(pd
->device
);
487 union pvrdma_cmd_req req
= {};
488 struct pvrdma_cmd_destroy_pd
*cmd
= &req
.destroy_pd
;
491 cmd
->hdr
.cmd
= PVRDMA_CMD_DESTROY_PD
;
492 cmd
->pd_handle
= to_vpd(pd
)->pd_handle
;
494 ret
= pvrdma_cmd_post(dev
, &req
, NULL
, 0);
496 dev_warn(&dev
->pdev
->dev
,
497 "could not dealloc protection domain, error: %d\n",
500 atomic_dec(&dev
->num_pds
);
504 * pvrdma_create_ah - create an address handle
505 * @pd: the protection domain
506 * @ah_attr: the attributes of the AH
507 * @udata: user data blob
508 * @flags: create address handle flags (see enum rdma_create_ah_flags)
510 * @return: 0 on success, otherwise errno.
512 int pvrdma_create_ah(struct ib_ah
*ibah
, struct rdma_ah_attr
*ah_attr
,
513 u32 flags
, struct ib_udata
*udata
)
515 struct pvrdma_dev
*dev
= to_vdev(ibah
->device
);
516 struct pvrdma_ah
*ah
= to_vah(ibah
);
517 const struct ib_global_route
*grh
;
518 u8 port_num
= rdma_ah_get_port_num(ah_attr
);
520 if (!(rdma_ah_get_ah_flags(ah_attr
) & IB_AH_GRH
))
523 grh
= rdma_ah_read_grh(ah_attr
);
524 if ((ah_attr
->type
!= RDMA_AH_ATTR_TYPE_ROCE
) ||
525 rdma_is_multicast_addr((struct in6_addr
*)grh
->dgid
.raw
))
528 if (!atomic_add_unless(&dev
->num_ahs
, 1, dev
->dsr
->caps
.max_ah
))
531 ah
->av
.port_pd
= to_vpd(ibah
->pd
)->pd_handle
| (port_num
<< 24);
532 ah
->av
.src_path_bits
= rdma_ah_get_path_bits(ah_attr
);
533 ah
->av
.src_path_bits
|= 0x80;
534 ah
->av
.gid_index
= grh
->sgid_index
;
535 ah
->av
.hop_limit
= grh
->hop_limit
;
536 ah
->av
.sl_tclass_flowlabel
= (grh
->traffic_class
<< 20) |
538 memcpy(ah
->av
.dgid
, grh
->dgid
.raw
, 16);
539 memcpy(ah
->av
.dmac
, ah_attr
->roce
.dmac
, ETH_ALEN
);
545 * pvrdma_destroy_ah - destroy an address handle
546 * @ah: the address handle to destroyed
547 * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
550 void pvrdma_destroy_ah(struct ib_ah
*ah
, u32 flags
)
552 struct pvrdma_dev
*dev
= to_vdev(ah
->device
);
554 atomic_dec(&dev
->num_ahs
);