/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME	"vmw_pvrdma"
#define DRV_VERSION	"1.0.1.0-k"

static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
static ssize_t hca_type_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
static DEVICE_ATTR_RO(hca_type);

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_REV_ID);
}
static DEVICE_ATTR_RO(hw_rev);

static ssize_t board_id_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
}
static DEVICE_ATTR_RO(board_id);

static struct attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev.attr,
	&dev_attr_hca_type.attr,
	&dev_attr_board_id.attr,
	NULL,
};

static const struct attribute_group pvrdma_attr_group = {
	.attrs = pvrdma_class_attributes,
};
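
/*
 * The device reports its firmware version as a packed 64-bit value in
 * dsr->caps.fw_ver; the helper below formats it as "major.minor.patch"
 * using bits 63:32, 31:16 and 15:0 respectively.
 */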
static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);

	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}

static int pvrdma_init_device(struct pvrdma_dev *dev)
{
	/* Initialize some device related stuff */
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_srqs, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}

static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
				 struct ib_port_immutable *immutable)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct ib_port_attr attr;
	int err;

	if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE;
	else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}
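
/*
 * Verb entry points wired into ib_core via ib_set_device_ops(); the
 * callbacks themselves live in the other pvrdma_*.c files of this driver.
 */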
static const struct ib_device_ops pvrdma_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_VMW_PVRDMA,
	.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION,

	.add_gid = pvrdma_add_gid,
	.alloc_mr = pvrdma_alloc_mr,
	.alloc_pd = pvrdma_alloc_pd,
	.alloc_ucontext = pvrdma_alloc_ucontext,
	.create_ah = pvrdma_create_ah,
	.create_cq = pvrdma_create_cq,
	.create_qp = pvrdma_create_qp,
	.dealloc_pd = pvrdma_dealloc_pd,
	.dealloc_ucontext = pvrdma_dealloc_ucontext,
	.del_gid = pvrdma_del_gid,
	.dereg_mr = pvrdma_dereg_mr,
	.destroy_ah = pvrdma_destroy_ah,
	.destroy_cq = pvrdma_destroy_cq,
	.destroy_qp = pvrdma_destroy_qp,
	.get_dev_fw_str = pvrdma_get_fw_ver_str,
	.get_dma_mr = pvrdma_get_dma_mr,
	.get_link_layer = pvrdma_port_link_layer,
	.get_port_immutable = pvrdma_port_immutable,
	.map_mr_sg = pvrdma_map_mr_sg,
	.modify_port = pvrdma_modify_port,
	.modify_qp = pvrdma_modify_qp,
	.poll_cq = pvrdma_poll_cq,
	.post_recv = pvrdma_post_recv,
	.post_send = pvrdma_post_send,
	.query_device = pvrdma_query_device,
	.query_gid = pvrdma_query_gid,
	.query_pkey = pvrdma_query_pkey,
	.query_port = pvrdma_query_port,
	.query_qp = pvrdma_query_qp,
	.reg_user_mr = pvrdma_reg_user_mr,
	.req_notify_cq = pvrdma_req_notify_cq,

	INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
};

static const struct ib_device_ops pvrdma_dev_srq_ops = {
	.create_srq = pvrdma_create_srq,
	.destroy_srq = pvrdma_destroy_srq,
	.modify_srq = pvrdma_modify_srq,
	.query_srq = pvrdma_query_srq,

	INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq),
};
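
/*
 * Fill in the ib_device fields from the device capabilities, allocate the
 * QP/CQ/SRQ lookup tables used by the interrupt handlers, and register the
 * device with ib_core under the "vmw_pvrdma%d" name.
 */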
static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -1;

	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dev.parent = &dev->pdev->dev;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_POLL_CQ)		|
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)	|
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
		(1ull << IB_USER_VERBS_CMD_POST_SEND)		|
		(1ull << IB_USER_VERBS_CMD_POST_RECV)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH);

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops);

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

	/* Check if SRQ is supported by backend */
	if (dev->dsr->caps.max_srq) {
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)	|
			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)	|
			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)	|
			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)	|
			(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);

		ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);

		dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
				       sizeof(struct pvrdma_srq *),
				       GFP_KERNEL);
		if (!dev->srq_tbl)
			goto err_qp_free;
	}
	ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1);
	if (ret)
		return ret;
	spin_lock_init(&dev->srq_tbl_lock);
	rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);

	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d");
	if (ret)
		goto err_srq_free;

	dev->ib_active = true;

	return 0;

err_srq_free:
	kfree(dev->srq_tbl);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}
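
/*
 * Interrupt vector layout: vector 0 signals command responses, vector 1
 * carries asynchronous events, and any remaining vectors signal CQ
 * completions (see pvrdma_alloc_intrs() below).
 */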
static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (!dev->pdev->msix_enabled) {
		/* Legacy intr */
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}
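
/*
 * The QP/CQ/SRQ event helpers below look up the object by number in the
 * corresponding table, take a reference under the table lock, deliver the
 * event to the consumer's event handler and then drop the reference,
 * completing ->free once the last reference goes away.
 */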
static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		refcount_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type; /* 1:1 mapping for now. */
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		if (refcount_dec_and_test(&qp->refcnt))
			complete(&qp->free);
	}
}

static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		refcount_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type; /* 1:1 mapping for now. */
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		if (refcount_dec_and_test(&cq->refcnt))
			complete(&cq->free);
	}
}

static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
{
	struct pvrdma_srq *srq;
	unsigned long flags;

	spin_lock_irqsave(&dev->srq_tbl_lock, flags);
	if (dev->srq_tbl)
		srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq];
	else
		srq = NULL;
	if (srq)
		refcount_inc(&srq->refcnt);
	spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);

	if (srq && srq->ibsrq.event_handler) {
		struct ib_srq *ibsrq = &srq->ibsrq;
		struct ib_event e;

		e.device = ibsrq->device;
		e.element.srq = ibsrq;
		e.event = type; /* 1:1 mapping for now. */
		ibsrq->event_handler(&e, ibsrq->srq_context);
	}
	if (srq) {
		if (refcount_dec_and_test(&srq->refcnt))
			complete(&srq->free);
	}
}

static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}

static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}
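
/*
 * The async event ring is backed by a page directory: page 0 holds the ring
 * state and the event entries start at offset PAGE_SIZE, which is what
 * get_eqe() indexes into. The CQ notification ring below is laid out the
 * same way.
 */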
static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}

static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

	/*
	 * Don't process events until the IB device is registered. Otherwise
	 * we'll try to ib_dispatch_event() on an invalid device.
	 */
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			pvrdma_srq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}

static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;
	unsigned long flags;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock_irqsave(&dev->cq_tbl_lock, flags);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			refcount_inc(&cq->refcnt);
		spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		if (cq) {
			if (refcount_dec_and_test(&cq->refcnt))
				complete(&cq->free);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
	for (i = 0; i < dev->nr_vectors; i++)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
}

static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}

static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0, i;

	ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1,
					    PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (ret < 0)
			return ret;
	}
	dev->nr_vectors = ret;

	ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
			  pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
	if (ret) {
		dev_err(&dev->pdev->dev,
			"failed to request interrupt 0\n");
		goto out_free_vectors;
	}

	for (i = 1; i < dev->nr_vectors; i++) {
		ret = request_irq(pci_irq_vector(dev->pdev, i),
				  i == 1 ? pvrdma_intr1_handler :
					   pvrdma_intrx_handler,
				  0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt %d\n", i);
			goto free_irqs;
		}
	}

	return 0;

free_irqs:
	while (--i >= 0)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
out_free_vectors:
	pci_free_irq_vectors(pdev);
	return ret;
}
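
/*
 * The command and response slots are single-page DMA buffers shared with the
 * device; their addresses are published through the device shared region
 * (dsr->cmd_slot_dma / dsr->resp_slot_dma) during probe.
 */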
static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}

static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   u8 gid_type,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = gid_type;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	return pvrdma_add_gid_at_index(dev, &attr->gid,
				       ib_gid_type_to_pvrdma(attr->gid_type),
				       attr->index);
}

static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;

	/* Update sgid table. */
	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return ret;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}

static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
		attr->index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, attr->index);
}
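
/*
 * A pvrdma device is paired with the vmxnet3 device in the same PCI slot
 * (function 0). Netdevice notifier events on that paired device are used to
 * reflect link state into IB port events and to re-bind the netdev on
 * register/unregister.
 */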
static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  struct net_device *ndev,
					  unsigned long event)
{
	struct pci_dev *pdev_net;
	unsigned int slot;

	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_write_reg(dev, PVRDMA_REG_CTL,
				 PVRDMA_DEVICE_CTL_UNQUIESCE);

		mb();

		if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
			dev_err(&dev->pdev->dev,
				"failed to activate device during link up\n");
		else
			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;
	case NETDEV_UNREGISTER:
		ib_device_set_netdev(&dev->ib_dev, NULL, 1);
		dev_put(dev->netdev);
		dev->netdev = NULL;
		break;
	case NETDEV_REGISTER:
		/* vmxnet3 will have same bus, slot. But func will be 0 */
		slot = PCI_SLOT(dev->pdev->devfn);
		pdev_net = pci_get_slot(dev->pdev->bus,
					PCI_DEVFN(slot, 0));
		if ((dev->netdev == NULL) &&
		    (pci_get_drvdata(pdev_net) == ndev)) {
			/* this is our netdev */
			ib_device_set_netdev(&dev->ib_dev, ndev, 1);
			dev->netdev = ndev;
			dev_hold(ndev);
		}
		pci_dev_put(pdev_net);
		break;

	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev_name(&dev->ib_dev.dev));
		break;
	}
}

static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if ((netdev_work->event == NETDEV_REGISTER) ||
		    (dev->netdev == netdev_work->event_netdev)) {
			pvrdma_netdevice_event_handle(dev,
						      netdev_work->event_netdev,
						      netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}

static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}
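
/*
 * Probe sequence: enable and map the PCI device, allocate the device shared
 * region and command/response slots, set up the async and CQ rings, pair
 * with the vmxnet3 netdev, request interrupts, activate the device and
 * finally register with ib_core and the netdevice notifier chain.
 */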
static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

	/* Allocate zero-out device */
	dev = ib_alloc_device(pvrdma_dev, ib_dev);
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_free_device;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

	/* Enable 64-Bit DMA */
	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_consistent_dma_mask failed\n");
			goto err_free_resource;
		}
	} else {
		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_dma_mask failed\n");
			goto err_free_resource;
		}
	}

	pci_set_master(pdev);

	/* Map register space */
	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

	/* Setup per-device UAR. */
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 dev->dsr_version, PVRDMA_VERSION);

	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	/* Setup the shared region */
	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
		PVRDMA_GOS_BITS_32 :
		PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;

	if (dev->dsr_version < PVRDMA_PPN64_VERSION)
		dev->dsr->uar_pfn = dev->driver_uar.pfn;
	else
		dev->dsr->uar_pfn64 = dev->driver_uar.pfn;

	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

	/* Async event ring */
	dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring */
	dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

	/*
	 * Write the PA of the shared region to the device. The writes must be
	 * ordered such that the high bits are written last. When the writes
	 * complete, the device will have filled out the capabilities.
	 */

	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the write is complete before reading status. */
	mb();

	/* The driver supports RoCE V1 and V2. */
	if (!PVRDMA_SUPPORTED(dev)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Paired vmxnet3 will have same bus, slot. But func will be 0 */
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}
	dev_hold(dev->netdev);

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_free_cq_ring;
	}

	/* Allocate UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate GID table */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

	/* Activate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write is complete before reading status. */
	mb();

	/* Check if device was successfully activated */
	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register IB device */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);
err_free_cq_ring:
	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);
	return ret;
}

static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}

	/* Unregister ib device */
	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);

	/* Deactivate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->srq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	/* Free pci resources */
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static const struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);

static struct pci_driver pvrdma_driver = {
	.name		= DRV_NAME,
	.id_table	= pvrdma_pci_table,
	.probe		= pvrdma_pci_probe,
	.remove		= pvrdma_pci_remove,
};

static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}

static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}

module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_LICENSE("Dual BSD/GPL");