/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME	"vmw_pvrdma"
#define DRV_VERSION	"1.0.0.0-k"
static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;
static int pvrdma_add_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr,
			  void **context);
static int pvrdma_del_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  void **context);
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_REV_ID);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id
};
static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str,
				  size_t str_len)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);
	snprintf(str, str_len, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}
static int pvrdma_init_device(struct pvrdma_dev *dev)
{
	/* Initialize some device related stuff */
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}
static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
				 struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = pvrdma_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}
static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
					    u8 port_num)
{
	struct net_device *netdev;
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (port_num != 1)
		return NULL;

	rcu_read_lock();
	netdev = dev->netdev;
	if (netdev)
		dev_hold(netdev);
	rcu_read_unlock();

	return netdev;
}
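
/*
 * Fill in the ib_device fields (verbs callbacks, GUIDs, port count and the
 * user-verbs command mask advertised to userspace), allocate the CQ and QP
 * lookup tables used by the interrupt handlers, and register the device with
 * the IB core. The sysfs attributes defined above are created last.
 */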
static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -1;
	int i = 0;

	strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;

	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dma_device = &dev->pdev->dev;
	dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH);

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	dev->ib_dev.query_device = pvrdma_query_device;
	dev->ib_dev.query_port = pvrdma_query_port;
	dev->ib_dev.query_gid = pvrdma_query_gid;
	dev->ib_dev.query_pkey = pvrdma_query_pkey;
	dev->ib_dev.modify_port = pvrdma_modify_port;
	dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
	dev->ib_dev.mmap = pvrdma_mmap;
	dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
	dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
	dev->ib_dev.create_ah = pvrdma_create_ah;
	dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
	dev->ib_dev.create_qp = pvrdma_create_qp;
	dev->ib_dev.modify_qp = pvrdma_modify_qp;
	dev->ib_dev.query_qp = pvrdma_query_qp;
	dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
	dev->ib_dev.post_send = pvrdma_post_send;
	dev->ib_dev.post_recv = pvrdma_post_recv;
	dev->ib_dev.create_cq = pvrdma_create_cq;
	dev->ib_dev.modify_cq = pvrdma_modify_cq;
	dev->ib_dev.resize_cq = pvrdma_resize_cq;
	dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
	dev->ib_dev.poll_cq = pvrdma_poll_cq;
	dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
	dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
	dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
	dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
	dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
	dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
	dev->ib_dev.add_gid = pvrdma_add_gid;
	dev->ib_dev.del_gid = pvrdma_del_gid;
	dev->ib_dev.get_netdev = pvrdma_get_netdev;
	dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
	dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
	dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

	ret = ib_register_device(&dev->ib_dev, NULL);
	if (ret)
		goto err_qp_free;

	for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
		ret = device_create_file(&dev->ib_dev.dev,
					 pvrdma_class_attributes[i]);
		if (ret)
			goto err_class;
	}

	dev->ib_active = true;

	return 0;

err_class:
	ib_unregister_device(&dev->ib_dev);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}
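
/*
 * Interrupt handling. When MSI-X is available the device exposes one vector
 * per ring: vector 0 signals command responses, vector 1 carries async
 * events and vectors 2+ carry CQ notifications (see pvrdma_enable_msix()).
 * With INTx/MSI a single interrupt is shared and the cause is read from the
 * ICR register.
 */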
static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
		/* Legacy intr */
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}
static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		atomic_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type; /* 1:1 mapping for now. */
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		atomic_dec(&qp->refcnt);
		if (atomic_read(&qp->refcnt) == 0)
			wake_up(&qp->wait);
	}
}
static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		atomic_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type; /* 1:1 mapping for now. */
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		atomic_dec(&cq->refcnt);
		if (atomic_read(&cq->refcnt) == 0)
			wake_up(&cq->wait);
	}
}
static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}
static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}
static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}
static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

	/*
	 * Don't process events until the IB device is registered. Otherwise
	 * we'll try to ib_dispatch_event() on an invalid device.
	 */
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}
static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}
static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;
	unsigned long flags;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock_irqsave(&dev->cq_tbl_lock, flags);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			atomic_inc(&cq->refcnt);
		spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		if (cq) {
			atomic_dec(&cq->refcnt);
			if (atomic_read(&cq->refcnt))
				wake_up(&cq->wait);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}
static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
{
	if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
		pci_disable_msix(dev->pdev);
	else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
		pci_disable_msi(dev->pdev);
}
static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");

	if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
		for (i = 0; i < dev->intr.size; i++) {
			if (dev->intr.enabled[i]) {
				free_irq(dev->intr.msix_entry[i].vector, dev);
				dev->intr.enabled[i] = 0;
			}
		}
	} else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
		   dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
		free_irq(dev->pdev->irq, dev);
	}
}
static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}
static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
{
	int i;
	int ret;

	for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
		dev->intr.msix_entry[i].entry = i;
		dev->intr.msix_entry[i].vector = i;

		switch (i) {
		case 0:
			/* CMD ring handler */
			dev->intr.handler[i] = pvrdma_intr0_handler;
			break;
		case 1:
			/* Async event ring handler */
			dev->intr.handler[i] = pvrdma_intr1_handler;
			break;
		default:
			/* Completion queue handler */
			dev->intr.handler[i] = pvrdma_intrx_handler;
			break;
		}
	}

	ret = pci_enable_msix(pdev, dev->intr.msix_entry,
			      PVRDMA_MAX_INTERRUPTS);
	if (!ret) {
		dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
		dev->intr.size = PVRDMA_MAX_INTERRUPTS;
	} else if (ret > 0) {
		ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
		if (!ret) {
			dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
			dev->intr.size = ret;
		} else {
			dev->intr.size = 0;
		}
	}

	dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
		dev->intr.type, dev->intr.size);

	return ret;
}
static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0;
	int i;

	if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
	    pvrdma_enable_msix(dev->pdev, dev)) {
		/* Try MSI */
		ret = pci_enable_msi(dev->pdev);
		if (!ret) {
			dev->intr.type = PVRDMA_INTR_TYPE_MSI;
		} else {
			/* Legacy INTR */
			dev->intr.type = PVRDMA_INTR_TYPE_INTX;
		}
	}

	/* Request First IRQ */
	switch (dev->intr.type) {
	case PVRDMA_INTR_TYPE_INTX:
	case PVRDMA_INTR_TYPE_MSI:
		ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
				  IRQF_SHARED, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt\n");
			goto err_disable_msi;
		}
		break;
	case PVRDMA_INTR_TYPE_MSIX:
		ret = request_irq(dev->intr.msix_entry[0].vector,
				  pvrdma_intr0_handler, 0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt 0\n");
			goto err_disable_msi;
		}
		dev->intr.enabled[0] = 1;
		break;
	default:
		break;
	}

	/* For MSIX: request intr for each vector */
	if (dev->intr.size > 1) {
		ret = request_irq(dev->intr.msix_entry[1].vector,
				  pvrdma_intr1_handler, 0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt 1\n");
			goto err_free_irq;
		}
		dev->intr.enabled[1] = 1;

		for (i = 2; i < dev->intr.size; i++) {
			ret = request_irq(dev->intr.msix_entry[i].vector,
					  pvrdma_intrx_handler, 0,
					  DRV_NAME, dev);
			if (ret) {
				dev_err(&dev->pdev->dev,
					"failed to request interrupt %d\n", i);
				goto err_free_irq;
			}
			dev->intr.enabled[i] = 1;
		}
	}

	return 0;

err_free_irq:
	pvrdma_free_irq(dev);
err_disable_msi:
	pvrdma_disable_msi_all(dev);
	return ret;
}
static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}
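
/*
 * GID table management. Entries are mirrored in dev->sgid_tbl and programmed
 * into the device with CREATE_BIND/DESTROY_BIND commands posted through the
 * command slot.
 */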
static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}
static int pvrdma_add_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr,
			  void **context)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	return pvrdma_add_gid_at_index(dev, gid, index);
}
static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;

	/* Update sgid table. */
	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return -EFAULT;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}
static int pvrdma_del_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  void **context)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
		index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, index);
}
static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  unsigned long event)
{
	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;
	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev->ib_dev.name);
		break;
	}
}
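
/*
 * Netdevice notifier callbacks may run in atomic context, so the event is
 * queued (GFP_ATOMIC allocation) and handled later on event_wq in process
 * context, where the device list mutex can be taken.
 */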
static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if (dev->netdev == netdev_work->event_netdev) {
			pvrdma_netdevice_event_handle(dev, netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}
static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}
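
/*
 * Device bring-up: enable the PCI function, map the register and UAR BARs,
 * allocate the driver shared region (DSR) plus command/response slots and
 * event rings, pair the function with its vmxnet3 sibling, set up
 * interrupts, activate the device and finally register with the IB core.
 */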
static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	unsigned int version;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

	/* Allocate zero-out device */
	dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_free_device;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

	/* Enable 64-Bit DMA */
	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_consistent_dma_mask failed\n");
			goto err_free_resource;
		}
	} else {
		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_dma_mask failed\n");
			goto err_free_resource;
		}
	}

	pci_set_master(pdev);

	/* Map register space */
	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

	/* Setup per-device UAR. */
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 version, PVRDMA_VERSION);
	if (version < PVRDMA_VERSION) {
		dev_err(&pdev->dev, "incompatible device version\n");
		goto err_uar_unmap;
	}

	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	/* Setup the shared region */
	memset(dev->dsr, 0, sizeof(*dev->dsr));
	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
		PVRDMA_GOS_BITS_32 :
		PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;
	dev->dsr->uar_pfn = dev->driver_uar.pfn;

	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

	/* Async event ring */
	dev->dsr->async_ring_pages.num_pages = 4;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring */
	dev->dsr->cq_ring_pages.num_pages = 4;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

	/*
	 * Write the PA of the shared region to the device. The writes must be
	 * ordered such that the high bits are written last. When the writes
	 * complete, the device will have filled out the capabilities.
	 */
	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the write is complete before reading status. */
	mb();

	/* Currently, the driver only supports RoCE mode. */
	if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) {
		dev_err(&pdev->dev, "unsupported transport %d\n",
			dev->dsr->caps.mode);
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Currently, the driver only supports RoCE V1. */
	if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Paired vmxnet3 will have same bus, slot. But func will be 0 */
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_netdevice;
	}

	/* Allocate UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate GID table */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

	/* Activate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write is complete before reading status. */
	mb();

	/* Check if device was successfully activated */
	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register IB device */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pvrdma_disable_msi_all(dev);
err_netdevice:
	unregister_netdevice_notifier(&dev->nb_netdev);
err_free_cq_ring:
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);

	return ret;
}
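
/* Detach and release resources roughly in the reverse order of pvrdma_pci_probe(). */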
static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	/* Unregister ib device */
	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pvrdma_disable_msi_all(dev);

	/* Deactivate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);

	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	/* Free pci resources */
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}
static struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);
static struct pci_driver pvrdma_driver = {
	.name		= DRV_NAME,
	.id_table	= pvrdma_pci_table,
	.probe		= pvrdma_pci_probe,
	.remove		= pvrdma_pci_remove,
};
static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}
static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}
module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE("Dual BSD/GPL");